Index: stable/7/sys/amd64/amd64/machdep.c =================================================================== --- stable/7/sys/amd64/amd64/machdep.c (revision 177733) +++ stable/7/sys/amd64/amd64/machdep.c (revision 177734) @@ -1,1936 +1,1937 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atalk.h" #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_perfmon.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! 
#endif #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #include #ifdef SMP #include #endif #ifdef DEV_ATPIC #include #else #include #endif #include #include /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern u_int64_t hammer_time(u_int64_t, u_int64_t); extern void printcpuinfo(void); /* XXX header file */ extern void identify_cpu(void); extern void panicifcpuunsupported(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup(void *); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel, _ucode32sel; int cold = 1; long Maxmem = 0; long realmem = 0; /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct region_descriptor r_gdt, r_idt; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; static void cpu_startup(dummy) void *dummy; { char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook", 7) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("usable memory = %ju (%ju MB)\n", ptoa((uintmax_t)physmem), ptoa((uintmax_t)physmem) / 1048576); realmem = Maxmem; /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. 
* * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_rsp - sizeof(struct sigframe) - 128; /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned long)sp & ~0xFul); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */ if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ } else { /* Old FreeBSD-style arguments. */ regs->tf_rsi = ksi->ksi_code; /* arg 2 in %rsi */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. 
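* * [Editorial aside, not part of the original comment or this commit: "improper privileges" refers to the checks a few lines below, built on the CS_SECURE() and EFL_SECURE() macros defined near the top of this file -- EFL_SECURE(ef, oef) insists that only PSL_USERCHANGE bits differ from the current tf_rflags, so a handler cannot smuggle in IOPL or other privileged flag bits, and CS_SECURE(cs) insists ISPL(cs) == SEL_UPL, so a user-supplied %cs can never name a ring-0 code segment.]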
* * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; long rflags; int cs, error, ret; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; rflags = ucp->uc_mcontext.mc_rflags; /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_rflags for faults. Debuggers * should sometimes set it there too. tf_rflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { printf("sigreturn: rflags = 0x%lx\n", rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); PROC_LOCK(p); #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); td->td_pcb->pcb_flags |= PCB_FULLCTX; return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { register_t reg; uint64_t tsc1, tsc2; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); /* If we're booting, trust the rate calibrated moments ago. */ if (cold) { *rate = tsc_freq; return (0); } #ifdef SMP /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); #ifdef SMP thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); #endif /* * Calculate the difference in readings, convert to Mhz, and * subtract 0.5% of the total. Empirical testing has shown that * overhead in DELAY() works out to approximately this value. */ tsc2 -= tsc1; *rate = tsc2 * 1000 - tsc2 * 5; return (0); } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Hook to idle the CPU when possible. In the SMP case we default to * off because a halted cpu will not currently pick up a new thread in the * run queue until the next timer tick. 
If turned on this will result in * approximately a 4.2% loss in real time performance in buildworld tests * (but improves user and sys times oddly enough), and saves approximately * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). * * XXX we need to have a cpu mask of idle cpus and generate an IPI or * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. * Then we can have our cake and eat it too. * * XXX I'm turning it on for SMP as well by default for now. It seems to * help lock contention somewhat, and this is critical for HTT. -Peter */ static int cpu_idle_hlt = 1; TUNABLE_INT("machdep.cpu_idle_hlt", &cpu_idle_hlt); SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, &cpu_idle_hlt, 0, "Idle loop HLT enable"); static void cpu_idle_default(void) { /* * we must absolutely guarantee that hlt is the * absolute next instruction after sti or we * introduce a timing window. */ __asm __volatile("sti; hlt"); } /* * Note that we have to be careful here to avoid a race between checking * sched_runnable() and actually halting. If we don't do this, we may waste * the time between calling hlt and the next interrupt even though there * is a runnable process. */ void cpu_idle(void) { #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif if (cpu_idle_hlt) { disable_intr(); if (sched_runnable()) enable_intr(); else (*cpu_idle_hook)(); } } /* Other subsystems (e.g., ACPI) can hook this later. */ void (*cpu_idle_hook)(void) = cpu_idle_default; /* * Clear registers on exec */ void exec_setregs(td, entry, stack, ps_strings) struct thread *td; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; critical_enter(); wrmsr(MSR_FSBASE, 0); wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; critical_exit(); load_ds(_udatasel); load_es(_udatasel); load_fs(_udatasel); load_gs(_udatasel); pcb->pcb_ds = _udatasel; pcb->pcb_es = _udatasel; pcb->pcb_fs = _udatasel; pcb->pcb_gs = _udatasel; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = entry; regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; regs->tf_rdi = stack; /* argv */ regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == PCPU_GET(curpcb)) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } void cpu_setregs(void) { register_t cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the * BSP. See the comments there about why we set them.
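* * [Editorial gloss, not in the original source: of the bits OR'd in below, CR0_MP/CR0_NE/CR0_TS select native FPU exception reporting and make the first FPU access fault (device-not-available), which is what lazy FPU context switching relies on; CR0_WP makes the kernel honour read-only page mappings; CR0_AM allows user code that sets PSL_AC to receive alignment-check faults.]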
*/ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); } /* * Initialize amd64 and configure to run kernel */ /* * Initialize segments & interrupt table */ struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[PAGE_SIZE] __aligned(16); struct amd64tss common_tss[MAXCPU]; /* software prototypes -- in more palatable form */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, /* long */ 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 1 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 1, /* long */ 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 2 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 1, /* long */ 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUCODE32_SEL 3 32 bit Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, /* long */ 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUDATA_SEL 4 32/64 bit Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, /* long */ 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUCODE_SEL 5 64 bit Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 1, /* long */ 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 6 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct amd64tss)-1,/* length - all address space */ SDT_SYSTSS, /* segment type */ SEL_KPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, /* long */ 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Actually, the TSS is a system descriptor which is double size */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, /* long */ 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUGS32_SEL 8 32 bit GS Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, /* long */ 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, 
dpl, ist) int idx; inthand_t *func; int typ; int dpl; int ist; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (uintptr_t)func; ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); ip->gd_ist = ist; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((uintptr_t)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), IDTVEC(fast_syscall), IDTVEC(fast_syscall32); void sdtossd(sd, ssd) struct user_segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_long = sd->sd_long; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void ssdtosd(ssd, sd) struct soft_segment_descriptor *ssd; struct user_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_long = ssd->ssd_long; sd->sd_def32 = ssd->ssd_def32; sd->sd_gran = ssd->ssd_gran; } void ssdtosyssd(ssd, sd) struct soft_segment_descriptor *ssd; struct system_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_gran = ssd->ssd_gran; } #if !defined(DEV_ATPIC) && defined(DEV_ISA) #include u_int isa_irq_pending(void) { return (0); } #endif u_int basemem; /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(caddr_t kmdp, u_int64_t first) { int i, off, physmap_idx, pa_indx, da_indx; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; u_long physmem_tunable; pt_entry_t *pte; struct bios_smap *smapbase, *smap, *smapend; u_int32_t smapsize; quad_t dcons_addr, dcons_size; bzero(physmap, sizeof(physmap)); basemem = 0; physmap_idx = 0; /* * get memory map from INT 15:E820, kindly supplied by the loader. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes smap. 
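* * [Illustrative note added in editing, not part of the file: the loader lays the metadata out as a 32-bit byte count followed immediately by the array of struct bios_smap entries, so smapbase points at the first entry, ((u_int32_t *)smapbase - 1) reads the size word just in front of it, and smapend is simply smapbase plus that many bytes -- which is exactly how the three assignments below walk the table.]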
*/ smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) panic("No BIOS smap info from loader!"); smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016lx len=%016lx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) continue; if (smap->length == 0) continue; for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-monotonic memory region, ignoring second region\n"); continue; } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; continue; } physmap_idx += 2; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); break; } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; } /* * Find the 'base memory' segment for SMP */ basemem = 0; for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0) panic("BIOS smap did not include a basemem segment!"); #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * Don't allow MAXMEM or hw.physmem to extend the amount of memory * in the system. */ if (Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(&first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. 
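* [editing note, not in the original: pages from 1MB up to 'first' (the physfree value passed into getmemsize()) hold the kernel image and the early allocations made before this routine ran, so they are skipped for phys_avail but still fall through to do_dump_avail so that crash dumps cover the kernel itself]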
*/ if (pa >= 0x100000 && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } u_int64_t hammer_time(u_int64_t modulep, u_int64_t physfree) { caddr_t kmdp; int gsel_tss, x; struct pcpu *pc; u_int64_t msr; char *env; thread0.td_kstack = physfree + KERNBASE; bzero((void *)thread0.td_kstack, KSTACK_PAGES * PAGE_SIZE); physfree += KSTACK_PAGES * PAGE_SIZE; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. 
*/ proc_linkup0(&proc0, &thread0); preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE); preload_bootstrap_relocate(KERNBASE); kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + KERNBASE; #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); #endif /* Init basic tunables, hz etc */ init_param1(); /* * make gdt memory segments */ gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1)) ssdtosd(&gdt_segs[x], &gdt[x]); } ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (long) gdt; lgdt(&r_gdt); pc = &__pcpu[0]; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); PCPU_SET(tssp, &common_tss[0]); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 1); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; lidt(&r_idt); /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). */ i8254_init(); /* * Initialize the console before we print anything out. */ cninit(); #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. 
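* [Editorial note, not part of the original comment: a spurious interrupt from the master 8259 is reported as its IRQ 7 and one from the slave as IRQ 15, which is why exactly the IDT_IO_INTS + 7 and IDT_IO_INTS + 15 vectors are pointed at spuriousint below.]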
*/ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); #endif #else #error "have you forgotten the isa device?"; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) - kdb_enter("Boot flags requested debugger"); + kdb_enter_why(KDB_WHY_BOOTFLAGS, + "Boot flags requested debugger"); #endif identify_cpu(); /* Final stage of CPU initialization */ initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ common_tss[0].tss_rsp0 = thread0.td_kstack + \ KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb); /* Ensure the stack is aligned to 16 bytes */ common_tss[0].tss_rsp0 &= ~0xFul; PCPU_SET(rsp0, common_tss[0].tss_rsp0); /* doublefault stack space, runs on ist1 */ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); /* Set up the fast syscall stuff */ msr = rdmsr(MSR_EFER) | EFER_SCE; wrmsr(MSR_EFER, msr); wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); getmemsize(kmdp, physfree); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, MSGBUF_SIZE); fpuinit(); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ thread0.td_pcb->pcb_cr3 = KPML4phys; thread0.td_frame = &proc0_tf; env = getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); /* Location of kernel stack for locore */ return ((u_int64_t)thread0.td_pcb); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_flags = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(td->td_md.md_saved_flags); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_r12 = tf->tf_r12; pcb->pcb_r13 = tf->tf_r13; pcb->pcb_r14 = tf->tf_r14; pcb->pcb_r15 = tf->tf_r15; pcb->pcb_rbp = tf->tf_rbp; pcb->pcb_rbx = tf->tf_rbx; pcb->pcb_rip = tf->tf_rip; pcb->pcb_rsp = (ISPL(tf->tf_cs)) ? 
tf->tf_rsp : (long)(tf + 1) - 8; } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_rip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_rflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_rflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; tp = td->td_frame; regs->r_r15 = tp->tf_r15; regs->r_r14 = tp->tf_r14; regs->r_r13 = tp->tf_r13; regs->r_r12 = tp->tf_r12; regs->r_r11 = tp->tf_r11; regs->r_r10 = tp->tf_r10; regs->r_r9 = tp->tf_r9; regs->r_r8 = tp->tf_r8; regs->r_rdi = tp->tf_rdi; regs->r_rsi = tp->tf_rsi; regs->r_rbp = tp->tf_rbp; regs->r_rbx = tp->tf_rbx; regs->r_rdx = tp->tf_rdx; regs->r_rcx = tp->tf_rcx; regs->r_rax = tp->tf_rax; regs->r_rip = tp->tf_rip; regs->r_cs = tp->tf_cs; regs->r_rflags = tp->tf_rflags; regs->r_rsp = tp->tf_rsp; regs->r_ss = tp->tf_ss; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; register_t rflags; tp = td->td_frame; rflags = regs->r_rflags & 0xffffffff; if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_r15 = regs->r_r15; tp->tf_r14 = regs->r_r14; tp->tf_r13 = regs->r_r13; tp->tf_r12 = regs->r_r12; tp->tf_r11 = regs->r_r11; tp->tf_r10 = regs->r_r10; tp->tf_r9 = regs->r_r9; tp->tf_r8 = regs->r_r8; tp->tf_rdi = regs->r_rdi; tp->tf_rsi = regs->r_rsi; tp->tf_rbp = regs->r_rbp; tp->tf_rbx = regs->r_rbx; tp->tf_rdx = regs->r_rdx; tp->tf_rcx = regs->r_rcx; tp->tf_rax = regs->r_rax; tp->tf_rip = regs->r_rip; tp->tf_cs = regs->r_cs; tp->tf_rflags = rflags; tp->tf_rsp = regs->r_rsp; tp->tf_ss = regs->r_ss; td->td_pcb->pcb_flags |= PCB_FULLCTX; return (0); } /* XXX check all this stuff! */ /* externalize from sv_xmm */ static void fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs) { struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* pcb -> fpregs */ bzero(fpregs, sizeof(*fpregs)); /* FPU control/status */ penv_fpreg->en_cw = penv_xmm->en_cw; penv_fpreg->en_sw = penv_xmm->en_sw; penv_fpreg->en_tw = penv_xmm->en_tw; penv_fpreg->en_opcode = penv_xmm->en_opcode; penv_fpreg->en_rip = penv_xmm->en_rip; penv_fpreg->en_rdp = penv_xmm->en_rdp; penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr; penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16); } /* internalize from fpregs into sv_xmm */ static void set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm) { struct envxmm *penv_xmm = &sv_xmm->sv_env; struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; int i; /* fpregs -> pcb */ /* FPU control/status */ penv_xmm->en_cw = penv_fpreg->en_cw; penv_xmm->en_sw = penv_fpreg->en_sw; penv_xmm->en_tw = penv_fpreg->en_tw; penv_xmm->en_opcode = penv_fpreg->en_opcode; penv_xmm->en_rip = penv_fpreg->en_rip; penv_xmm->en_rdp = penv_fpreg->en_rdp; penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr; penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16); } /* externalize from td->pcb */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { 
fill_fpregs_xmm(&td->td_pcb->pcb_save, fpregs); return (0); } /* internalize to td->pcb */ int set_fpregs(struct thread *td, struct fpreg *fpregs) { set_fpregs_xmm(fpregs, &td->td_pcb->pcb_save); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_rsp); PROC_UNLOCK(curthread->td_proc); mcp->mc_r15 = tp->tf_r15; mcp->mc_r14 = tp->tf_r14; mcp->mc_r13 = tp->tf_r13; mcp->mc_r12 = tp->tf_r12; mcp->mc_r11 = tp->tf_r11; mcp->mc_r10 = tp->tf_r10; mcp->mc_r9 = tp->tf_r9; mcp->mc_r8 = tp->tf_r8; mcp->mc_rdi = tp->tf_rdi; mcp->mc_rsi = tp->tf_rsi; mcp->mc_rbp = tp->tf_rbp; mcp->mc_rbx = tp->tf_rbx; mcp->mc_rcx = tp->tf_rcx; mcp->mc_rflags = tp->tf_rflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_rax = 0; mcp->mc_rdx = 0; mcp->mc_rflags &= ~PSL_C; } else { mcp->mc_rax = tp->tf_rax; mcp->mc_rdx = tp->tf_rdx; } mcp->mc_rip = tp->tf_rip; mcp->mc_cs = tp->tf_cs; mcp->mc_rsp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; long rflags; int ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); rflags = (mcp->mc_rflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); ret = set_fpcontext(td, mcp); if (ret != 0) return (ret); tp->tf_r15 = mcp->mc_r15; tp->tf_r14 = mcp->mc_r14; tp->tf_r13 = mcp->mc_r13; tp->tf_r12 = mcp->mc_r12; tp->tf_r11 = mcp->mc_r11; tp->tf_r10 = mcp->mc_r10; tp->tf_r9 = mcp->mc_r9; tp->tf_r8 = mcp->mc_r8; tp->tf_rdi = mcp->mc_rdi; tp->tf_rsi = mcp->mc_rsi; tp->tf_rbp = mcp->mc_rbp; tp->tf_rbx = mcp->mc_rbx; tp->tf_rdx = mcp->mc_rdx; tp->tf_rcx = mcp->mc_rcx; tp->tf_rax = mcp->mc_rax; tp->tf_rip = mcp->mc_rip; tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_rsp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_flags |= PCB_FULLCTX; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { mcp->mc_ownedfp = fpugetregs(td, (struct savefpu *)&mcp->mc_fpstate); mcp->mc_fpformat = fpuformat(); } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { struct savefpu *fpstate; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* * XXX we violate the dubious requirement that fpusetregs() * be called with interrupts disabled. * XXX obsolete on trap-16 systems? */ fpstate = (struct savefpu *)&mcp->mc_fpstate; fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; fpusetregs(td, fpstate); } else return (EINVAL); return (0); } void fpstate_drop(struct thread *td) { register_t s; s = intr_disable(); if (PCPU_GET(fpcurthread) == td) fpudrop(); /* * XXX force a full drop of the fpu. The above only drops it if we * owned it. * * XXX I don't much like fpugetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of fpugetregs()... perhaps we just * have too many layers. 
*/ curthread->td_pcb->pcb_flags &= ~PCB_FPUINITDONE; intr_restore(s); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[8] = 0; dbregs->dr[9] = 0; dbregs->dr[10] = 0; dbregs->dr[11] = 0; dbregs->dr[12] = 0; dbregs->dr[13] = 0; dbregs->dr[14] = 0; dbregs->dr[15] = 0; return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP or a general protection fault right here. * Upper bits of dr6 and dr7 must not be set */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (td->td_frame->tf_cs == _ucode32sel && DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8) return (EINVAL); } if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 || (dbregs->dr[7] & 0xffffffff00000000ul) != 0) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } void reset_dbregs(void) { load_dr7(0); /* Turn off the control bits first */ load_dr0(0); load_dr1(0); load_dr2(0); load_dr3(0); load_dr6(0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. 
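* * [Editorial sketch of the hardware state decoded below, not part of the original comment: the low byte of %dr7 carries the L0/G0..L3/G3 breakpoint-enable bits (the dr7 & 0x000000ff test), the low nibble of %dr6 carries the B0..B3 "breakpoint hit" status bits (the dr6 & 0x0000000f test), and each set B bit is mapped back to the address held in the corresponding %dr0..%dr3 register before the user/kernel address comparison.]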
*/ int user_dbreg_trap(void) { u_int64_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int64_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called from the debugger. * * The actual code is stolen from , and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This make a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. */ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* KDB */ Index: stable/7/sys/amd64/amd64/mp_watchdog.c =================================================================== --- stable/7/sys/amd64/amd64/mp_watchdog.c (revision 177733) +++ stable/7/sys/amd64/amd64/mp_watchdog.c (revision 177734) @@ -1,211 +1,212 @@ /*- * Copyright (c) 2004 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_mp_watchdog.h" #include "opt_sched.h" #ifdef SCHED_ULE #error MP_WATCHDOG cannot currently be used with SCHED_ULE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include /* * mp_watchdog hijacks the idle thread on a specified CPU, prevents new work * from being scheduled there, and uses it as a "watchdog" to detect kernel * failure on other CPUs. This is made reasonable by inclusion of logical * processors in Xeon hardware. The watchdog is configured by setting the * debug.watchdog sysctl/tunable to the CPU of interest. A callout will then * begin executing, resetting a timer that is gradually lowered by the watching * thread. If the timer reaches 0, the watchdog fires by either dropping * directly to the debugger, or by sending an NMI IPI to the boot processor. * This is a somewhat less efficient substitute for dedicated watchdog * hardware, but can be quite an effective tool for debugging hangs. * * XXXRW: This should really use the watchdog(9)/watchdog(4) framework, but * doesn't yet. */ static int watchdog_cpu = -1; static int watchdog_dontfire = 1; static int watchdog_timer = -1; static int watchdog_nmi = 1; TUNABLE_INT("debug.watchdog", &watchdog_cpu); SYSCTL_INT(_debug, OID_AUTO, watchdog_nmi, CTLFLAG_RW, &watchdog_nmi, 0, "IPI the boot processor with an NMI to enter the debugger"); static struct callout watchdog_callout; static void watchdog_change(int wdcpu); /* * Number of seconds before the watchdog will fire if the callout fails to * reset the timer. */ #define WATCHDOG_THRESHOLD 10 static void watchdog_init(void *arg) { callout_init(&watchdog_callout, CALLOUT_MPSAFE); if (watchdog_cpu != -1) watchdog_change(watchdog_cpu); } /* * This callout resets a timer until the watchdog kicks in. It acquires some * critical locks to make sure things haven't gotten wedged with those locks * held. */ static void watchdog_function(void *arg) { /* * Since the timer ran, we must not be wedged. Acquire some critical * locks to make sure. Then reset the timer. */ mtx_lock(&Giant); watchdog_timer = WATCHDOG_THRESHOLD; mtx_unlock(&Giant); callout_reset(&watchdog_callout, 1 * hz, watchdog_function, NULL); } SYSINIT(watchdog_init, SI_SUB_DRIVERS, SI_ORDER_ANY, watchdog_init, NULL); static void watchdog_change(int wdcpu) { if (wdcpu == -1 || wdcpu == 0xffffffff) { /* * Disable the watchdog. */ watchdog_cpu = -1; watchdog_dontfire = 1; callout_stop(&watchdog_callout); printf("watchdog stopped\n"); } else { watchdog_timer = WATCHDOG_THRESHOLD; watchdog_dontfire = 0; watchdog_cpu = wdcpu; callout_reset(&watchdog_callout, 1 * hz, watchdog_function, NULL); } } /* * This sysctl sets which CPU is the watchdog CPU. Set to -1 or 0xffffffff * to disable the watchdog.
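* * [Usage sketch added in editing, not from the source: assuming a machine with a spare logical CPU 1, setting debug.watchdog=1 in loader.conf (the TUNABLE_INT above) or at runtime via sysctl debug.watchdog=1 dedicates that CPU's idle thread to the watchdog, and sysctl debug.watchdog=-1 turns it back off; the CPU number is purely an illustrative choice.]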
*/ static int sysctl_watchdog(SYSCTL_HANDLER_ARGS) { int error, temp; temp = watchdog_cpu; error = sysctl_handle_int(oidp, &temp, 0, req); if (error) return (error); if (req->newptr != NULL) watchdog_change(temp); return (0); } SYSCTL_PROC(_debug, OID_AUTO, watchdog, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_watchdog, "I", ""); /* * Drop into the debugger by sending an IPI NMI to the boot processor. */ static void watchdog_ipi_nmi(void) { /* * Deliver NMI to the boot processor. Why not? */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_NMI, boot_cpu_id); lapic_ipi_wait(-1); } /* * ap_watchdog() is called by the SMP idle loop code. It works on the same * premise that the disabling of logical processors does: that if the cpu is * idle, then it can ignore the world from then on, as nothing will be * scheduled on it. Leaving aside multi-runqueue schedulers (SCHED_ULE) and * explicit process migration (sched_bind()), this is not an unreasonable * assumption. */ void ap_watchdog(u_int cpuid) { char old_pcomm[MAXCOMLEN + 1]; struct proc *p; if (watchdog_cpu != cpuid) return; printf("watchdog started on cpu %d\n", cpuid); p = curproc; bcopy(p->p_comm, old_pcomm, MAXCOMLEN + 1); snprintf(p->p_comm, MAXCOMLEN + 1, "mp_watchdog cpu %d", cpuid); while (1) { DELAY(1000000); /* One second. */ if (watchdog_cpu != cpuid) break; atomic_subtract_int(&watchdog_timer, 1); if (watchdog_timer < 4) printf("Watchdog timer: %d\n", watchdog_timer); if (watchdog_timer == 0 && watchdog_dontfire == 0) { printf("Watchdog firing!\n"); watchdog_dontfire = 1; if (watchdog_nmi) watchdog_ipi_nmi(); else - kdb_enter("mp_watchdog"); + kdb_enter_why(KDB_WHY_WATCHDOG, + "mp_watchdog"); } } bcopy(old_pcomm, p->p_comm, MAXCOMLEN + 1); printf("watchdog stopped on cpu %d\n", cpuid); } Index: stable/7/sys/arm/at91/uart_dev_at91usart.c =================================================================== --- stable/7/sys/arm/at91/uart_dev_at91usart.c (revision 177733) +++ stable/7/sys/arm/at91/uart_dev_at91usart.c (revision 177734) @@ -1,673 +1,674 @@ /*- * Copyright (c) 2005 M. Warner Losh * Copyright (c) 2005 Olivier Houchard * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_comconsole.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include "uart_if.h" #define DEFAULT_RCLK AT91C_MASTER_CLOCK #define USART_BUFFER_SIZE 128 /* * High-level UART interface. */ struct at91_usart_rx { bus_addr_t pa; uint8_t buffer[USART_BUFFER_SIZE]; bus_dmamap_t map; }; struct at91_usart_softc { struct uart_softc base; bus_dma_tag_t dmatag; /* bus dma tag for mbufs */ bus_dmamap_t tx_map; uint32_t flags; #define HAS_TIMEOUT 1 struct at91_usart_rx ping_pong[2]; struct at91_usart_rx *ping; struct at91_usart_rx *pong; }; #define RD4(bas, reg) \ bus_space_read_4((bas)->bst, (bas)->bsh, uart_regofs(bas, reg)) #define WR4(bas, reg, value) \ bus_space_write_4((bas)->bst, (bas)->bsh, uart_regofs(bas, reg), value) #define SIGCHG(c, i, s, d) \ do { \ if (c) { \ i |= (i & s) ? s : s | d; \ } else { \ i = (i & s) ? (i & ~s) | d : i; \ } \ } while (0); #define BAUD2DIVISOR(b) \ ((((DEFAULT_RCLK * 10) / ((b) * 16)) + 5) / 10) /* * Low-level UART interface. */ static int at91_usart_probe(struct uart_bas *bas); static void at91_usart_init(struct uart_bas *bas, int, int, int, int); static void at91_usart_term(struct uart_bas *bas); static void at91_usart_putc(struct uart_bas *bas, int); static int at91_usart_rxready(struct uart_bas *bas); static int at91_usart_getc(struct uart_bas *bas, struct mtx *mtx); extern SLIST_HEAD(uart_devinfo_list, uart_devinfo) uart_sysdevs; static int at91_usart_param(struct uart_bas *bas, int baudrate, int databits, int stopbits, int parity) { uint32_t mr; /* * Assume 3-wire RS-232 configuration. * XXX Not sure how uart will present the other modes to us, so * XXX they are unimplemented. maybe ioctl? */ mr = USART_MR_MODE_NORMAL; mr |= USART_MR_USCLKS_MCK; /* Assume MCK */ /* * Or in the databits requested */ if (databits < 9) mr &= ~USART_MR_MODE9; switch (databits) { case 5: mr |= USART_MR_CHRL_5BITS; break; case 6: mr |= USART_MR_CHRL_6BITS; break; case 7: mr |= USART_MR_CHRL_7BITS; break; case 8: mr |= USART_MR_CHRL_8BITS; break; case 9: mr |= USART_MR_CHRL_8BITS | USART_MR_MODE9; break; default: return (EINVAL); } /* * Or in the parity */ switch (parity) { case UART_PARITY_NONE: mr |= USART_MR_PAR_NONE; break; case UART_PARITY_ODD: mr |= USART_MR_PAR_ODD; break; case UART_PARITY_EVEN: mr |= USART_MR_PAR_EVEN; break; case UART_PARITY_MARK: mr |= USART_MR_PAR_MARK; break; case UART_PARITY_SPACE: mr |= USART_MR_PAR_SPACE; break; default: return (EINVAL); } /* * Or in the stop bits. Note: The hardware supports 1.5 stop * bits in async mode, but there's no way to specify that * AFAICT. Instead, rely on the convention documented at * http://www.lammertbies.nl/comm/info/RS-232_specs.html which * states that 1.5 stop bits are used for 5 bit bytes and * 2 stop bits only for longer bytes. */ if (stopbits == 1) mr |= USART_MR_NBSTOP_1; else if (databits > 5) mr |= USART_MR_NBSTOP_2; else mr |= USART_MR_NBSTOP_1_5; /* * We want normal plumbing mode too, none of this fancy * loopback or echo mode.
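* * [Worked example added in editing, not part of the driver: BAUD2DIVISOR(), defined above, rounds MCK / (16 * baud) to the nearest integer via its *10 ... +5 ... /10 sequence. Assuming, purely for illustration, a 60 MHz master clock and 115200 baud, 60000000 / (16 * 115200) is about 32.55, so the macro yields 33 and the programmed rate becomes 60000000 / (16 * 33), roughly 113636 baud -- about 1.4% slow, comfortably within the usual few-percent async tolerance.]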
*/ mr |= USART_MR_CHMODE_NORMAL; mr &= ~USART_MR_MSBF; /* lsb first */ mr &= ~USART_MR_CKLO_SCK; /* Don't drive SCK */ WR4(bas, USART_MR, mr); /* * Set the baud rate */ WR4(bas, USART_BRGR, BAUD2DIVISOR(baudrate)); /* XXX Need to take possible synchronous mode into account */ return (0); } static struct uart_ops at91_usart_ops = { .probe = at91_usart_probe, .init = at91_usart_init, .term = at91_usart_term, .putc = at91_usart_putc, .rxready = at91_usart_rxready, .getc = at91_usart_getc, }; static int at91_usart_probe(struct uart_bas *bas) { /* We know that this is always here */ return (0); } /* * Initialize this device for use as a console. */ static void at91_usart_init(struct uart_bas *bas, int baudrate, int databits, int stopbits, int parity) { at91_usart_param(bas, baudrate, databits, stopbits, parity); /* Reset the rx and tx buffers and turn on rx and tx */ WR4(bas, USART_CR, USART_CR_RSTSTA | USART_CR_RSTRX | USART_CR_RSTTX); WR4(bas, USART_CR, USART_CR_RXEN | USART_CR_TXEN); WR4(bas, USART_IDR, 0xffffffff); } /* * Free resources now that we're no longer the console. This appears to * be never called, and I'm unsure quite what to do if I am called. */ static void at91_usart_term(struct uart_bas *bas) { /* XXX */ } /* * Put a character of console output (so we do it here polling rather than * interrutp driven). */ static void at91_usart_putc(struct uart_bas *bas, int c) { while (!(RD4(bas, USART_CSR) & USART_CSR_TXRDY)) continue; WR4(bas, USART_THR, c); } /* * Check for a character available. */ static int at91_usart_rxready(struct uart_bas *bas) { return ((RD4(bas, USART_CSR) & USART_CSR_RXRDY) != 0 ? 1 : 0); } /* * Block waiting for a character. */ static int at91_usart_getc(struct uart_bas *bas, struct mtx *mtx) { int c; while (!(RD4(bas, USART_CSR) & USART_CSR_RXRDY)) continue; c = RD4(bas, USART_RHR); c &= 0xff; return (c); } static int at91_usart_bus_probe(struct uart_softc *sc); static int at91_usart_bus_attach(struct uart_softc *sc); static int at91_usart_bus_flush(struct uart_softc *, int); static int at91_usart_bus_getsig(struct uart_softc *); static int at91_usart_bus_ioctl(struct uart_softc *, int, intptr_t); static int at91_usart_bus_ipend(struct uart_softc *); static int at91_usart_bus_param(struct uart_softc *, int, int, int, int); static int at91_usart_bus_receive(struct uart_softc *); static int at91_usart_bus_setsig(struct uart_softc *, int); static int at91_usart_bus_transmit(struct uart_softc *); static kobj_method_t at91_usart_methods[] = { KOBJMETHOD(uart_probe, at91_usart_bus_probe), KOBJMETHOD(uart_attach, at91_usart_bus_attach), KOBJMETHOD(uart_flush, at91_usart_bus_flush), KOBJMETHOD(uart_getsig, at91_usart_bus_getsig), KOBJMETHOD(uart_ioctl, at91_usart_bus_ioctl), KOBJMETHOD(uart_ipend, at91_usart_bus_ipend), KOBJMETHOD(uart_param, at91_usart_bus_param), KOBJMETHOD(uart_receive, at91_usart_bus_receive), KOBJMETHOD(uart_setsig, at91_usart_bus_setsig), KOBJMETHOD(uart_transmit, at91_usart_bus_transmit), { 0, 0 } }; int at91_usart_bus_probe(struct uart_softc *sc) { return (0); } #ifndef SKYEYE_WORKAROUNDS static void at91_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error) { if (error != 0) return; *(bus_addr_t *)arg = segs[0].ds_addr; } #endif static int at91_usart_bus_attach(struct uart_softc *sc) { #ifndef SKYEYE_WORKAROUNDS int err; int i; #endif uint32_t cr; struct at91_usart_softc *atsc; atsc = (struct at91_usart_softc *)sc; /* * See if we have a TIMEOUT bit. We disable all interrupts as * a side effect. Boot loaders may have enabled them. 
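The probe/init/putc/rxready/getc operations above make up the whole polled console contract for this device; output and input simply spin on the TXRDY/RXRDY bits in USART_CSR. A hedged sketch of a polled echo loop written only against the uart_ops table; the helper itself and the header paths are assumptions, not driver code.

#include <sys/types.h>
#include <dev/uart/uart.h>
#include <dev/uart/uart_cpu.h>	/* assumed home of struct uart_ops */

/* Hypothetical helper: echo received characters using only the ops. */
static void
polled_echo(struct uart_ops *ops, struct uart_bas *bas)
{
	int c;

	ops->init(bas, 115200, 8, 1, UART_PARITY_NONE);
	for (;;) {
		if (ops->rxready(bas)) {		/* non-blocking poll */
			c = ops->getc(bas, NULL);	/* spins on RXRDY */
			ops->putc(bas, c);		/* spins on TXRDY */
		}
	}
}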
Since * a TIMEOUT interrupt can't happen without other setup, the * apparent race here can't actually happen. */ WR4(&sc->sc_bas, USART_IDR, 0xffffffff); WR4(&sc->sc_bas, USART_IER, USART_CSR_TIMEOUT); if (RD4(&sc->sc_bas, USART_IMR) & USART_CSR_TIMEOUT) atsc->flags |= HAS_TIMEOUT; WR4(&sc->sc_bas, USART_IDR, 0xffffffff); sc->sc_txfifosz = USART_BUFFER_SIZE; sc->sc_rxfifosz = USART_BUFFER_SIZE; sc->sc_hwiflow = 0; #ifndef SKYEYE_WORKAROUNDS /* * Allocate DMA tags and maps */ err = bus_dma_tag_create(NULL, 1, 0, BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL, USART_BUFFER_SIZE, 1, USART_BUFFER_SIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &atsc->dmatag); if (err != 0) goto errout; err = bus_dmamap_create(atsc->dmatag, 0, &atsc->tx_map); if (err != 0) goto errout; if (atsc->flags & HAS_TIMEOUT) { for (i = 0; i < 2; i++) { err = bus_dmamap_create(atsc->dmatag, 0, &atsc->ping_pong[i].map); if (err != 0) goto errout; err = bus_dmamap_load(atsc->dmatag, atsc->ping_pong[i].map, atsc->ping_pong[i].buffer, sc->sc_rxfifosz, at91_getaddr, &atsc->ping_pong[i].pa, 0); if (err != 0) goto errout; bus_dmamap_sync(atsc->dmatag, atsc->ping_pong[i].map, BUS_DMASYNC_PREREAD); } atsc->ping = &atsc->ping_pong[0]; atsc->pong = &atsc->ping_pong[1]; } #endif /* * Prime the pump with the RX buffer. We use two 64 byte bounce * buffers here to avoid data overflow. */ /* Turn on rx and tx */ cr = USART_CR_RSTSTA | USART_CR_RSTRX | USART_CR_RSTTX; WR4(&sc->sc_bas, USART_CR, cr); WR4(&sc->sc_bas, USART_CR, USART_CR_RXEN | USART_CR_TXEN); /* * Setup the PDC to receive data. We use the ping-pong buffers * so that we can more easily bounce between the two and so that * we get an interrupt 1/2 way through the software 'fifo' we have * to avoid overruns. */ if (atsc->flags & HAS_TIMEOUT) { WR4(&sc->sc_bas, PDC_RPR, atsc->ping->pa); WR4(&sc->sc_bas, PDC_RCR, sc->sc_rxfifosz); WR4(&sc->sc_bas, PDC_RNPR, atsc->pong->pa); WR4(&sc->sc_bas, PDC_RNCR, sc->sc_rxfifosz); WR4(&sc->sc_bas, PDC_PTCR, PDC_PTCR_RXTEN); /* Set the receive timeout to be 1.5 character times. */ WR4(&sc->sc_bas, USART_RTOR, 12); WR4(&sc->sc_bas, USART_CR, USART_CR_STTTO); WR4(&sc->sc_bas, USART_IER, USART_CSR_TIMEOUT | USART_CSR_RXBUFF | USART_CSR_ENDRX); } else { WR4(&sc->sc_bas, USART_IER, USART_CSR_RXRDY); } WR4(&sc->sc_bas, USART_IER, USART_CSR_RXBRK); #ifndef SKYEYE_WORKAROUNDS errout:; // XXX bad return (err); #else return (0); #endif } static int at91_usart_bus_transmit(struct uart_softc *sc) { #ifndef SKYEYE_WORKAROUNDS bus_addr_t addr; #endif struct at91_usart_softc *atsc; atsc = (struct at91_usart_softc *)sc; #ifndef SKYEYE_WORKAROUNDS if (bus_dmamap_load(atsc->dmatag, atsc->tx_map, sc->sc_txbuf, sc->sc_txdatasz, at91_getaddr, &addr, 0) != 0) return (EAGAIN); bus_dmamap_sync(atsc->dmatag, atsc->tx_map, BUS_DMASYNC_PREWRITE); #endif uart_lock(sc->sc_hwmtx); sc->sc_txbusy = 1; #ifndef SKYEYE_WORKAROUNDS /* * Setup the PDC to transfer the data and interrupt us when it * is done. We've already requested the interrupt. */ WR4(&sc->sc_bas, PDC_TPR, addr); WR4(&sc->sc_bas, PDC_TCR, sc->sc_txdatasz); WR4(&sc->sc_bas, PDC_PTCR, PDC_PTCR_TXTEN); WR4(&sc->sc_bas, USART_IER, USART_CSR_ENDTX); uart_unlock(sc->sc_hwmtx); #else for (int i = 0; i < sc->sc_txdatasz; i++) at91_usart_putc(&sc->sc_bas, sc->sc_txbuf[i]); /* * XXX: Gross hack : Skyeye doesn't raise an interrupt once the * transfer is done, so simulate it. 
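The attach code above discovers whether this USART revision implements the receive-timeout interrupt by trying to enable it and reading the interrupt mask register back: bits the hardware lacks simply do not latch. A distilled, hardware-agnostic version of that probe; the register pointers are stand-ins for the WR4()/RD4() accesses on USART_IER/IDR/IMR.

#include <stdint.h>

static int
hw_has_feature(volatile uint32_t *ier, volatile uint32_t *idr,
    volatile uint32_t *imr, uint32_t bit)
{
	int present;

	*idr = 0xffffffff;		/* mask everything first */
	*ier = bit;			/* try to enable the optional source */
	present = (*imr & bit) != 0;	/* did the mask bit latch? */
	*idr = 0xffffffff;		/* leave all sources disabled */
	return (present);
}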
*/ WR4(&sc->sc_bas, USART_IER, USART_CSR_TXRDY); #endif return (0); } static int at91_usart_bus_setsig(struct uart_softc *sc, int sig) { uint32_t new, old, cr; struct uart_bas *bas; do { old = sc->sc_hwsig; new = old; if (sig & SER_DDTR) SIGCHG(sig & SER_DTR, new, SER_DTR, SER_DDTR); if (sig & SER_DRTS) SIGCHG(sig & SER_RTS, new, SER_RTS, SER_DRTS); } while (!atomic_cmpset_32(&sc->sc_hwsig, old, new)); bas = &sc->sc_bas; uart_lock(sc->sc_hwmtx); cr = 0; if (new & SER_DTR) cr |= USART_CR_DTREN; else cr |= USART_CR_DTRDIS; if (new & SER_RTS) cr |= USART_CR_RTSEN; else cr |= USART_CR_RTSDIS; WR4(bas, USART_CR, cr); uart_unlock(sc->sc_hwmtx); return (0); } static int at91_usart_bus_receive(struct uart_softc *sc) { return (0); } static int at91_usart_bus_param(struct uart_softc *sc, int baudrate, int databits, int stopbits, int parity) { return (at91_usart_param(&sc->sc_bas, baudrate, databits, stopbits, parity)); } static __inline void at91_rx_put(struct uart_softc *sc, int key) { #if defined(KDB) && defined(ALT_BREAK_TO_DEBUGGER) if (sc->sc_sysdev != NULL && sc->sc_sysdev->type == UART_DEV_CONSOLE) { if (kdb_alt_break(key, &sc->sc_altbrk)) - kdb_enter("Break sequence to console"); + kdb_enter_why(KDB_WHY_BREAK, + "Break sequence to console"); } #endif uart_rx_put(sc, key); } static int at91_usart_bus_ipend(struct uart_softc *sc) { int csr = RD4(&sc->sc_bas, USART_CSR); int ipend = 0, i, len; struct at91_usart_softc *atsc; struct at91_usart_rx *p; atsc = (struct at91_usart_softc *)sc; if (csr & USART_CSR_ENDTX) { bus_dmamap_sync(atsc->dmatag, atsc->tx_map, BUS_DMASYNC_POSTWRITE); bus_dmamap_unload(atsc->dmatag, atsc->tx_map); } uart_lock(sc->sc_hwmtx); if (csr & USART_CSR_TXRDY) { if (sc->sc_txbusy) ipend |= SER_INT_TXIDLE; WR4(&sc->sc_bas, USART_IDR, USART_CSR_TXRDY); } if (csr & USART_CSR_ENDTX) { if (sc->sc_txbusy) ipend |= SER_INT_TXIDLE; WR4(&sc->sc_bas, USART_IDR, USART_CSR_ENDTX); } /* * Due to the contraints of the DMA engine present in the * atmel chip, I can't just say I have a rx interrupt pending * and do all the work elsewhere. I need to look at the CSR * bits right now and do things based on them to avoid races. */ if ((atsc->flags & HAS_TIMEOUT) && (csr & USART_CSR_RXBUFF)) { // Have a buffer overflow. Copy all data from both // ping and pong. Insert overflow character. Reset // ping and pong and re-enable the PDC to receive // characters again. bus_dmamap_sync(atsc->dmatag, atsc->ping->map, BUS_DMASYNC_POSTREAD); bus_dmamap_sync(atsc->dmatag, atsc->pong->map, BUS_DMASYNC_POSTREAD); for (i = 0; i < sc->sc_rxfifosz; i++) at91_rx_put(sc, atsc->ping->buffer[i]); for (i = 0; i < sc->sc_rxfifosz; i++) at91_rx_put(sc, atsc->pong->buffer[i]); uart_rx_put(sc, UART_STAT_OVERRUN); csr &= ~(USART_CSR_ENDRX | USART_CSR_TIMEOUT); WR4(&sc->sc_bas, PDC_RPR, atsc->ping->pa); WR4(&sc->sc_bas, PDC_RCR, sc->sc_rxfifosz); WR4(&sc->sc_bas, PDC_RNPR, atsc->pong->pa); WR4(&sc->sc_bas, PDC_RNCR, sc->sc_rxfifosz); WR4(&sc->sc_bas, PDC_PTCR, PDC_PTCR_RXTEN); ipend |= SER_INT_RXREADY; } if ((atsc->flags & HAS_TIMEOUT) && (csr & USART_CSR_ENDRX)) { // Shuffle data from 'ping' of ping pong buffer, but // leave current 'pong' in place, as it has become the // new 'ping'. We need to copy data and setup the old // 'ping' as the new 'pong' when we're done. 
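at91_usart_bus_setsig() above updates sc_hwsig with a compare-and-swap retry loop so concurrent callers cannot lose each other's DTR/RTS updates. The same pattern in standalone C11 form, with atomic_compare_exchange_weak() playing the role of the kernel's atomic_cmpset_32():

#include <stdatomic.h>
#include <stdio.h>

static _Atomic unsigned int hwsig;

static void
set_bits(unsigned int bits)
{
	unsigned int old, new;

	old = atomic_load(&hwsig);
	do {
		new = old | bits;
		/* on failure, "old" is refreshed with the current value */
	} while (!atomic_compare_exchange_weak(&hwsig, &old, new));
}

int
main(void)
{
	set_bits(0x1);
	set_bits(0x4);
	printf("hwsig = 0x%x\n", atomic_load(&hwsig));
	return (0);
}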
bus_dmamap_sync(atsc->dmatag, atsc->ping->map, BUS_DMASYNC_POSTREAD); for (i = 0; i < sc->sc_rxfifosz; i++) at91_rx_put(sc, atsc->ping->buffer[i]); p = atsc->ping; atsc->ping = atsc->pong; atsc->pong = p; WR4(&sc->sc_bas, PDC_RNPR, atsc->pong->pa); WR4(&sc->sc_bas, PDC_RNCR, sc->sc_rxfifosz); ipend |= SER_INT_RXREADY; } if ((atsc->flags & HAS_TIMEOUT) && (csr & USART_CSR_TIMEOUT)) { // We have one partial buffer. We need to stop the // PDC, get the number of characters left and from // that compute number of valid characters. We then // need to reset ping and pong and reenable the PDC. // Not sure if there's a race here at fast baud rates // we need to worry about. WR4(&sc->sc_bas, PDC_PTCR, PDC_PTCR_RXTDIS); bus_dmamap_sync(atsc->dmatag, atsc->ping->map, BUS_DMASYNC_POSTREAD); len = sc->sc_rxfifosz - RD4(&sc->sc_bas, PDC_RCR); for (i = 0; i < len; i++) at91_rx_put(sc, atsc->ping->buffer[i]); WR4(&sc->sc_bas, PDC_RPR, atsc->ping->pa); WR4(&sc->sc_bas, PDC_RCR, sc->sc_rxfifosz); WR4(&sc->sc_bas, USART_CR, USART_CR_STTTO); WR4(&sc->sc_bas, PDC_PTCR, PDC_PTCR_RXTEN); ipend |= SER_INT_RXREADY; } if (!(atsc->flags & HAS_TIMEOUT) && (csr & USART_CSR_RXRDY)) { // We have another charater in a device that doesn't support // timeouts, so we do it one character at a time. at91_rx_put(sc, RD4(&sc->sc_bas, USART_RHR) & 0xff); ipend |= SER_INT_RXREADY; } if (csr & USART_CSR_RXBRK) { unsigned int cr = USART_CR_RSTSTA; ipend |= SER_INT_BREAK; WR4(&sc->sc_bas, USART_CR, cr); } uart_unlock(sc->sc_hwmtx); return (ipend); } static int at91_usart_bus_flush(struct uart_softc *sc, int what) { return (0); } static int at91_usart_bus_getsig(struct uart_softc *sc) { uint32_t new, sig; uint8_t csr; uart_lock(sc->sc_hwmtx); csr = RD4(&sc->sc_bas, USART_CSR); sig = 0; if (csr & USART_CSR_CTS) sig |= SER_CTS; if (csr & USART_CSR_DCD) sig |= SER_DCD; if (csr & USART_CSR_DSR) sig |= SER_DSR; if (csr & USART_CSR_RI) sig |= SER_RI; new = sig & ~SER_MASK_DELTA; sc->sc_hwsig = new; uart_unlock(sc->sc_hwmtx); return (sig); } static int at91_usart_bus_ioctl(struct uart_softc *sc, int request, intptr_t data) { switch (request) { case UART_IOCTL_BREAK: case UART_IOCTL_IFLOW: case UART_IOCTL_OFLOW: break; case UART_IOCTL_BAUD: WR4(&sc->sc_bas, USART_BRGR, BAUD2DIVISOR(*(int *)data)); return (0); } return (EINVAL); } struct uart_class at91_usart_class = { "at91_usart", at91_usart_methods, sizeof(struct at91_usart_softc), .uc_ops = &at91_usart_ops, .uc_range = 8, .uc_rclk = DEFAULT_RCLK }; Index: stable/7/sys/cam/scsi/scsi_low.h =================================================================== --- stable/7/sys/cam/scsi/scsi_low.h (revision 177733) +++ stable/7/sys/cam/scsi/scsi_low.h (revision 177734) @@ -1,887 +1,887 @@ /* $FreeBSD$ */ /* $NecBSD: scsi_low.h,v 1.24.10.5 2001/06/26 07:31:46 honda Exp $ */ /* $NetBSD$ */ #define SCSI_LOW_DIAGNOSTIC #define SCSI_LOW_ALT_QTAG_ALLOCATE /*- * [NetBSD for NEC PC-98 series] * Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000, 2001 * NetBSD/pc98 porting staff. All rights reserved. * Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000, 2001 * Naofumi HONDA. All rights reserved. * * [Ported for FreeBSD CAM] * Copyright (c) 2000, 2001 * MITSUNAGA Noriaki, NOKUBI Hirotaka and TAKAHASHI Yoshihiro. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #ifndef _SCSI_LOW_H_ #define _SCSI_LOW_H_ /*================================================ * Scsi low OSDEP * (All os depend structures should be here!) ================================================*/ /******** interface ******************************/ #ifdef __NetBSD__ #define SCSI_LOW_INTERFACE_XS #endif /* __NetBSD__ */ #ifdef __FreeBSD__ #define SCSI_LOW_INTERFACE_CAM #define CAM #endif /* __FreeBSD__ */ /******** includes *******************************/ #ifdef __NetBSD__ #include #include #endif /* __NetBSD__ */ #ifdef __FreeBSD__ #include #include #include #include #include #include #include #include #include #endif /* __FreeBSD__ */ /******** functions macro ************************/ #ifdef __NetBSD__ #define SCSI_LOW_DEBUGGER(dev) Debugger() #define SCSI_LOW_DELAY(mu) delay((mu)) #define SCSI_LOW_SPLSCSI splbio #define SCSI_LOW_BZERO(pt, size) memset((pt), 0, (size)) #endif /* __NetBSD__ */ #ifdef __FreeBSD__ #undef MSG_IDENTIFY -#define SCSI_LOW_DEBUGGER(dev) kdb_enter(dev) +#define SCSI_LOW_DEBUGGER(dev) kdb_enter_why(KDB_WHY_CAM, dev) #define SCSI_LOW_DELAY(mu) DELAY((mu)) #define SCSI_LOW_SPLSCSI splcam #define SCSI_LOW_BZERO(pt, size) bzero((pt), (size)) #endif /* __FreeBSD__ */ /******** os depend interface structures **********/ #ifdef __NetBSD__ typedef struct scsipi_sense_data scsi_low_osdep_sense_data_t; struct scsi_low_osdep_interface { struct device si_dev; struct scsipi_link *si_splp; }; struct scsi_low_osdep_targ_interface { }; struct scsi_low_osdep_lun_interface { u_int sloi_quirks; }; #endif /* __NetBSD__ */ #ifdef __FreeBSD__ typedef struct scsi_sense_data scsi_low_osdep_sense_data_t; struct scsi_low_osdep_interface { DEVPORT_DEVICE si_dev; struct cam_sim *sim; struct cam_path *path; int si_poll_count; struct callout_handle engage_ch; struct callout_handle timeout_ch; #ifdef SCSI_LOW_POWFUNC struct callout_handle recover_ch; #endif }; struct scsi_low_osdep_targ_interface { }; struct scsi_low_osdep_lun_interface { }; #endif /* __FreeBSD__ */ /******** os depend interface functions *************/ struct slccb; struct scsi_low_softc; #define SCSI_LOW_TIMEOUT_STOP 0 #define SCSI_LOW_TIMEOUT_START 1 #define SCSI_LOW_TIMEOUT_CH_IO 0 #define SCSI_LOW_TIMEOUT_CH_ENGAGE 1 #define SCSI_LOW_TIMEOUT_CH_RECOVER 2 struct scsi_low_osdep_funcs { int (*scsi_low_osdep_attach) \ 
(struct scsi_low_softc *); int (*scsi_low_osdep_world_start) \ (struct scsi_low_softc *); int (*scsi_low_osdep_dettach) \ (struct scsi_low_softc *); int (*scsi_low_osdep_ccb_setup) \ (struct scsi_low_softc *, struct slccb *); int (*scsi_low_osdep_done) \ (struct scsi_low_softc *, struct slccb *); void (*scsi_low_osdep_timeout) \ (struct scsi_low_softc *, int, int); }; /*================================================ * Generic Scsi Low header file * (All os depend structures should be above!) ================================================*/ /************************************************* * Scsi low definitions *************************************************/ #define SCSI_LOW_SYNC DVF_SCSI_SYNC #define SCSI_LOW_DISC DVF_SCSI_DISC #define SCSI_LOW_WAIT DVF_SCSI_WAIT #define SCSI_LOW_LINK DVF_SCSI_LINK #define SCSI_LOW_QTAG DVF_SCSI_QTAG #define SCSI_LOW_NOPARITY DVF_SCSI_NOPARITY #define SCSI_LOW_SAVESP DVF_SCSI_SAVESP #define SCSI_LOW_DEFCFG DVF_SCSI_DEFCFG #define SCSI_LOW_BITS DVF_SCSI_BITS #define SCSI_LOW_PERIOD(n) DVF_SCSI_PERIOD(n) #define SCSI_LOW_OFFSET(n) DVF_SCSI_OFFSET(n) /* host scsi id and targets macro */ #ifndef SCSI_LOW_NTARGETS #define SCSI_LOW_NTARGETS 8 #endif /* SCSI_LOW_NTARGETS */ #define SCSI_LOW_NCCB 128 #define SCSI_LOW_MAX_RETRY 3 #define SCSI_LOW_MAX_SELECTION_RETRY 10 /* timeout control macro */ #define SCSI_LOW_TIMEOUT_HZ 10 #define SCSI_LOW_MIN_TOUT 12 #define SCSI_LOW_TIMEOUT_CHECK_INTERVAL 1 #define SCSI_LOW_POWDOWN_TC 15 #define SCSI_LOW_MAX_PHCHANGES 256 #define SCSI2_RESET_DELAY 5000000 /* msg */ #define SCSI_LOW_MAX_MSGLEN 32 #define SCSI_LOW_MSG_LOG_DATALEN 8 /************************************************* * Scsi Data Pointer *************************************************/ /* scsi pointer */ struct sc_p { u_int8_t *scp_data; int scp_datalen; u_int8_t *scp_cmd; int scp_cmdlen; u_int8_t scp_direction; #define SCSI_LOW_RWUNK (-1) #define SCSI_LOW_WRITE 0 #define SCSI_LOW_READ 1 u_int8_t scp_status; u_int8_t scp_spare[2]; }; /************************************************* * Command Control Block Structure *************************************************/ typedef int scsi_low_tag_t; struct targ_info; #define SCSI_LOW_UNKLUN ((u_int) -1) #define SCSI_LOW_UNKTAG ((scsi_low_tag_t) -1) struct slccb { TAILQ_ENTRY(slccb) ccb_chain; void *osdep; /* os depend structure */ struct targ_info *ti; /* targ_info */ struct lun_info *li; /* lun info */ struct buf *bp; /* io bufs */ scsi_low_tag_t ccb_tag; /* effective qtag */ scsi_low_tag_t ccb_otag; /* allocated qtag */ /***************************************** * Scsi data pointers (original and saved) *****************************************/ struct sc_p ccb_scp; /* given */ struct sc_p ccb_sscp; /* saved scsi data pointer */ int ccb_datalen; /* transfered data counter */ /***************************************** * Msgout *****************************************/ u_int ccb_msgoutflag; u_int ccb_omsgoutflag; /***************************************** * Error or Timeout counters *****************************************/ u_int ccb_flags; #define CCB_INTERNAL 0x0001 #define CCB_SENSE 0x0002 #define CCB_CLEARQ 0x0004 #define CCB_DISCQ 0x0008 #define CCB_STARTQ 0x0010 #define CCB_POLLED 0x0100 /* polling ccb */ #define CCB_NORETRY 0x0200 /* do NOT retry */ #define CCB_AUTOSENSE 0x0400 /* do a sence after CA */ #define CCB_URGENT 0x0800 /* an urgent ccb */ #define CCB_NOSDONE 0x1000 /* do not call an os done routine */ #define CCB_SCSIIO 0x2000 /* a normal scsi io coming from upper layer */ #define 
CCB_SILENT 0x4000 /* no terminate messages */ u_int ccb_error; int ccb_rcnt; /* retry counter */ int ccb_selrcnt; /* selection retry counter */ int ccb_tc; /* timer counter */ int ccb_tcmax; /* max timeout */ /***************************************** * Sense data buffer *****************************************/ u_int8_t ccb_scsi_cmd[12]; scsi_low_osdep_sense_data_t ccb_sense; }; /************************************************* * Slccb functions *************************************************/ GENERIC_CCB_ASSERT(scsi_low, slccb) /************************************************* * Target and Lun structures *************************************************/ struct scsi_low_softc; LIST_HEAD(scsi_low_softc_tab, scsi_low_softc); TAILQ_HEAD(targ_info_tab, targ_info); LIST_HEAD(lun_info_tab, lun_info); struct lun_info { struct scsi_low_osdep_lun_interface li_sloi; int li_lun; struct targ_info *li_ti; /* my target */ LIST_ENTRY(lun_info) lun_chain; /* targ_info link */ struct slccbtab li_discq; /* disconnect queue */ /* * qtag control */ int li_maxnexus; int li_maxnqio; int li_nqio; int li_disc; #define SCSI_LOW_MAXNEXUS (sizeof(u_int) * NBBY) u_int li_qtagbits; #ifdef SCSI_LOW_ALT_QTAG_ALLOCATE u_int8_t li_qtagarray[SCSI_LOW_MAXNEXUS]; u_int li_qd; #endif /* SCSI_LOW_ALT_QTAG_ALLOCATE */ #define SCSI_LOW_QFLAG_CA_QCLEAR 0x01 u_int li_qflags; /* * lun state */ #define SCSI_LOW_LUN_SLEEP 0x00 #define SCSI_LOW_LUN_START 0x01 #define SCSI_LOW_LUN_INQ 0x02 #define SCSI_LOW_LUN_MODEQ 0x03 #define SCSI_LOW_LUN_OK 0x04 u_int li_state; /* target lun state */ /* * lun control flags */ u_int li_flags_valid; /* valid flags */ #define SCSI_LOW_LUN_FLAGS_USER_VALID 0x0001 #define SCSI_LOW_LUN_FLAGS_DISK_VALID 0x0002 #define SCSI_LOW_LUN_FLAGS_QUIRKS_VALID 0x0004 #define SCSI_LOW_LUN_FLAGS_ALL_VALID \ (SCSI_LOW_LUN_FLAGS_USER_VALID | \ SCSI_LOW_LUN_FLAGS_DISK_VALID | SCSI_LOW_LUN_FLAGS_QUIRKS_VALID) u_int li_flags; /* real lun control flags */ u_int li_cfgflags; /* lun control flags given by user */ u_int li_diskflags; /* lun control flags given by hardware info */ u_int li_quirks; /* lun control flags given by upper layer */ /* inq buffer */ struct scsi_low_inq_data { u_int8_t sd_type; u_int8_t sd_sp1; u_int8_t sd_version; u_int8_t sd_resp; u_int8_t sd_len; u_int8_t sd_sp2[2]; u_int8_t sd_support; } __packed li_inq; /* modeq buffer */ struct scsi_low_mode_sense_data { u_int8_t sms_header[4]; struct { u_int8_t cmp_page; u_int8_t cmp_length; u_int8_t cmp_rlec; u_int8_t cmp_qc; u_int8_t cmp_eca; u_int8_t cmp_spare[3]; } __packed sms_cmp; } li_sms; }; struct scsi_low_msg_log { int slml_ptr; struct { u_int8_t msg[2]; } slml_msg[SCSI_LOW_MSG_LOG_DATALEN]; }; struct targ_info { struct scsi_low_osdep_targ_interface ti_slti; TAILQ_ENTRY(targ_info) ti_chain; /* targ_info link */ struct scsi_low_softc *ti_sc; /* our softc */ u_int ti_id; /* scsi id */ /* * Lun chain */ struct lun_info_tab ti_litab; /* lun chain */ /* * total disconnected nexus */ int ti_disc; /* * Scsi phase control */ #define PH_NULL 0x00 #define PH_ARBSTART 0x01 #define PH_SELSTART 0x02 #define PH_SELECTED 0x03 #define PH_CMD 0x04 #define PH_DATA 0x05 #define PH_MSGIN 0x06 #define PH_MSGOUT 0x07 #define PH_STAT 0x08 #define PH_DISC 0x09 #define PH_RESEL 0x0a u_int ti_phase; /* scsi phase */ u_int ti_ophase; /* old scsi phase */ /* * Msg in */ u_int ti_msginptr; /* msgin ptr */ u_int ti_msginlen; /* expected msg length */ int ti_msgin_parity_error; /* parity error detected */ u_int8_t ti_msgin[SCSI_LOW_MAX_MSGLEN]; /* msgin buffer */ /* * Msg out 
*/ u_int ti_msgflags; /* msgs to be asserted */ u_int ti_omsgflags; /* msgs asserted */ u_int ti_emsgflags; /* a msg currently asserted */ #define SCSI_LOW_MSG_RESET 0x00000001 #define SCSI_LOW_MSG_REJECT 0x00000002 #define SCSI_LOW_MSG_PARITY 0x00000004 #define SCSI_LOW_MSG_ERROR 0x00000008 #define SCSI_LOW_MSG_IDENTIFY 0x00000010 #define SCSI_LOW_MSG_ABORT 0x00000020 #define SCSI_LOW_MSG_TERMIO 0x00000040 #define SCSI_LOW_MSG_SIMPLE_QTAG 0x00000080 #define SCSI_LOW_MSG_ORDERED_QTAG 0x00000100 #define SCSI_LOW_MSG_HEAD_QTAG 0x00000200 #define SCSI_LOW_MSG_ABORT_QTAG 0x00000400 #define SCSI_LOW_MSG_CLEAR_QTAG 0x00000800 #define SCSI_LOW_MSG_WIDE 0x00001000 #define SCSI_LOW_MSG_SYNCH 0x00002000 #define SCSI_LOW_MSG_NOOP 0x00004000 #define SCSI_LOW_MSG_LAST 0x00008000 #define SCSI_LOW_MSG_ALL 0xffffffff /* msgout buffer */ u_int8_t ti_msgoutstr[SCSI_LOW_MAX_MSGLEN]; /* scsi msgout */ u_int ti_msgoutlen; /* msgout strlen */ /* * target initialize msgout */ u_int ti_setup_msg; /* setup msgout requests */ u_int ti_setup_msg_done; /* * synch and wide data info */ u_int ti_flags_valid; /* valid flags */ #define SCSI_LOW_TARG_FLAGS_USER_VALID 0x0001 #define SCSI_LOW_TARG_FLAGS_DISK_VALID 0x0002 #define SCSI_LOW_TARG_FLAGS_QUIRKS_VALID 0x0004 #define SCSI_LOW_TARG_FLAGS_ALL_VALID \ (SCSI_LOW_TARG_FLAGS_USER_VALID | \ SCSI_LOW_TARG_FLAGS_DISK_VALID | SCSI_LOW_TARG_FLAGS_QUIRKS_VALID) u_int ti_diskflags; /* given target disk flags */ u_int ti_quirks; /* given target quirk */ struct synch { u_int8_t offset; u_int8_t period; } ti_osynch, ti_maxsynch; /* synch data */ #define SCSI_LOW_BUS_WIDTH_8 0 #define SCSI_LOW_BUS_WIDTH_16 1 #define SCSI_LOW_BUS_WIDTH_32 2 u_int ti_owidth, ti_width; /* * lun info size. */ int ti_lunsize; #ifdef SCSI_LOW_DIAGNOSTIC struct scsi_low_msg_log ti_log_msgout; struct scsi_low_msg_log ti_log_msgin; #endif /* SCSI_LOW_DIAGNOSTIC */ }; /************************************************* * COMMON HEADER STRUCTURE *************************************************/ struct scsi_low_softc; struct proc; typedef struct scsi_low_softc *sc_low_t; #define SCSI_LOW_START_OK 0 #define SCSI_LOW_START_FAIL 1 #define SCSI_LOW_INFO_ALLOC 0 #define SCSI_LOW_INFO_REVOKE 1 #define SCSI_LOW_INFO_DEALLOC 2 #define SCSI_LOW_POWDOWN 1 #define SCSI_LOW_ENGAGE 2 #define SC_LOW_INIT_T (int (*)(sc_low_t, int)) #define SC_LOW_BUSRST_T (void (*)(sc_low_t)) #define SC_LOW_TARG_INIT_T (int (*)(sc_low_t, struct targ_info *, int)) #define SC_LOW_LUN_INIT_T (int (*)(sc_low_t, struct targ_info *, struct lun_info *, int)) #define SC_LOW_SELECT_T (int (*)(sc_low_t, struct slccb *)) #define SC_LOW_ATTEN_T (void (*)(sc_low_t)) #define SC_LOW_NEXUS_T (int (*)(sc_low_t)) #define SC_LOW_MSG_T (int (*)(sc_low_t, struct targ_info *, u_int)) #define SC_LOW_POLL_T (int (*)(void *)) #define SC_LOW_POWER_T (int (*)(sc_low_t, u_int)) #define SC_LOW_TIMEOUT_T (int (*)(sc_low_t)) struct scsi_low_funcs { int (*scsi_low_init)(sc_low_t, int); void (*scsi_low_bus_reset)(sc_low_t); int (*scsi_low_targ_init)(sc_low_t, struct targ_info *, int); int (*scsi_low_lun_init)(sc_low_t, struct targ_info *, struct lun_info *, int); int (*scsi_low_start_bus)(sc_low_t, struct slccb *); int (*scsi_low_establish_lun_nexus)(sc_low_t); int (*scsi_low_establish_ccb_nexus)(sc_low_t); void (*scsi_low_attention)(sc_low_t); int (*scsi_low_msg)(sc_low_t, struct targ_info *, u_int); int (*scsi_low_timeout)(sc_low_t); int (*scsi_low_poll)(void *); int (*scsi_low_power)(sc_low_t, u_int); int (*scsi_low_ioctl)(sc_low_t, u_long, caddr_t, int, struct proc *); 
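The scsi_low_funcs table above is the contract between this shared midlayer and an individual host-adapter driver: the driver supplies one function per hook and points its softc's sl_funcs at the table before attaching. A sketch of what that registration looks like; every mydrv_* name is a hypothetical placeholder, not part of the header.

/* Hypothetical adapter driver wiring itself into the scsi_low core. */
static struct scsi_low_funcs mydrv_funcs = {
	.scsi_low_init		= mydrv_init,
	.scsi_low_bus_reset	= mydrv_bus_reset,
	.scsi_low_targ_init	= mydrv_targ_init,
	.scsi_low_lun_init	= mydrv_lun_init,
	.scsi_low_start_bus	= mydrv_select,
	.scsi_low_attention	= mydrv_attention,
	.scsi_low_msg		= mydrv_msg,
	.scsi_low_timeout	= mydrv_timeout,
	.scsi_low_poll		= mydrv_poll,
	/* remaining hooks (nexus, power, ioctl) are filled in the same way */
};

static void
mydrv_register(struct scsi_low_softc *slp)
{
	slp->sl_funcs = &mydrv_funcs;
	/* ...then call scsi_low_attach() with the adapter's parameters. */
}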
}; struct scsi_low_softc { /* os depend structure */ struct scsi_low_osdep_interface sl_si; #define sl_dev sl_si.si_dev struct scsi_low_osdep_funcs *sl_osdep_fp; u_char sl_xname[16]; /* our chain */ LIST_ENTRY(scsi_low_softc) sl_chain; /* my targets */ struct targ_info *sl_ti[SCSI_LOW_NTARGETS]; struct targ_info_tab sl_titab; /* current active T_L_Q nexus */ struct targ_info *sl_Tnexus; /* Target nexus */ struct lun_info *sl_Lnexus; /* Lun nexus */ struct slccb *sl_Qnexus; /* Qtag nexus */ int sl_nexus_call; /* ccb start queue */ struct slccbtab sl_start; /* retry limit and phase change counter */ int sl_max_retry; int sl_ph_count; int sl_timeout_count; /* selection & total num disconnect targets */ int sl_nio; int sl_disc; int sl_retry_sel; struct slccb *sl_selid; /* attention */ int sl_atten; /* ATN asserted */ int sl_clear_atten; /* negate ATN required */ /* scsi phase suggested by scsi msg */ u_int sl_msgphase; #define MSGPH_NULL 0x00 /* no msg */ #define MSGPH_DISC 0x01 /* disconnect msg */ #define MSGPH_CMDC 0x02 /* cmd complete msg */ #define MSGPH_ABORT 0x03 /* abort seq */ #define MSGPH_TERM 0x04 /* current io terminate */ #define MSGPH_LCTERM 0x05 /* cmd link terminated */ #define MSGPH_RESET 0x06 /* reset target */ /* error */ u_int sl_error; /* error flags */ #define FATALIO 0x0001 /* generic io error & retry io */ #define ABORTIO 0x0002 /* generic io error & terminate io */ #define TIMEOUTIO 0x0004 /* watch dog timeout */ #define SELTIMEOUTIO 0x0008 /* selection timeout */ #define PDMAERR 0x0010 /* dma xfer error */ #define MSGERR 0x0020 /* msgsys error */ #define PARITYERR 0x0040 /* parity error */ #define BUSYERR 0x0080 /* target busy error */ #define STATERR 0x0100 /* status error */ #define UACAERR 0x0200 /* target CA state, no sense check */ #define SENSEIO 0x1000 /* cmd not excuted but sense data ok */ #define SENSEERR 0x2000 /* cmd not excuted and sense data bad */ #define UBFERR 0x4000 /* unexpected bus free */ #define PENDINGIO 0x8000 /* ccb start not yet */ #define SCSI_LOW_ERRORBITS "\020\017ubferr\016senseerr\015senseio\012uacaerr\011staterr\010busy\007parity\006msgerr\005pdmaerr\004seltimeout\003timeout\002abort\001fatal" /* current scsi data pointer */ struct sc_p sl_scp; /* power control */ u_int sl_active; /* host is busy state */ int sl_powc; /* power down timer counter */ u_int sl_rstep; /* resume step */ /* configuration flags */ u_int sl_flags; #define HW_POWDOWN 0x0001 #define HW_RESUME 0x0002 #define HW_PDMASTART 0x0004 #define HW_INACTIVE 0x0008 #define HW_POWERCTRL 0x0010 #define HW_INITIALIZING 0x0020 #define HW_READ_PADDING 0x1000 #define HW_WRITE_PADDING 0x2000 u_int sl_cfgflags; #define CFG_NODISC 0x0001 #define CFG_NOPARITY 0x0002 #define CFG_NOATTEN 0x0004 #define CFG_ASYNC 0x0008 #define CFG_NOQTAG 0x0010 int sl_show_result; #define SHOW_SYNCH_NEG 0x0001 #define SHOW_WIDE_NEG 0x0002 #define SHOW_CALCF_RES 0x0010 #define SHOW_PROBE_RES 0x0020 #define SHOW_ALL_NEG -1 /* host informations */ u_int sl_hostid; int sl_nluns; int sl_ntargs; int sl_openings; /* interface functions */ struct scsi_low_funcs *sl_funcs; /* targinfo size */ int sl_targsize; #if defined(i386) || defined(__i386__) u_int sl_irq; /* XXX */ #endif /* i386 */ }; /************************************************* * SCSI LOW service functions *************************************************/ /* * Scsi low attachment function. 
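SCSI_LOW_ERRORBITS above is in the format consumed by the kernel printf(9) "%b" conversion: the first byte (\020, i.e. 16) selects the radix and each following "\<bit-number>name" pair labels one bit of sl_error. A hedged sketch of the kind of diagnostic line the midlayer can emit with it; the function below is illustrative, not code from scsi_low.c.

static void
scsi_low_report_error(struct scsi_low_softc *slp)
{

	if (slp->sl_error != 0)
		printf("%s: error=%b\n", slp->sl_xname,
		    slp->sl_error, SCSI_LOW_ERRORBITS);
}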
*/ int scsi_low_attach(struct scsi_low_softc *, int, int, int, int, int); int scsi_low_dettach(struct scsi_low_softc *); /* * Scsi low interface activate or deactivate functions */ int scsi_low_is_busy(struct scsi_low_softc *); int scsi_low_activate(struct scsi_low_softc *); int scsi_low_deactivate(struct scsi_low_softc *); /* * Scsi phase "bus service" functions. * These functions are corresponding to each scsi bus phaeses. */ /* bus idle phase (other initiators or targets release bus) */ void scsi_low_bus_idle(struct scsi_low_softc *); /* arbitration and selection phase */ void scsi_low_arbit_fail(struct scsi_low_softc *, struct slccb *); static __inline void scsi_low_arbit_win(struct scsi_low_softc *); /* msgout phase */ #define SCSI_LOW_MSGOUT_INIT 0x00000001 #define SCSI_LOW_MSGOUT_UNIFY 0x00000002 int scsi_low_msgout(struct scsi_low_softc *, struct targ_info *, u_int); /* msgin phase */ #define SCSI_LOW_DATA_PE 0x80000000 int scsi_low_msgin(struct scsi_low_softc *, struct targ_info *, u_int); /* statusin phase */ static __inline int scsi_low_statusin(struct scsi_low_softc *, struct targ_info *, u_int); /* data phase */ int scsi_low_data(struct scsi_low_softc *, struct targ_info *, struct buf **, int); static __inline void scsi_low_data_finish(struct scsi_low_softc *); /* cmd phase */ int scsi_low_cmd(struct scsi_low_softc *, struct targ_info *); /* reselection phase */ struct targ_info *scsi_low_reselected(struct scsi_low_softc *, u_int); /* disconnection phase */ int scsi_low_disconnected(struct scsi_low_softc *, struct targ_info *); /* * Scsi bus restart function. * Canncel all established nexuses => scsi system initialized => restart jobs. */ #define SCSI_LOW_RESTART_HARD 1 #define SCSI_LOW_RESTART_SOFT 0 int scsi_low_restart(struct scsi_low_softc *, int, u_char *); /* * Scsi utility fucntions */ /* print current status */ void scsi_low_print(struct scsi_low_softc *, struct targ_info *); /* bus reset utility */ void scsi_low_bus_reset(struct scsi_low_softc *); /************************************************* * Message macro defs *************************************************/ #define SCSI_LOW_SETUP_PHASE(ti, phase) \ { \ (ti)->ti_ophase = ti->ti_phase; \ (ti)->ti_phase = (phase); \ } #define SCSI_LOW_SETUP_MSGPHASE(slp, PHASE) \ { \ (slp)->sl_msgphase = (PHASE); \ } #define SCSI_LOW_ASSERT_ATN(slp) \ { \ (slp)->sl_atten = 1; \ } #define SCSI_LOW_DEASSERT_ATN(slp) \ { \ (slp)->sl_atten = 0; \ } /************************************************* * Inline functions *************************************************/ static __inline void scsi_low_attention(struct scsi_low_softc *); static __inline int scsi_low_is_msgout_continue(struct targ_info *, u_int); static __inline int scsi_low_assert_msg(struct scsi_low_softc *, struct targ_info *, u_int, int); static __inline int scsi_low_is_disconnect_ok(struct slccb *); static __inline int scsi_low_is_msgout_continue(ti, mask) struct targ_info *ti; u_int mask; { return ((ti->ti_msgflags & (~mask)) != 0); } static __inline int scsi_low_is_disconnect_ok(cb) struct slccb *cb; { return ((cb->li->li_flags & SCSI_LOW_DISC) != 0 && (cb->ccb_flags & (CCB_SENSE | CCB_CLEARQ)) == 0); } static __inline void scsi_low_attention(slp) struct scsi_low_softc *slp; { if (slp->sl_atten != 0) return; (*slp->sl_funcs->scsi_low_attention) (slp); SCSI_LOW_ASSERT_ATN(slp); } static __inline int scsi_low_assert_msg(slp, ti, msg, now) struct scsi_low_softc *slp; struct targ_info *ti; u_int msg; int now; { ti->ti_msgflags |= msg; if (now != 0) 
scsi_low_attention(slp); return 0; } static __inline void scsi_low_arbit_win(slp) struct scsi_low_softc *slp; { slp->sl_selid = NULL; } static __inline void scsi_low_data_finish(slp) struct scsi_low_softc *slp; { if (slp->sl_Qnexus != NULL) { slp->sl_Qnexus->ccb_datalen = slp->sl_scp.scp_datalen; } } static __inline int scsi_low_statusin(slp, ti, c) struct scsi_low_softc *slp; struct targ_info *ti; u_int c; { slp->sl_ph_count ++; if ((c & SCSI_LOW_DATA_PE) != 0) { scsi_low_assert_msg(slp, ti, SCSI_LOW_MSG_ERROR, 0); return EIO; } slp->sl_scp.scp_status = (u_int8_t) c; return 0; } /************************************************* * Message out defs *************************************************/ /* XXX: use scsi_message.h */ #define ST_GOOD 0x00 #define ST_CHKCOND 0x02 #define ST_MET 0x04 #define ST_BUSY 0x08 #define ST_INTERGOOD 0x10 #define ST_INTERMET 0x14 #define ST_CONFLICT 0x18 #define ST_CMDTERM 0x22 #define ST_QUEFULL 0x28 #define ST_UNKNOWN 0xff #define MSG_COMP 0x00 #define MSG_EXTEND 0x01 #define MKMSG_EXTEND(XLEN, XCODE) ((((u_int)(XLEN)) << NBBY) | ((u_int)(XCODE))) #define MSG_EXTEND_MDPCODE 0x00 #define MSG_EXTEND_MDPLEN 0x05 #define MSG_EXTEND_SYNCHCODE 0x01 #define MSG_EXTEND_SYNCHLEN 0x03 #define MSG_EXTEND_WIDECODE 0x03 #define MSG_EXTEND_WIDELEN 0x02 #define MSG_SAVESP 0x02 #define MSG_RESTORESP 0x03 #define MSG_DISCON 0x04 #define MSG_I_ERROR 0x05 #define MSG_ABORT 0x06 #define MSG_REJECT 0x07 #define MSG_NOOP 0x08 #define MSG_PARITY 0x09 #define MSG_LCOMP 0x0a #define MSG_LCOMP_F 0x0b #define MSG_RESET 0x0c #define MSG_ABORT_QTAG 0x0d #define MSG_CLEAR_QTAG 0x0e #define MSG_TERM_IO 0x11 #define MSG_SIMPLE_QTAG 0x20 #define MSG_HEAD_QTAG 0x21 #define MSG_ORDERED_QTAG 0x22 #define MSG_IDENTIFY 0x80 #define MSG_IDENTIFY_DISCPRIV 0x40 #endif /* !_SCSI_LOW_H_ */ Index: stable/7/sys/compat/ndis/subr_ntoskrnl.c =================================================================== --- stable/7/sys/compat/ndis/subr_ntoskrnl.c (revision 177733) +++ stable/7/sys/compat/ndis/subr_ntoskrnl.c (revision 177734) @@ -1,4381 +1,4381 @@ /*- * Copyright (c) 2003 * Bill Paul . All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Bill Paul. * 4. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
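The MSG_EXTEND_* and MKMSG_EXTEND() definitions in scsi_low.h above pack an extended-message length and code into a single lookup key, length in the high byte and code in the low byte (NBBY being 8 bits). A standalone check of the two negotiation messages the header defines, synchronous (SDTR) and wide (WDTR):

#include <stdio.h>

#define NBBY	8
#define MKMSG_EXTEND(XLEN, XCODE) \
	((((unsigned)(XLEN)) << NBBY) | ((unsigned)(XCODE)))

int
main(void)
{
	/* From the header: synch is length 3, code 1; wide is length 2, code 3. */
	printf("synch (SDTR) key 0x%04x\n", MKMSG_EXTEND(3, 1));	/* 0x0301 */
	printf("wide  (WDTR) key 0x%04x\n", MKMSG_EXTEND(2, 3));	/* 0x0203 */
	return (0);
}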
IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #if __FreeBSD_version > 502113 #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef NTOSKRNL_DEBUG_TIMERS static int sysctl_show_timers(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_debug, OID_AUTO, ntoskrnl_timers, CTLFLAG_RW, 0, 0, sysctl_show_timers, "I", "Show ntoskrnl timer stats"); #endif struct kdpc_queue { list_entry kq_disp; struct thread *kq_td; int kq_cpu; int kq_exit; int kq_running; kspin_lock kq_lock; nt_kevent kq_proc; nt_kevent kq_done; }; typedef struct kdpc_queue kdpc_queue; struct wb_ext { struct cv we_cv; struct thread *we_td; }; typedef struct wb_ext wb_ext; #define NTOSKRNL_TIMEOUTS 256 #ifdef NTOSKRNL_DEBUG_TIMERS static uint64_t ntoskrnl_timer_fires; static uint64_t ntoskrnl_timer_sets; static uint64_t ntoskrnl_timer_reloads; static uint64_t ntoskrnl_timer_cancels; #endif struct callout_entry { struct callout ce_callout; list_entry ce_list; }; typedef struct callout_entry callout_entry; static struct list_entry ntoskrnl_calllist; static struct mtx ntoskrnl_calllock; static struct list_entry ntoskrnl_intlist; static kspin_lock ntoskrnl_intlock; static uint8_t RtlEqualUnicodeString(unicode_string *, unicode_string *, uint8_t); static void RtlCopyUnicodeString(unicode_string *, unicode_string *); static irp *IoBuildSynchronousFsdRequest(uint32_t, device_object *, void *, uint32_t, uint64_t *, nt_kevent *, io_status_block *); static irp *IoBuildAsynchronousFsdRequest(uint32_t, device_object *, void *, uint32_t, uint64_t *, io_status_block *); static irp *IoBuildDeviceIoControlRequest(uint32_t, device_object *, void *, uint32_t, void *, uint32_t, uint8_t, nt_kevent *, io_status_block *); static irp *IoAllocateIrp(uint8_t, uint8_t); static void IoReuseIrp(irp *, uint32_t); static void IoFreeIrp(irp *); static void IoInitializeIrp(irp *, uint16_t, uint8_t); static irp *IoMakeAssociatedIrp(irp *, uint8_t); static uint32_t KeWaitForMultipleObjects(uint32_t, nt_dispatch_header **, uint32_t, uint32_t, uint32_t, uint8_t, int64_t *, wait_block *); static void ntoskrnl_waittest(nt_dispatch_header *, uint32_t); static void ntoskrnl_satisfy_wait(nt_dispatch_header *, struct thread *); static void ntoskrnl_satisfy_multiple_waits(wait_block *); static int ntoskrnl_is_signalled(nt_dispatch_header *, struct thread *); static void ntoskrnl_insert_timer(ktimer *, int); static void ntoskrnl_remove_timer(ktimer *); #ifdef NTOSKRNL_DEBUG_TIMERS static void ntoskrnl_show_timers(void); #endif static void ntoskrnl_timercall(void *); static void ntoskrnl_dpc_thread(void *); static void ntoskrnl_destroy_dpc_threads(void); static void ntoskrnl_destroy_workitem_threads(void); static void ntoskrnl_workitem_thread(void *); static void ntoskrnl_workitem(device_object *, 
void *); static void ntoskrnl_unicode_to_ascii(uint16_t *, char *, int); static void ntoskrnl_ascii_to_unicode(char *, uint16_t *, int); static uint8_t ntoskrnl_insert_dpc(list_entry *, kdpc *); static void WRITE_REGISTER_USHORT(uint16_t *, uint16_t); static uint16_t READ_REGISTER_USHORT(uint16_t *); static void WRITE_REGISTER_ULONG(uint32_t *, uint32_t); static uint32_t READ_REGISTER_ULONG(uint32_t *); static void WRITE_REGISTER_UCHAR(uint8_t *, uint8_t); static uint8_t READ_REGISTER_UCHAR(uint8_t *); static int64_t _allmul(int64_t, int64_t); static int64_t _alldiv(int64_t, int64_t); static int64_t _allrem(int64_t, int64_t); static int64_t _allshr(int64_t, uint8_t); static int64_t _allshl(int64_t, uint8_t); static uint64_t _aullmul(uint64_t, uint64_t); static uint64_t _aulldiv(uint64_t, uint64_t); static uint64_t _aullrem(uint64_t, uint64_t); static uint64_t _aullshr(uint64_t, uint8_t); static uint64_t _aullshl(uint64_t, uint8_t); static slist_entry *ntoskrnl_pushsl(slist_header *, slist_entry *); static slist_entry *ntoskrnl_popsl(slist_header *); static void ExInitializePagedLookasideList(paged_lookaside_list *, lookaside_alloc_func *, lookaside_free_func *, uint32_t, size_t, uint32_t, uint16_t); static void ExDeletePagedLookasideList(paged_lookaside_list *); static void ExInitializeNPagedLookasideList(npaged_lookaside_list *, lookaside_alloc_func *, lookaside_free_func *, uint32_t, size_t, uint32_t, uint16_t); static void ExDeleteNPagedLookasideList(npaged_lookaside_list *); static slist_entry *ExInterlockedPushEntrySList(slist_header *, slist_entry *, kspin_lock *); static slist_entry *ExInterlockedPopEntrySList(slist_header *, kspin_lock *); static uint32_t InterlockedIncrement(volatile uint32_t *); static uint32_t InterlockedDecrement(volatile uint32_t *); static void ExInterlockedAddLargeStatistic(uint64_t *, uint32_t); static void *MmAllocateContiguousMemory(uint32_t, uint64_t); static void *MmAllocateContiguousMemorySpecifyCache(uint32_t, uint64_t, uint64_t, uint64_t, uint32_t); static void MmFreeContiguousMemory(void *); static void MmFreeContiguousMemorySpecifyCache(void *, uint32_t, uint32_t); static uint32_t MmSizeOfMdl(void *, size_t); static void *MmMapLockedPages(mdl *, uint8_t); static void *MmMapLockedPagesSpecifyCache(mdl *, uint8_t, uint32_t, void *, uint32_t, uint32_t); static void MmUnmapLockedPages(void *, mdl *); static uint8_t MmIsAddressValid(void *); static device_t ntoskrnl_finddev(device_t, uint64_t, struct resource **); static void RtlZeroMemory(void *, size_t); static void RtlCopyMemory(void *, const void *, size_t); static size_t RtlCompareMemory(const void *, const void *, size_t); static ndis_status RtlUnicodeStringToInteger(unicode_string *, uint32_t, uint32_t *); static int atoi (const char *); static long atol (const char *); static int rand(void); static void srand(unsigned int); static void ntoskrnl_time(uint64_t *); static uint8_t IoIsWdmVersionAvailable(uint8_t, uint8_t); static void ntoskrnl_thrfunc(void *); static ndis_status PsCreateSystemThread(ndis_handle *, uint32_t, void *, ndis_handle, void *, void *, void *); static ndis_status PsTerminateSystemThread(ndis_status); static ndis_status IoGetDeviceProperty(device_object *, uint32_t, uint32_t, void *, uint32_t *); static void KeInitializeMutex(kmutant *, uint32_t); static uint32_t KeReleaseMutex(kmutant *, uint8_t); static uint32_t KeReadStateMutex(kmutant *); static ndis_status ObReferenceObjectByHandle(ndis_handle, uint32_t, void *, uint8_t, void **, void **); static void 
ObfDereferenceObject(void *); static uint32_t ZwClose(ndis_handle); static uint32_t WmiQueryTraceInformation(uint32_t, void *, uint32_t, uint32_t, void *); static uint32_t WmiTraceMessage(uint64_t, uint32_t, void *, uint16_t, ...); static uint32_t IoWMIRegistrationControl(device_object *, uint32_t); static void *ntoskrnl_memset(void *, int, size_t); static void *ntoskrnl_memmove(void *, void *, size_t); static void *ntoskrnl_memchr(void *, unsigned char, size_t); static char *ntoskrnl_strstr(char *, char *); static int ntoskrnl_toupper(int); static int ntoskrnl_tolower(int); static funcptr ntoskrnl_findwrap(funcptr); static uint32_t DbgPrint(char *, ...); static void DbgBreakPoint(void); static void dummy(void); static struct mtx ntoskrnl_dispatchlock; static struct mtx ntoskrnl_interlock; static kspin_lock ntoskrnl_cancellock; static int ntoskrnl_kth = 0; static struct nt_objref_head ntoskrnl_reflist; static uma_zone_t mdl_zone; static uma_zone_t iw_zone; static struct kdpc_queue *kq_queues; static struct kdpc_queue *wq_queues; static int wq_idx = 0; int ntoskrnl_libinit() { image_patch_table *patch; int error; struct proc *p; kdpc_queue *kq; callout_entry *e; int i; char name[64]; mtx_init(&ntoskrnl_dispatchlock, "ntoskrnl dispatch lock", MTX_NDIS_LOCK, MTX_DEF|MTX_RECURSE); mtx_init(&ntoskrnl_interlock, MTX_NTOSKRNL_SPIN_LOCK, NULL, MTX_SPIN); KeInitializeSpinLock(&ntoskrnl_cancellock); KeInitializeSpinLock(&ntoskrnl_intlock); TAILQ_INIT(&ntoskrnl_reflist); InitializeListHead(&ntoskrnl_calllist); InitializeListHead(&ntoskrnl_intlist); mtx_init(&ntoskrnl_calllock, MTX_NTOSKRNL_SPIN_LOCK, NULL, MTX_SPIN); kq_queues = ExAllocatePoolWithTag(NonPagedPool, #ifdef NTOSKRNL_MULTIPLE_DPCS sizeof(kdpc_queue) * mp_ncpus, 0); #else sizeof(kdpc_queue), 0); #endif if (kq_queues == NULL) return(ENOMEM); wq_queues = ExAllocatePoolWithTag(NonPagedPool, sizeof(kdpc_queue) * WORKITEM_THREADS, 0); if (wq_queues == NULL) return(ENOMEM); #ifdef NTOSKRNL_MULTIPLE_DPCS bzero((char *)kq_queues, sizeof(kdpc_queue) * mp_ncpus); #else bzero((char *)kq_queues, sizeof(kdpc_queue)); #endif bzero((char *)wq_queues, sizeof(kdpc_queue) * WORKITEM_THREADS); /* * Launch the DPC threads. */ #ifdef NTOSKRNL_MULTIPLE_DPCS for (i = 0; i < mp_ncpus; i++) { #else for (i = 0; i < 1; i++) { #endif kq = kq_queues + i; kq->kq_cpu = i; sprintf(name, "Windows DPC %d", i); error = kthread_create(ntoskrnl_dpc_thread, kq, &p, RFHIGHPID, NDIS_KSTACK_PAGES, name); if (error) panic("failed to launch DPC thread"); } /* * Launch the workitem threads. */ for (i = 0; i < WORKITEM_THREADS; i++) { kq = wq_queues + i; sprintf(name, "Windows Workitem %d", i); error = kthread_create(ntoskrnl_workitem_thread, kq, &p, RFHIGHPID, NDIS_KSTACK_PAGES, name); if (error) panic("failed to launch workitem thread"); } patch = ntoskrnl_functbl; while (patch->ipt_func != NULL) { windrv_wrap((funcptr)patch->ipt_func, (funcptr *)&patch->ipt_wrap, patch->ipt_argcnt, patch->ipt_ftype); patch++; } for (i = 0; i < NTOSKRNL_TIMEOUTS; i++) { e = ExAllocatePoolWithTag(NonPagedPool, sizeof(callout_entry), 0); if (e == NULL) panic("failed to allocate timeouts"); mtx_lock_spin(&ntoskrnl_calllock); InsertHeadList((&ntoskrnl_calllist), (&e->ce_list)); mtx_unlock_spin(&ntoskrnl_calllock); } /* * MDLs are supposed to be variable size (they describe * buffers containing some number of pages, but we don't * know ahead of time how many pages that will be). But * always allocating them off the heap is very slow. 
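ntoskrnl_libinit() above pre-allocates NTOSKRNL_TIMEOUTS callout_entry structures on ntoskrnl_calllist so that timers can later be armed from contexts where malloc() is not allowed. A sketch of how a consumer would take one entry off that spin-lock-protected free list; the helper name and its NULL return on exhaustion are assumptions (the real consumer is the timer code in this file).

static callout_entry *
ntoskrnl_get_callout_entry(void)
{
	callout_entry *e;
	list_entry *l;

	mtx_lock_spin(&ntoskrnl_calllock);
	if (IsListEmpty(&ntoskrnl_calllist)) {
		mtx_unlock_spin(&ntoskrnl_calllock);
		return (NULL);			/* pool exhausted */
	}
	l = RemoveHeadList(&ntoskrnl_calllist);
	mtx_unlock_spin(&ntoskrnl_calllock);
	e = CONTAINING_RECORD(l, callout_entry, ce_list);
	return (e);
}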
As * a compromise, we create an MDL UMA zone big enough to * handle any buffer requiring up to 16 pages, and we * use those for any MDLs for buffers of 16 pages or less * in size. For buffers larger than that (which we assume * will be few and far between, we allocate the MDLs off * the heap. */ mdl_zone = uma_zcreate("Windows MDL", MDL_ZONE_SIZE, NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); iw_zone = uma_zcreate("Windows WorkItem", sizeof(io_workitem), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); return(0); } int ntoskrnl_libfini() { image_patch_table *patch; callout_entry *e; list_entry *l; patch = ntoskrnl_functbl; while (patch->ipt_func != NULL) { windrv_unwrap(patch->ipt_wrap); patch++; } /* Stop the workitem queues. */ ntoskrnl_destroy_workitem_threads(); /* Stop the DPC queues. */ ntoskrnl_destroy_dpc_threads(); ExFreePool(kq_queues); ExFreePool(wq_queues); uma_zdestroy(mdl_zone); uma_zdestroy(iw_zone); mtx_lock_spin(&ntoskrnl_calllock); while(!IsListEmpty(&ntoskrnl_calllist)) { l = RemoveHeadList(&ntoskrnl_calllist); e = CONTAINING_RECORD(l, callout_entry, ce_list); mtx_unlock_spin(&ntoskrnl_calllock); ExFreePool(e); mtx_lock_spin(&ntoskrnl_calllock); } mtx_unlock_spin(&ntoskrnl_calllock); mtx_destroy(&ntoskrnl_dispatchlock); mtx_destroy(&ntoskrnl_interlock); mtx_destroy(&ntoskrnl_calllock); return(0); } /* * We need to be able to reference this externally from the wrapper; * GCC only generates a local implementation of memset. */ static void * ntoskrnl_memset(buf, ch, size) void *buf; int ch; size_t size; { return(memset(buf, ch, size)); } static void * ntoskrnl_memmove(dst, src, size) void *src; void *dst; size_t size; { bcopy(src, dst, size); return(dst); } static void * ntoskrnl_memchr(buf, ch, len) void *buf; unsigned char ch; size_t len; { if (len != 0) { unsigned char *p = buf; do { if (*p++ == ch) return (p - 1); } while (--len != 0); } return (NULL); } static char * ntoskrnl_strstr(s, find) char *s, *find; { char c, sc; size_t len; if ((c = *find++) != 0) { len = strlen(find); do { do { if ((sc = *s++) == 0) return (NULL); } while (sc != c); } while (strncmp(s, find, len) != 0); s--; } return ((char *)s); } static int ntoskrnl_toupper(c) int c; { return(toupper(c)); } static int ntoskrnl_tolower(c) int c; { return(tolower(c)); } static uint8_t RtlEqualUnicodeString(str1, str2, caseinsensitive) unicode_string *str1; unicode_string *str2; uint8_t caseinsensitive; { int i; if (str1->us_len != str2->us_len) return(FALSE); for (i = 0; i < str1->us_len; i++) { if (caseinsensitive == TRUE) { if (toupper((char)(str1->us_buf[i] & 0xFF)) != toupper((char)(str2->us_buf[i] & 0xFF))) return(FALSE); } else { if (str1->us_buf[i] != str2->us_buf[i]) return(FALSE); } } return(TRUE); } static void RtlCopyUnicodeString(dest, src) unicode_string *dest; unicode_string *src; { if (dest->us_maxlen >= src->us_len) dest->us_len = src->us_len; else dest->us_len = dest->us_maxlen; memcpy(dest->us_buf, src->us_buf, dest->us_len); return; } static void ntoskrnl_ascii_to_unicode(ascii, unicode, len) char *ascii; uint16_t *unicode; int len; { int i; uint16_t *ustr; ustr = unicode; for (i = 0; i < len; i++) { *ustr = (uint16_t)ascii[i]; ustr++; } return; } static void ntoskrnl_unicode_to_ascii(unicode, ascii, len) uint16_t *unicode; char *ascii; int len; { int i; uint8_t *astr; astr = ascii; for (i = 0; i < len / 2; i++) { *astr = (uint8_t)unicode[i]; astr++; } return; } uint32_t RtlUnicodeStringToAnsiString(dest, src, allocate) ansi_string *dest; unicode_string *src; uint8_t allocate; { if (dest == NULL || 
src == NULL) return(STATUS_INVALID_PARAMETER); dest->as_len = src->us_len / 2; if (dest->as_maxlen < dest->as_len) dest->as_len = dest->as_maxlen; if (allocate == TRUE) { dest->as_buf = ExAllocatePoolWithTag(NonPagedPool, (src->us_len / 2) + 1, 0); if (dest->as_buf == NULL) return(STATUS_INSUFFICIENT_RESOURCES); dest->as_len = dest->as_maxlen = src->us_len / 2; } else { dest->as_len = src->us_len / 2; /* XXX */ if (dest->as_maxlen < dest->as_len) dest->as_len = dest->as_maxlen; } ntoskrnl_unicode_to_ascii(src->us_buf, dest->as_buf, dest->as_len * 2); return (STATUS_SUCCESS); } uint32_t RtlAnsiStringToUnicodeString(dest, src, allocate) unicode_string *dest; ansi_string *src; uint8_t allocate; { if (dest == NULL || src == NULL) return(STATUS_INVALID_PARAMETER); if (allocate == TRUE) { dest->us_buf = ExAllocatePoolWithTag(NonPagedPool, src->as_len * 2, 0); if (dest->us_buf == NULL) return(STATUS_INSUFFICIENT_RESOURCES); dest->us_len = dest->us_maxlen = strlen(src->as_buf) * 2; } else { dest->us_len = src->as_len * 2; /* XXX */ if (dest->us_maxlen < dest->us_len) dest->us_len = dest->us_maxlen; } ntoskrnl_ascii_to_unicode(src->as_buf, dest->us_buf, dest->us_len / 2); return (STATUS_SUCCESS); } void * ExAllocatePoolWithTag(pooltype, len, tag) uint32_t pooltype; size_t len; uint32_t tag; { void *buf; buf = malloc(len, M_DEVBUF, M_NOWAIT|M_ZERO); if (buf == NULL) return(NULL); return(buf); } void ExFreePool(buf) void *buf; { free(buf, M_DEVBUF); return; } uint32_t IoAllocateDriverObjectExtension(drv, clid, extlen, ext) driver_object *drv; void *clid; uint32_t extlen; void **ext; { custom_extension *ce; ce = ExAllocatePoolWithTag(NonPagedPool, sizeof(custom_extension) + extlen, 0); if (ce == NULL) return(STATUS_INSUFFICIENT_RESOURCES); ce->ce_clid = clid; InsertTailList((&drv->dro_driverext->dre_usrext), (&ce->ce_list)); *ext = (void *)(ce + 1); return(STATUS_SUCCESS); } void * IoGetDriverObjectExtension(drv, clid) driver_object *drv; void *clid; { list_entry *e; custom_extension *ce; /* * Sanity check. Our dummy bus drivers don't have * any driver extentions. */ if (drv->dro_driverext == NULL) return(NULL); e = drv->dro_driverext->dre_usrext.nle_flink; while (e != &drv->dro_driverext->dre_usrext) { ce = (custom_extension *)e; if (ce->ce_clid == clid) return((void *)(ce + 1)); e = e->nle_flink; } return(NULL); } uint32_t IoCreateDevice(drv, devextlen, devname, devtype, devchars, exclusive, newdev) driver_object *drv; uint32_t devextlen; unicode_string *devname; uint32_t devtype; uint32_t devchars; uint8_t exclusive; device_object **newdev; { device_object *dev; dev = ExAllocatePoolWithTag(NonPagedPool, sizeof(device_object), 0); if (dev == NULL) return(STATUS_INSUFFICIENT_RESOURCES); dev->do_type = devtype; dev->do_drvobj = drv; dev->do_currirp = NULL; dev->do_flags = 0; if (devextlen) { dev->do_devext = ExAllocatePoolWithTag(NonPagedPool, devextlen, 0); if (dev->do_devext == NULL) { ExFreePool(dev); return(STATUS_INSUFFICIENT_RESOURCES); } bzero(dev->do_devext, devextlen); } else dev->do_devext = NULL; dev->do_size = sizeof(device_object) + devextlen; dev->do_refcnt = 1; dev->do_attacheddev = NULL; dev->do_nextdev = NULL; dev->do_devtype = devtype; dev->do_stacksize = 1; dev->do_alignreq = 1; dev->do_characteristics = devchars; dev->do_iotimer = NULL; KeInitializeEvent(&dev->do_devlock, EVENT_TYPE_SYNC, TRUE); /* * Vpd is used for disk/tape devices, * but we don't support those. (Yet.) 
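IoAllocateDriverObjectExtension() above uses a common allocation trick: the bookkeeping header (custom_extension) and the caller's payload are obtained in one allocation, and the caller is handed a pointer just past the header ("ce + 1"); IoGetDriverObjectExtension() later finds it by walking the list and comparing the client id. A standalone illustration of that layout, with hypothetical names throughout.

#include <stdio.h>
#include <stdlib.h>

struct ext_header {			/* plays the role of custom_extension */
	void	*clid;			/* client id used for later lookup */
};

static void *
ext_alloc(void *clid, size_t len)
{
	struct ext_header *h;

	h = calloc(1, sizeof(*h) + len);
	if (h == NULL)
		return (NULL);
	h->clid = clid;
	return (h + 1);			/* payload starts right after the header */
}

int
main(void)
{
	static int my_clid;		/* its address serves as the client id */
	char *payload;

	payload = ext_alloc(&my_clid, 64);
	if (payload == NULL)
		return (1);
	snprintf(payload, 64, "hello");
	printf("%s (header sits %zu bytes before the payload)\n",
	    payload, sizeof(struct ext_header));
	free((struct ext_header *)payload - 1);
	return (0);
}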
*/ dev->do_vpb = NULL; dev->do_devobj_ext = ExAllocatePoolWithTag(NonPagedPool, sizeof(devobj_extension), 0); if (dev->do_devobj_ext == NULL) { if (dev->do_devext != NULL) ExFreePool(dev->do_devext); ExFreePool(dev); return(STATUS_INSUFFICIENT_RESOURCES); } dev->do_devobj_ext->dve_type = 0; dev->do_devobj_ext->dve_size = sizeof(devobj_extension); dev->do_devobj_ext->dve_devobj = dev; /* * Attach this device to the driver object's list * of devices. Note: this is not the same as attaching * the device to the device stack. The driver's AddDevice * routine must explicitly call IoAddDeviceToDeviceStack() * to do that. */ if (drv->dro_devobj == NULL) { drv->dro_devobj = dev; dev->do_nextdev = NULL; } else { dev->do_nextdev = drv->dro_devobj; drv->dro_devobj = dev; } *newdev = dev; return(STATUS_SUCCESS); } void IoDeleteDevice(dev) device_object *dev; { device_object *prev; if (dev == NULL) return; if (dev->do_devobj_ext != NULL) ExFreePool(dev->do_devobj_ext); if (dev->do_devext != NULL) ExFreePool(dev->do_devext); /* Unlink the device from the driver's device list. */ prev = dev->do_drvobj->dro_devobj; if (prev == dev) dev->do_drvobj->dro_devobj = dev->do_nextdev; else { while (prev->do_nextdev != dev) prev = prev->do_nextdev; prev->do_nextdev = dev->do_nextdev; } ExFreePool(dev); return; } device_object * IoGetAttachedDevice(dev) device_object *dev; { device_object *d; if (dev == NULL) return (NULL); d = dev; while (d->do_attacheddev != NULL) d = d->do_attacheddev; return (d); } static irp * IoBuildSynchronousFsdRequest(func, dobj, buf, len, off, event, status) uint32_t func; device_object *dobj; void *buf; uint32_t len; uint64_t *off; nt_kevent *event; io_status_block *status; { irp *ip; ip = IoBuildAsynchronousFsdRequest(func, dobj, buf, len, off, status); if (ip == NULL) return(NULL); ip->irp_usrevent = event; return(ip); } static irp * IoBuildAsynchronousFsdRequest(func, dobj, buf, len, off, status) uint32_t func; device_object *dobj; void *buf; uint32_t len; uint64_t *off; io_status_block *status; { irp *ip; io_stack_location *sl; ip = IoAllocateIrp(dobj->do_stacksize, TRUE); if (ip == NULL) return(NULL); ip->irp_usriostat = status; ip->irp_tail.irp_overlay.irp_thread = NULL; sl = IoGetNextIrpStackLocation(ip); sl->isl_major = func; sl->isl_minor = 0; sl->isl_flags = 0; sl->isl_ctl = 0; sl->isl_devobj = dobj; sl->isl_fileobj = NULL; sl->isl_completionfunc = NULL; ip->irp_userbuf = buf; if (dobj->do_flags & DO_BUFFERED_IO) { ip->irp_assoc.irp_sysbuf = ExAllocatePoolWithTag(NonPagedPool, len, 0); if (ip->irp_assoc.irp_sysbuf == NULL) { IoFreeIrp(ip); return(NULL); } bcopy(buf, ip->irp_assoc.irp_sysbuf, len); } if (dobj->do_flags & DO_DIRECT_IO) { ip->irp_mdl = IoAllocateMdl(buf, len, FALSE, FALSE, ip); if (ip->irp_mdl == NULL) { if (ip->irp_assoc.irp_sysbuf != NULL) ExFreePool(ip->irp_assoc.irp_sysbuf); IoFreeIrp(ip); return(NULL); } ip->irp_userbuf = NULL; ip->irp_assoc.irp_sysbuf = NULL; } if (func == IRP_MJ_READ) { sl->isl_parameters.isl_read.isl_len = len; if (off != NULL) sl->isl_parameters.isl_read.isl_byteoff = *off; else sl->isl_parameters.isl_read.isl_byteoff = 0; } if (func == IRP_MJ_WRITE) { sl->isl_parameters.isl_write.isl_len = len; if (off != NULL) sl->isl_parameters.isl_write.isl_byteoff = *off; else sl->isl_parameters.isl_write.isl_byteoff = 0; } return(ip); } static irp * IoBuildDeviceIoControlRequest(iocode, dobj, ibuf, ilen, obuf, olen, isinternal, event, status) uint32_t iocode; device_object *dobj; void *ibuf; uint32_t ilen; void *obuf; uint32_t olen; uint8_t 
isinternal; nt_kevent *event; io_status_block *status; { irp *ip; io_stack_location *sl; uint32_t buflen; ip = IoAllocateIrp(dobj->do_stacksize, TRUE); if (ip == NULL) return(NULL); ip->irp_usrevent = event; ip->irp_usriostat = status; ip->irp_tail.irp_overlay.irp_thread = NULL; sl = IoGetNextIrpStackLocation(ip); sl->isl_major = isinternal == TRUE ? IRP_MJ_INTERNAL_DEVICE_CONTROL : IRP_MJ_DEVICE_CONTROL; sl->isl_minor = 0; sl->isl_flags = 0; sl->isl_ctl = 0; sl->isl_devobj = dobj; sl->isl_fileobj = NULL; sl->isl_completionfunc = NULL; sl->isl_parameters.isl_ioctl.isl_iocode = iocode; sl->isl_parameters.isl_ioctl.isl_ibuflen = ilen; sl->isl_parameters.isl_ioctl.isl_obuflen = olen; switch(IO_METHOD(iocode)) { case METHOD_BUFFERED: if (ilen > olen) buflen = ilen; else buflen = olen; if (buflen) { ip->irp_assoc.irp_sysbuf = ExAllocatePoolWithTag(NonPagedPool, buflen, 0); if (ip->irp_assoc.irp_sysbuf == NULL) { IoFreeIrp(ip); return(NULL); } } if (ilen && ibuf != NULL) { bcopy(ibuf, ip->irp_assoc.irp_sysbuf, ilen); bzero((char *)ip->irp_assoc.irp_sysbuf + ilen, buflen - ilen); } else bzero(ip->irp_assoc.irp_sysbuf, ilen); ip->irp_userbuf = obuf; break; case METHOD_IN_DIRECT: case METHOD_OUT_DIRECT: if (ilen && ibuf != NULL) { ip->irp_assoc.irp_sysbuf = ExAllocatePoolWithTag(NonPagedPool, ilen, 0); if (ip->irp_assoc.irp_sysbuf == NULL) { IoFreeIrp(ip); return(NULL); } bcopy(ibuf, ip->irp_assoc.irp_sysbuf, ilen); } if (olen && obuf != NULL) { ip->irp_mdl = IoAllocateMdl(obuf, olen, FALSE, FALSE, ip); /* * Normally we would MmProbeAndLockPages() * here, but we don't have to in our * imlementation. */ } break; case METHOD_NEITHER: ip->irp_userbuf = obuf; sl->isl_parameters.isl_ioctl.isl_type3ibuf = ibuf; break; default: break; } /* * Ideally, we should associate this IRP with the calling * thread here. 
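 * In Windows, a caller of IoBuildDeviceIoControlRequest() would then
 * typically hand the IRP to IofCallDriver() and, if the request comes
 * back STATUS_PENDING, wait on the event it supplied until the lower
 * driver completes the I/O.  (A usage note only; nothing below relies
 * on it.)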
*/ return (ip); } static irp * IoAllocateIrp(stsize, chargequota) uint8_t stsize; uint8_t chargequota; { irp *i; i = ExAllocatePoolWithTag(NonPagedPool, IoSizeOfIrp(stsize), 0); if (i == NULL) return (NULL); IoInitializeIrp(i, IoSizeOfIrp(stsize), stsize); return (i); } static irp * IoMakeAssociatedIrp(ip, stsize) irp *ip; uint8_t stsize; { irp *associrp; associrp = IoAllocateIrp(stsize, FALSE); if (associrp == NULL) return(NULL); mtx_lock(&ntoskrnl_dispatchlock); associrp->irp_flags |= IRP_ASSOCIATED_IRP; associrp->irp_tail.irp_overlay.irp_thread = ip->irp_tail.irp_overlay.irp_thread; associrp->irp_assoc.irp_master = ip; mtx_unlock(&ntoskrnl_dispatchlock); return(associrp); } static void IoFreeIrp(ip) irp *ip; { ExFreePool(ip); return; } static void IoInitializeIrp(io, psize, ssize) irp *io; uint16_t psize; uint8_t ssize; { bzero((char *)io, IoSizeOfIrp(ssize)); io->irp_size = psize; io->irp_stackcnt = ssize; io->irp_currentstackloc = ssize; InitializeListHead(&io->irp_thlist); io->irp_tail.irp_overlay.irp_csl = (io_stack_location *)(io + 1) + ssize; return; } static void IoReuseIrp(ip, status) irp *ip; uint32_t status; { uint8_t allocflags; allocflags = ip->irp_allocflags; IoInitializeIrp(ip, ip->irp_size, ip->irp_stackcnt); ip->irp_iostat.isb_status = status; ip->irp_allocflags = allocflags; return; } void IoAcquireCancelSpinLock(irql) uint8_t *irql; { KeAcquireSpinLock(&ntoskrnl_cancellock, irql); return; } void IoReleaseCancelSpinLock(irql) uint8_t irql; { KeReleaseSpinLock(&ntoskrnl_cancellock, irql); return; } uint8_t IoCancelIrp(irp *ip) { cancel_func cfunc; IoAcquireCancelSpinLock(&ip->irp_cancelirql); cfunc = IoSetCancelRoutine(ip, NULL); ip->irp_cancel = TRUE; if (ip->irp_cancelfunc == NULL) { IoReleaseCancelSpinLock(ip->irp_cancelirql); return(FALSE); } MSCALL2(cfunc, IoGetCurrentIrpStackLocation(ip)->isl_devobj, ip); return(TRUE); } uint32_t IofCallDriver(dobj, ip) device_object *dobj; irp *ip; { driver_object *drvobj; io_stack_location *sl; uint32_t status; driver_dispatch disp; drvobj = dobj->do_drvobj; if (ip->irp_currentstackloc <= 0) panic("IoCallDriver(): out of stack locations"); IoSetNextIrpStackLocation(ip); sl = IoGetCurrentIrpStackLocation(ip); sl->isl_devobj = dobj; disp = drvobj->dro_dispatch[sl->isl_major]; status = MSCALL2(disp, dobj, ip); return(status); } void IofCompleteRequest(ip, prioboost) irp *ip; uint8_t prioboost; { uint32_t i; uint32_t status; device_object *dobj; io_stack_location *sl; completion_func cf; ip->irp_pendingreturned = IoGetCurrentIrpStackLocation(ip)->isl_ctl & SL_PENDING_RETURNED; sl = (io_stack_location *)(ip + 1); for (i = ip->irp_currentstackloc; i < (uint32_t)ip->irp_stackcnt; i++) { if (ip->irp_currentstackloc < ip->irp_stackcnt - 1) { IoSkipCurrentIrpStackLocation(ip); dobj = IoGetCurrentIrpStackLocation(ip)->isl_devobj; } else dobj = NULL; if (sl[i].isl_completionfunc != NULL && ((ip->irp_iostat.isb_status == STATUS_SUCCESS && sl->isl_ctl & SL_INVOKE_ON_SUCCESS) || (ip->irp_iostat.isb_status != STATUS_SUCCESS && sl->isl_ctl & SL_INVOKE_ON_ERROR) || (ip->irp_cancel == TRUE && sl->isl_ctl & SL_INVOKE_ON_CANCEL))) { cf = sl->isl_completionfunc; status = MSCALL3(cf, dobj, ip, sl->isl_completionctx); if (status == STATUS_MORE_PROCESSING_REQUIRED) return; } if (IoGetCurrentIrpStackLocation(ip)->isl_ctl & SL_PENDING_RETURNED) ip->irp_pendingreturned = TRUE; } /* Handle any associated IRPs. 
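 * (An associated IRP is one that was split off from a master IRP,
 * e.g. via IoMakeAssociatedIrp().  When the last associated IRP
 * completes, the master IRP must be completed too, which is what the
 * InterlockedDecrement() on the master's IRP count below implements.)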
*/ if (ip->irp_flags & IRP_ASSOCIATED_IRP) { uint32_t masterirpcnt; irp *masterirp; mdl *m; masterirp = ip->irp_assoc.irp_master; masterirpcnt = InterlockedDecrement(&masterirp->irp_assoc.irp_irpcnt); while ((m = ip->irp_mdl) != NULL) { ip->irp_mdl = m->mdl_next; IoFreeMdl(m); } IoFreeIrp(ip); if (masterirpcnt == 0) IoCompleteRequest(masterirp, IO_NO_INCREMENT); return; } /* With any luck, these conditions will never arise. */ if (ip->irp_flags & (IRP_PAGING_IO|IRP_CLOSE_OPERATION)) { if (ip->irp_usriostat != NULL) *ip->irp_usriostat = ip->irp_iostat; if (ip->irp_usrevent != NULL) KeSetEvent(ip->irp_usrevent, prioboost, FALSE); if (ip->irp_flags & IRP_PAGING_IO) { if (ip->irp_mdl != NULL) IoFreeMdl(ip->irp_mdl); IoFreeIrp(ip); } } return; } void ntoskrnl_intr(arg) void *arg; { kinterrupt *iobj; uint8_t irql; uint8_t claimed; list_entry *l; KeAcquireSpinLock(&ntoskrnl_intlock, &irql); l = ntoskrnl_intlist.nle_flink; while (l != &ntoskrnl_intlist) { iobj = CONTAINING_RECORD(l, kinterrupt, ki_list); claimed = MSCALL2(iobj->ki_svcfunc, iobj, iobj->ki_svcctx); if (claimed == TRUE) break; l = l->nle_flink; } KeReleaseSpinLock(&ntoskrnl_intlock, irql); return; } uint8_t KeAcquireInterruptSpinLock(iobj) kinterrupt *iobj; { uint8_t irql; KeAcquireSpinLock(&ntoskrnl_intlock, &irql); return(irql); } void KeReleaseInterruptSpinLock(iobj, irql) kinterrupt *iobj; uint8_t irql; { KeReleaseSpinLock(&ntoskrnl_intlock, irql); return; } uint8_t KeSynchronizeExecution(iobj, syncfunc, syncctx) kinterrupt *iobj; void *syncfunc; void *syncctx; { uint8_t irql; KeAcquireSpinLock(&ntoskrnl_intlock, &irql); MSCALL1(syncfunc, syncctx); KeReleaseSpinLock(&ntoskrnl_intlock, irql); return(TRUE); } /* * IoConnectInterrupt() is passed only the interrupt vector and * irql that a device wants to use, but no device-specific tag * of any kind. This conflicts rather badly with FreeBSD's * bus_setup_intr(), which needs the device_t for the device * requesting interrupt delivery. In order to bypass this * inconsistency, we implement a second level of interrupt * dispatching on top of bus_setup_intr(). All devices use * ntoskrnl_intr() as their ISR, and any device requesting * interrupts will be registered with ntoskrnl_intr()'s interrupt * dispatch list. When an interrupt arrives, we walk the list * and invoke all the registered ISRs. This effectively makes all * interrupts shared, but it's the only way to duplicate the * semantics of IoConnectInterrupt() and IoDisconnectInterrupt() properly. 
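 *
 * A minimal sketch of the FreeBSD half of that arrangement (the real
 * hookup lives in the bus front-end; the identifiers below are
 * illustrative only): ntoskrnl_intr() is registered once with
 * bus_setup_intr(), after which IoConnectInterrupt() calls merely add
 * entries to the dispatch list walked above.
 *
 *	static int
 *	example_setup_ndis_intr(device_t dev, struct resource *irq,
 *	    void *sc, void **cookiep)
 *	{
 *		return (bus_setup_intr(dev, irq,
 *		    INTR_TYPE_NET | INTR_MPSAFE, NULL, ntoskrnl_intr,
 *		    sc, cookiep));
 *	}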
*/ uint32_t IoConnectInterrupt(iobj, svcfunc, svcctx, lock, vector, irql, syncirql, imode, shared, affinity, savefloat) kinterrupt **iobj; void *svcfunc; void *svcctx; uint32_t vector; kspin_lock *lock; uint8_t irql; uint8_t syncirql; uint8_t imode; uint8_t shared; uint32_t affinity; uint8_t savefloat; { uint8_t curirql; *iobj = ExAllocatePoolWithTag(NonPagedPool, sizeof(kinterrupt), 0); if (*iobj == NULL) return(STATUS_INSUFFICIENT_RESOURCES); (*iobj)->ki_svcfunc = svcfunc; (*iobj)->ki_svcctx = svcctx; if (lock == NULL) { KeInitializeSpinLock(&(*iobj)->ki_lock_priv); (*iobj)->ki_lock = &(*iobj)->ki_lock_priv; } else (*iobj)->ki_lock = lock; KeAcquireSpinLock(&ntoskrnl_intlock, &curirql); InsertHeadList((&ntoskrnl_intlist), (&(*iobj)->ki_list)); KeReleaseSpinLock(&ntoskrnl_intlock, curirql); return(STATUS_SUCCESS); } void IoDisconnectInterrupt(iobj) kinterrupt *iobj; { uint8_t irql; if (iobj == NULL) return; KeAcquireSpinLock(&ntoskrnl_intlock, &irql); RemoveEntryList((&iobj->ki_list)); KeReleaseSpinLock(&ntoskrnl_intlock, irql); ExFreePool(iobj); return; } device_object * IoAttachDeviceToDeviceStack(src, dst) device_object *src; device_object *dst; { device_object *attached; mtx_lock(&ntoskrnl_dispatchlock); attached = IoGetAttachedDevice(dst); attached->do_attacheddev = src; src->do_attacheddev = NULL; src->do_stacksize = attached->do_stacksize + 1; mtx_unlock(&ntoskrnl_dispatchlock); return(attached); } void IoDetachDevice(topdev) device_object *topdev; { device_object *tail; mtx_lock(&ntoskrnl_dispatchlock); /* First, break the chain. */ tail = topdev->do_attacheddev; if (tail == NULL) { mtx_unlock(&ntoskrnl_dispatchlock); return; } topdev->do_attacheddev = tail->do_attacheddev; topdev->do_refcnt--; /* Now reduce the stacksize count for the takm_il objects. */ tail = topdev->do_attacheddev; while (tail != NULL) { tail->do_stacksize--; tail = tail->do_attacheddev; } mtx_unlock(&ntoskrnl_dispatchlock); return; } /* * For the most part, an object is considered signalled if * dh_sigstate == TRUE. The exception is for mutant objects * (mutexes), where the logic works like this: * * - If the thread already owns the object and sigstate is * less than or equal to 0, then the object is considered * signalled (recursive acquisition). * - If dh_sigstate == 1, the object is also considered * signalled. */ static int ntoskrnl_is_signalled(obj, td) nt_dispatch_header *obj; struct thread *td; { kmutant *km; if (obj->dh_type == DISP_TYPE_MUTANT) { km = (kmutant *)obj; if ((obj->dh_sigstate <= 0 && km->km_ownerthread == td) || obj->dh_sigstate == 1) return(TRUE); return(FALSE); } if (obj->dh_sigstate > 0) return(TRUE); return(FALSE); } static void ntoskrnl_satisfy_wait(obj, td) nt_dispatch_header *obj; struct thread *td; { kmutant *km; switch (obj->dh_type) { case DISP_TYPE_MUTANT: km = (struct kmutant *)obj; obj->dh_sigstate--; /* * If sigstate reaches 0, the mutex is now * non-signalled (the new thread owns it). */ if (obj->dh_sigstate == 0) { km->km_ownerthread = td; if (km->km_abandoned == TRUE) km->km_abandoned = FALSE; } break; /* Synchronization objects get reset to unsignalled. 
*/ case DISP_TYPE_SYNCHRONIZATION_EVENT: case DISP_TYPE_SYNCHRONIZATION_TIMER: obj->dh_sigstate = 0; break; case DISP_TYPE_SEMAPHORE: obj->dh_sigstate--; break; default: break; } return; } static void ntoskrnl_satisfy_multiple_waits(wb) wait_block *wb; { wait_block *cur; struct thread *td; cur = wb; td = wb->wb_kthread; do { ntoskrnl_satisfy_wait(wb->wb_object, td); cur->wb_awakened = TRUE; cur = cur->wb_next; } while (cur != wb); return; } /* Always called with dispatcher lock held. */ static void ntoskrnl_waittest(obj, increment) nt_dispatch_header *obj; uint32_t increment; { wait_block *w, *next; list_entry *e; struct thread *td; wb_ext *we; int satisfied; /* * Once an object has been signalled, we walk its list of * wait blocks. If a wait block can be awakened, then satisfy * waits as necessary and wake the thread. * * The rules work like this: * * If a wait block is marked as WAITTYPE_ANY, then * we can satisfy the wait conditions on the current * object and wake the thread right away. Satisfying * the wait also has the effect of breaking us out * of the search loop. * * If the object is marked as WAITTYLE_ALL, then the * wait block will be part of a circularly linked * list of wait blocks belonging to a waiting thread * that's sleeping in KeWaitForMultipleObjects(). In * order to wake the thread, all the objects in the * wait list must be in the signalled state. If they * are, we then satisfy all of them and wake the * thread. * */ e = obj->dh_waitlisthead.nle_flink; while (e != &obj->dh_waitlisthead && obj->dh_sigstate > 0) { w = CONTAINING_RECORD(e, wait_block, wb_waitlist); we = w->wb_ext; td = we->we_td; satisfied = FALSE; if (w->wb_waittype == WAITTYPE_ANY) { /* * Thread can be awakened if * any wait is satisfied. */ ntoskrnl_satisfy_wait(obj, td); satisfied = TRUE; w->wb_awakened = TRUE; } else { /* * Thread can only be woken up * if all waits are satisfied. * If the thread is waiting on multiple * objects, they should all be linked * through the wb_next pointers in the * wait blocks. */ satisfied = TRUE; next = w->wb_next; while (next != w) { if (ntoskrnl_is_signalled(obj, td) == FALSE) { satisfied = FALSE; break; } next = next->wb_next; } ntoskrnl_satisfy_multiple_waits(w); } if (satisfied == TRUE) cv_broadcastpri(&we->we_cv, w->wb_oldpri - (increment * 4)); e = e->nle_flink; } return; } static void ntoskrnl_time(tval) uint64_t *tval; { struct timespec ts; nanotime(&ts); *tval = (uint64_t)ts.tv_nsec / 100 + (uint64_t)ts.tv_sec * 10000000 + 11644473600; return; } /* * KeWaitForSingleObject() is a tricky beast, because it can be used * with several different object types: semaphores, timers, events, * mutexes and threads. Semaphores don't appear very often, but the * other object types are quite common. KeWaitForSingleObject() is * what's normally used to acquire a mutex, and it can be used to * wait for a thread termination. * * The Windows NDIS API is implemented in terms of Windows kernel * primitives, and some of the object manipulation is duplicated in * NDIS. For example, NDIS has timers and events, which are actually * Windows kevents and ktimers. Now, you're supposed to only use the * NDIS variants of these objects within the confines of the NDIS API, * but there are some naughty developers out there who will use * KeWaitForSingleObject() on NDIS timer and event objects, so we * have to support that as well. Conseqently, our NDIS timer and event * code has to be closely tied into our ntoskrnl timer and event code, * just as it is in Windows. 
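 *
 * Before the per-object details below, a hedged usage sketch (the
 * identifiers are invented, and "ev" is an nt_kevent that has already
 * been initialized with KeInitializeEvent()) of the most common
 * pattern: waiting on an event with a relative 500ms timeout, i.e. a
 * negative duetime expressed in 100-nanosecond units.
 *
 *	int64_t duetime = -5000000;
 *
 *	if (KeWaitForSingleObject(&ev, 0, 0, FALSE, &duetime) ==
 *	    STATUS_TIMEOUT)
 *		return (ETIMEDOUT);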
* * KeWaitForSingleObject() may do different things for different kinds * of objects: * * - For events, we check if the event has been signalled. If the * event is already in the signalled state, we just return immediately, * otherwise we wait for it to be set to the signalled state by someone * else calling KeSetEvent(). Events can be either synchronization or * notification events. * * - For timers, if the timer has already fired and the timer is in * the signalled state, we just return, otherwise we wait on the * timer. Unlike an event, timers get signalled automatically when * they expire rather than someone having to trip them manually. * Timers initialized with KeInitializeTimer() are always notification * events: KeInitializeTimerEx() lets you initialize a timer as * either a notification or synchronization event. * * - For mutexes, we try to acquire the mutex and if we can't, we wait * on the mutex until it's available and then grab it. When a mutex is * released, it enters the signalled state, which wakes up one of the * threads waiting to acquire it. Mutexes are always synchronization * events. * * - For threads, the only thing we do is wait until the thread object * enters a signalled state, which occurs when the thread terminates. * Threads are always notification events. * * A notification event wakes up all threads waiting on an object. A * synchronization event wakes up just one. Also, a synchronization event * is auto-clearing, which means we automatically set the event back to * the non-signalled state once the wakeup is done. */ uint32_t KeWaitForSingleObject(arg, reason, mode, alertable, duetime) void *arg; uint32_t reason; uint32_t mode; uint8_t alertable; int64_t *duetime; { wait_block w; struct thread *td = curthread; struct timeval tv; int error = 0; uint64_t curtime; wb_ext we; nt_dispatch_header *obj; obj = arg; if (obj == NULL) return(STATUS_INVALID_PARAMETER); mtx_lock(&ntoskrnl_dispatchlock); cv_init(&we.we_cv, "KeWFS"); we.we_td = td; /* * Check to see if this object is already signalled, * and just return without waiting if it is. */ if (ntoskrnl_is_signalled(obj, td) == TRUE) { /* Sanity check the signal state value. */ if (obj->dh_sigstate != INT32_MIN) { ntoskrnl_satisfy_wait(obj, curthread); mtx_unlock(&ntoskrnl_dispatchlock); return (STATUS_SUCCESS); } else { /* * There's a limit to how many times we can * recursively acquire a mutant. If we hit * the limit, something is very wrong. */ if (obj->dh_type == DISP_TYPE_MUTANT) { mtx_unlock(&ntoskrnl_dispatchlock); panic("mutant limit exceeded"); } } } bzero((char *)&w, sizeof(wait_block)); w.wb_object = obj; w.wb_ext = &we; w.wb_waittype = WAITTYPE_ANY; w.wb_next = &w; w.wb_waitkey = 0; w.wb_awakened = FALSE; w.wb_oldpri = td->td_priority; InsertTailList((&obj->dh_waitlisthead), (&w.wb_waitlist)); /* * The timeout value is specified in 100 nanosecond units * and can be a positive or negative number. If it's positive, * then the duetime is absolute, and we need to convert it * to an absolute offset relative to now in order to use it. * If it's negative, then the duetime is relative and we * just have to convert the units. 
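 * For example, a relative 500ms wait arrives as a duetime of -5000000
 * (100-nanosecond units) and becomes tv.tv_sec = 0, tv.tv_usec = 500000
 * below, while a positive (absolute) duetime is first diffed against
 * the current time obtained from ntoskrnl_time().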
*/ if (duetime != NULL) { if (*duetime < 0) { tv.tv_sec = - (*duetime) / 10000000; tv.tv_usec = (- (*duetime) / 10) - (tv.tv_sec * 1000000); } else { ntoskrnl_time(&curtime); if (*duetime < curtime) tv.tv_sec = tv.tv_usec = 0; else { tv.tv_sec = ((*duetime) - curtime) / 10000000; tv.tv_usec = ((*duetime) - curtime) / 10 - (tv.tv_sec * 1000000); } } } if (duetime == NULL) cv_wait(&we.we_cv, &ntoskrnl_dispatchlock); else error = cv_timedwait(&we.we_cv, &ntoskrnl_dispatchlock, tvtohz(&tv)); RemoveEntryList(&w.wb_waitlist); cv_destroy(&we.we_cv); /* We timed out. Leave the object alone and return status. */ if (error == EWOULDBLOCK) { mtx_unlock(&ntoskrnl_dispatchlock); return(STATUS_TIMEOUT); } mtx_unlock(&ntoskrnl_dispatchlock); return(STATUS_SUCCESS); /* return(KeWaitForMultipleObjects(1, &obj, WAITTYPE_ALL, reason, mode, alertable, duetime, &w)); */ } static uint32_t KeWaitForMultipleObjects(cnt, obj, wtype, reason, mode, alertable, duetime, wb_array) uint32_t cnt; nt_dispatch_header *obj[]; uint32_t wtype; uint32_t reason; uint32_t mode; uint8_t alertable; int64_t *duetime; wait_block *wb_array; { struct thread *td = curthread; wait_block *whead, *w; wait_block _wb_array[MAX_WAIT_OBJECTS]; nt_dispatch_header *cur; struct timeval tv; int i, wcnt = 0, error = 0; uint64_t curtime; struct timespec t1, t2; uint32_t status = STATUS_SUCCESS; wb_ext we; if (cnt > MAX_WAIT_OBJECTS) return(STATUS_INVALID_PARAMETER); if (cnt > THREAD_WAIT_OBJECTS && wb_array == NULL) return(STATUS_INVALID_PARAMETER); mtx_lock(&ntoskrnl_dispatchlock); cv_init(&we.we_cv, "KeWFM"); we.we_td = td; if (wb_array == NULL) whead = _wb_array; else whead = wb_array; bzero((char *)whead, sizeof(wait_block) * cnt); /* First pass: see if we can satisfy any waits immediately. */ wcnt = 0; w = whead; for (i = 0; i < cnt; i++) { InsertTailList((&obj[i]->dh_waitlisthead), (&w->wb_waitlist)); w->wb_ext = &we; w->wb_object = obj[i]; w->wb_waittype = wtype; w->wb_waitkey = i; w->wb_awakened = FALSE; w->wb_oldpri = td->td_priority; w->wb_next = w + 1; w++; wcnt++; if (ntoskrnl_is_signalled(obj[i], td)) { /* * There's a limit to how many times * we can recursively acquire a mutant. * If we hit the limit, something * is very wrong. */ if (obj[i]->dh_sigstate == INT32_MIN && obj[i]->dh_type == DISP_TYPE_MUTANT) { mtx_unlock(&ntoskrnl_dispatchlock); panic("mutant limit exceeded"); } /* * If this is a WAITTYPE_ANY wait, then * satisfy the waited object and exit * right now. */ if (wtype == WAITTYPE_ANY) { ntoskrnl_satisfy_wait(obj[i], td); status = STATUS_WAIT_0 + i; goto wait_done; } else { w--; wcnt--; w->wb_object = NULL; RemoveEntryList(&w->wb_waitlist); } } } /* * If this is a WAITTYPE_ALL wait and all objects are * already signalled, satisfy the waits and exit now. */ if (wtype == WAITTYPE_ALL && wcnt == 0) { for (i = 0; i < cnt; i++) ntoskrnl_satisfy_wait(obj[i], td); status = STATUS_SUCCESS; goto wait_done; } /* * Create a circular waitblock list. The waitcount * must always be non-zero when we get here. */ (w - 1)->wb_next = whead; /* Wait on any objects that aren't yet signalled. */ /* Calculate timeout, if any. 
*/ if (duetime != NULL) { if (*duetime < 0) { tv.tv_sec = - (*duetime) / 10000000; tv.tv_usec = (- (*duetime) / 10) - (tv.tv_sec * 1000000); } else { ntoskrnl_time(&curtime); if (*duetime < curtime) tv.tv_sec = tv.tv_usec = 0; else { tv.tv_sec = ((*duetime) - curtime) / 10000000; tv.tv_usec = ((*duetime) - curtime) / 10 - (tv.tv_sec * 1000000); } } } while (wcnt) { nanotime(&t1); if (duetime == NULL) cv_wait(&we.we_cv, &ntoskrnl_dispatchlock); else error = cv_timedwait(&we.we_cv, &ntoskrnl_dispatchlock, tvtohz(&tv)); /* Wait with timeout expired. */ if (error) { status = STATUS_TIMEOUT; goto wait_done; } nanotime(&t2); /* See what's been signalled. */ w = whead; do { cur = w->wb_object; if (ntoskrnl_is_signalled(cur, td) == TRUE || w->wb_awakened == TRUE) { /* Sanity check the signal state value. */ if (cur->dh_sigstate == INT32_MIN && cur->dh_type == DISP_TYPE_MUTANT) { mtx_unlock(&ntoskrnl_dispatchlock); panic("mutant limit exceeded"); } wcnt--; if (wtype == WAITTYPE_ANY) { status = w->wb_waitkey & STATUS_WAIT_0; goto wait_done; } } w = w->wb_next; } while (w != whead); /* * If all objects have been signalled, or if this * is a WAITTYPE_ANY wait and we were woke up by * someone, we can bail. */ if (wcnt == 0) { status = STATUS_SUCCESS; goto wait_done; } /* * If this is WAITTYPE_ALL wait, and there's still * objects that haven't been signalled, deduct the * time that's elapsed so far from the timeout and * wait again (or continue waiting indefinitely if * there's no timeout). */ if (duetime != NULL) { tv.tv_sec -= (t2.tv_sec - t1.tv_sec); tv.tv_usec -= (t2.tv_nsec - t1.tv_nsec) / 1000; } } wait_done: cv_destroy(&we.we_cv); for (i = 0; i < cnt; i++) { if (whead[i].wb_object != NULL) RemoveEntryList(&whead[i].wb_waitlist); } mtx_unlock(&ntoskrnl_dispatchlock); return(status); } static void WRITE_REGISTER_USHORT(reg, val) uint16_t *reg; uint16_t val; { bus_space_write_2(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val); return; } static uint16_t READ_REGISTER_USHORT(reg) uint16_t *reg; { return(bus_space_read_2(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg)); } static void WRITE_REGISTER_ULONG(reg, val) uint32_t *reg; uint32_t val; { bus_space_write_4(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val); return; } static uint32_t READ_REGISTER_ULONG(reg) uint32_t *reg; { return(bus_space_read_4(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg)); } static uint8_t READ_REGISTER_UCHAR(reg) uint8_t *reg; { return(bus_space_read_1(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg)); } static void WRITE_REGISTER_UCHAR(reg, val) uint8_t *reg; uint8_t val; { bus_space_write_1(NDIS_BUS_SPACE_MEM, 0x0, (bus_size_t)reg, val); return; } static int64_t _allmul(a, b) int64_t a; int64_t b; { return (a * b); } static int64_t _alldiv(a, b) int64_t a; int64_t b; { return (a / b); } static int64_t _allrem(a, b) int64_t a; int64_t b; { return (a % b); } static uint64_t _aullmul(a, b) uint64_t a; uint64_t b; { return (a * b); } static uint64_t _aulldiv(a, b) uint64_t a; uint64_t b; { return (a / b); } static uint64_t _aullrem(a, b) uint64_t a; uint64_t b; { return (a % b); } static int64_t _allshl(a, b) int64_t a; uint8_t b; { return (a << b); } static uint64_t _aullshl(a, b) uint64_t a; uint8_t b; { return (a << b); } static int64_t _allshr(a, b) int64_t a; uint8_t b; { return (a >> b); } static uint64_t _aullshr(a, b) uint64_t a; uint8_t b; { return (a >> b); } static slist_entry * ntoskrnl_pushsl(head, entry) slist_header *head; slist_entry *entry; { slist_entry *oldhead; oldhead = head->slh_list.slh_next; entry->sl_next = 
head->slh_list.slh_next; head->slh_list.slh_next = entry; head->slh_list.slh_depth++; head->slh_list.slh_seq++; return(oldhead); } static slist_entry * ntoskrnl_popsl(head) slist_header *head; { slist_entry *first; first = head->slh_list.slh_next; if (first != NULL) { head->slh_list.slh_next = first->sl_next; head->slh_list.slh_depth--; head->slh_list.slh_seq++; } return(first); } /* * We need this to make lookaside lists work for amd64. * We pass a pointer to ExAllocatePoolWithTag() the lookaside * list structure. For amd64 to work right, this has to be a * pointer to the wrapped version of the routine, not the * original. Letting the Windows driver invoke the original * function directly will result in a convention calling * mismatch and a pretty crash. On x86, this effectively * becomes a no-op since ipt_func and ipt_wrap are the same. */ static funcptr ntoskrnl_findwrap(func) funcptr func; { image_patch_table *patch; patch = ntoskrnl_functbl; while (patch->ipt_func != NULL) { if ((funcptr)patch->ipt_func == func) return((funcptr)patch->ipt_wrap); patch++; } return(NULL); } static void ExInitializePagedLookasideList(lookaside, allocfunc, freefunc, flags, size, tag, depth) paged_lookaside_list *lookaside; lookaside_alloc_func *allocfunc; lookaside_free_func *freefunc; uint32_t flags; size_t size; uint32_t tag; uint16_t depth; { bzero((char *)lookaside, sizeof(paged_lookaside_list)); if (size < sizeof(slist_entry)) lookaside->nll_l.gl_size = sizeof(slist_entry); else lookaside->nll_l.gl_size = size; lookaside->nll_l.gl_tag = tag; if (allocfunc == NULL) lookaside->nll_l.gl_allocfunc = ntoskrnl_findwrap((funcptr)ExAllocatePoolWithTag); else lookaside->nll_l.gl_allocfunc = allocfunc; if (freefunc == NULL) lookaside->nll_l.gl_freefunc = ntoskrnl_findwrap((funcptr)ExFreePool); else lookaside->nll_l.gl_freefunc = freefunc; #ifdef __i386__ KeInitializeSpinLock(&lookaside->nll_obsoletelock); #endif lookaside->nll_l.gl_type = NonPagedPool; lookaside->nll_l.gl_depth = depth; lookaside->nll_l.gl_maxdepth = LOOKASIDE_DEPTH; return; } static void ExDeletePagedLookasideList(lookaside) paged_lookaside_list *lookaside; { void *buf; void (*freefunc)(void *); freefunc = lookaside->nll_l.gl_freefunc; while((buf = ntoskrnl_popsl(&lookaside->nll_l.gl_listhead)) != NULL) MSCALL1(freefunc, buf); return; } static void ExInitializeNPagedLookasideList(lookaside, allocfunc, freefunc, flags, size, tag, depth) npaged_lookaside_list *lookaside; lookaside_alloc_func *allocfunc; lookaside_free_func *freefunc; uint32_t flags; size_t size; uint32_t tag; uint16_t depth; { bzero((char *)lookaside, sizeof(npaged_lookaside_list)); if (size < sizeof(slist_entry)) lookaside->nll_l.gl_size = sizeof(slist_entry); else lookaside->nll_l.gl_size = size; lookaside->nll_l.gl_tag = tag; if (allocfunc == NULL) lookaside->nll_l.gl_allocfunc = ntoskrnl_findwrap((funcptr)ExAllocatePoolWithTag); else lookaside->nll_l.gl_allocfunc = allocfunc; if (freefunc == NULL) lookaside->nll_l.gl_freefunc = ntoskrnl_findwrap((funcptr)ExFreePool); else lookaside->nll_l.gl_freefunc = freefunc; #ifdef __i386__ KeInitializeSpinLock(&lookaside->nll_obsoletelock); #endif lookaside->nll_l.gl_type = NonPagedPool; lookaside->nll_l.gl_depth = depth; lookaside->nll_l.gl_maxdepth = LOOKASIDE_DEPTH; return; } static void ExDeleteNPagedLookasideList(lookaside) npaged_lookaside_list *lookaside; { void *buf; void (*freefunc)(void *); freefunc = lookaside->nll_l.gl_freefunc; while((buf = ntoskrnl_popsl(&lookaside->nll_l.gl_listhead)) != NULL) MSCALL1(freefunc, buf); 
return; } slist_entry * InterlockedPushEntrySList(head, entry) slist_header *head; slist_entry *entry; { slist_entry *oldhead; mtx_lock_spin(&ntoskrnl_interlock); oldhead = ntoskrnl_pushsl(head, entry); mtx_unlock_spin(&ntoskrnl_interlock); return(oldhead); } slist_entry * InterlockedPopEntrySList(head) slist_header *head; { slist_entry *first; mtx_lock_spin(&ntoskrnl_interlock); first = ntoskrnl_popsl(head); mtx_unlock_spin(&ntoskrnl_interlock); return(first); } static slist_entry * ExInterlockedPushEntrySList(head, entry, lock) slist_header *head; slist_entry *entry; kspin_lock *lock; { return(InterlockedPushEntrySList(head, entry)); } static slist_entry * ExInterlockedPopEntrySList(head, lock) slist_header *head; kspin_lock *lock; { return(InterlockedPopEntrySList(head)); } uint16_t ExQueryDepthSList(head) slist_header *head; { uint16_t depth; mtx_lock_spin(&ntoskrnl_interlock); depth = head->slh_list.slh_depth; mtx_unlock_spin(&ntoskrnl_interlock); return(depth); } void KeInitializeSpinLock(lock) kspin_lock *lock; { *lock = 0; return; } #ifdef __i386__ void KefAcquireSpinLockAtDpcLevel(lock) kspin_lock *lock; { #ifdef NTOSKRNL_DEBUG_SPINLOCKS int i = 0; #endif while (atomic_cmpset_acq_int((volatile u_int *)lock, 0, 1) == 0) { /* sit and spin */; #ifdef NTOSKRNL_DEBUG_SPINLOCKS i++; if (i > 200000000) panic("DEADLOCK!"); #endif } return; } void KefReleaseSpinLockFromDpcLevel(lock) kspin_lock *lock; { atomic_store_rel_int((volatile u_int *)lock, 0); return; } uint8_t KeAcquireSpinLockRaiseToDpc(kspin_lock *lock) { uint8_t oldirql; if (KeGetCurrentIrql() > DISPATCH_LEVEL) panic("IRQL_NOT_LESS_THAN_OR_EQUAL"); KeRaiseIrql(DISPATCH_LEVEL, &oldirql); KeAcquireSpinLockAtDpcLevel(lock); return(oldirql); } #else void KeAcquireSpinLockAtDpcLevel(kspin_lock *lock) { while (atomic_cmpset_acq_int((volatile u_int *)lock, 0, 1) == 0) /* sit and spin */; return; } void KeReleaseSpinLockFromDpcLevel(kspin_lock *lock) { atomic_store_rel_int((volatile u_int *)lock, 0); return; } #endif /* __i386__ */ uintptr_t InterlockedExchange(dst, val) volatile uint32_t *dst; uintptr_t val; { uintptr_t r; mtx_lock_spin(&ntoskrnl_interlock); r = *dst; *dst = val; mtx_unlock_spin(&ntoskrnl_interlock); return(r); } static uint32_t InterlockedIncrement(addend) volatile uint32_t *addend; { atomic_add_long((volatile u_long *)addend, 1); return(*addend); } static uint32_t InterlockedDecrement(addend) volatile uint32_t *addend; { atomic_subtract_long((volatile u_long *)addend, 1); return(*addend); } static void ExInterlockedAddLargeStatistic(addend, inc) uint64_t *addend; uint32_t inc; { mtx_lock_spin(&ntoskrnl_interlock); *addend += inc; mtx_unlock_spin(&ntoskrnl_interlock); return; }; mdl * IoAllocateMdl(vaddr, len, secondarybuf, chargequota, iopkt) void *vaddr; uint32_t len; uint8_t secondarybuf; uint8_t chargequota; irp *iopkt; { mdl *m; int zone = 0; if (MmSizeOfMdl(vaddr, len) > MDL_ZONE_SIZE) m = ExAllocatePoolWithTag(NonPagedPool, MmSizeOfMdl(vaddr, len), 0); else { m = uma_zalloc(mdl_zone, M_NOWAIT | M_ZERO); zone++; } if (m == NULL) return (NULL); MmInitializeMdl(m, vaddr, len); /* * MmInitializMdl() clears the flags field, so we * have to set this here. If the MDL came from the * MDL UMA zone, tag it so we can release it to * the right place later. 
*/ if (zone) m->mdl_flags = MDL_ZONE_ALLOCED; if (iopkt != NULL) { if (secondarybuf == TRUE) { mdl *last; last = iopkt->irp_mdl; while (last->mdl_next != NULL) last = last->mdl_next; last->mdl_next = m; } else { if (iopkt->irp_mdl != NULL) panic("leaking an MDL in IoAllocateMdl()"); iopkt->irp_mdl = m; } } return (m); } void IoFreeMdl(m) mdl *m; { if (m == NULL) return; if (m->mdl_flags & MDL_ZONE_ALLOCED) uma_zfree(mdl_zone, m); else ExFreePool(m); return; } static void * MmAllocateContiguousMemory(size, highest) uint32_t size; uint64_t highest; { void *addr; size_t pagelength = roundup(size, PAGE_SIZE); addr = ExAllocatePoolWithTag(NonPagedPool, pagelength, 0); return(addr); } static void * MmAllocateContiguousMemorySpecifyCache(size, lowest, highest, boundary, cachetype) uint32_t size; uint64_t lowest; uint64_t highest; uint64_t boundary; uint32_t cachetype; { void *addr; size_t pagelength = roundup(size, PAGE_SIZE); addr = ExAllocatePoolWithTag(NonPagedPool, pagelength, 0); return(addr); } static void MmFreeContiguousMemory(base) void *base; { ExFreePool(base); } static void MmFreeContiguousMemorySpecifyCache(base, size, cachetype) void *base; uint32_t size; uint32_t cachetype; { ExFreePool(base); } static uint32_t MmSizeOfMdl(vaddr, len) void *vaddr; size_t len; { uint32_t l; l = sizeof(struct mdl) + (sizeof(vm_offset_t *) * SPAN_PAGES(vaddr, len)); return(l); } /* * The Microsoft documentation says this routine fills in the * page array of an MDL with the _physical_ page addresses that * comprise the buffer, but we don't really want to do that here. * Instead, we just fill in the page array with the kernel virtual * addresses of the buffers. */ void MmBuildMdlForNonPagedPool(m) mdl *m; { vm_offset_t *mdl_pages; int pagecnt, i; pagecnt = SPAN_PAGES(m->mdl_byteoffset, m->mdl_bytecount); if (pagecnt > (m->mdl_size - sizeof(mdl)) / sizeof(vm_offset_t *)) panic("not enough pages in MDL to describe buffer"); mdl_pages = MmGetMdlPfnArray(m); for (i = 0; i < pagecnt; i++) *mdl_pages = (vm_offset_t)m->mdl_startva + (i * PAGE_SIZE); m->mdl_flags |= MDL_SOURCE_IS_NONPAGED_POOL; m->mdl_mappedsystemva = MmGetMdlVirtualAddress(m); return; } static void * MmMapLockedPages(buf, accessmode) mdl *buf; uint8_t accessmode; { buf->mdl_flags |= MDL_MAPPED_TO_SYSTEM_VA; return(MmGetMdlVirtualAddress(buf)); } static void * MmMapLockedPagesSpecifyCache(buf, accessmode, cachetype, vaddr, bugcheck, prio) mdl *buf; uint8_t accessmode; uint32_t cachetype; void *vaddr; uint32_t bugcheck; uint32_t prio; { return(MmMapLockedPages(buf, accessmode)); } static void MmUnmapLockedPages(vaddr, buf) void *vaddr; mdl *buf; { buf->mdl_flags &= ~MDL_MAPPED_TO_SYSTEM_VA; return; } /* * This function has a problem in that it will break if you * compile this module without PAE and try to use it on a PAE * kernel. Unfortunately, there's no way around this at the * moment. It's slightly less broken that using pmap_kextract(). * You'd think the virtual memory subsystem would help us out * here, but it doesn't. */ static uint8_t MmIsAddressValid(vaddr) void *vaddr; { if (pmap_extract(kernel_map->pmap, (vm_offset_t)vaddr)) return(TRUE); return(FALSE); } void * MmMapIoSpace(paddr, len, cachetype) uint64_t paddr; uint32_t len; uint32_t cachetype; { devclass_t nexus_class; device_t *nexus_devs, devp; int nexus_count = 0; device_t matching_dev = NULL; struct resource *res; int i; vm_offset_t v; /* There will always be at least one nexus. 
*/ nexus_class = devclass_find("nexus"); devclass_get_devices(nexus_class, &nexus_devs, &nexus_count); for (i = 0; i < nexus_count; i++) { devp = nexus_devs[i]; matching_dev = ntoskrnl_finddev(devp, paddr, &res); if (matching_dev) break; } free(nexus_devs, M_TEMP); if (matching_dev == NULL) return(NULL); v = (vm_offset_t)rman_get_virtual(res); if (paddr > rman_get_start(res)) v += paddr - rman_get_start(res); return((void *)v); } void MmUnmapIoSpace(vaddr, len) void *vaddr; size_t len; { return; } static device_t ntoskrnl_finddev(dev, paddr, res) device_t dev; uint64_t paddr; struct resource **res; { device_t *children = NULL; device_t matching_dev; int childcnt; struct resource *r; struct resource_list *rl; struct resource_list_entry *rle; uint32_t flags; int i; /* We only want devices that have been successfully probed. */ if (device_is_alive(dev) == FALSE) return(NULL); rl = BUS_GET_RESOURCE_LIST(device_get_parent(dev), dev); if (rl != NULL) { #if __FreeBSD_version < 600022 SLIST_FOREACH(rle, rl, link) { #else STAILQ_FOREACH(rle, rl, link) { #endif r = rle->res; if (r == NULL) continue; flags = rman_get_flags(r); if (rle->type == SYS_RES_MEMORY && paddr >= rman_get_start(r) && paddr <= rman_get_end(r)) { if (!(flags & RF_ACTIVE)) bus_activate_resource(dev, SYS_RES_MEMORY, 0, r); *res = r; return(dev); } } } /* * If this device has children, do another * level of recursion to inspect them. */ device_get_children(dev, &children, &childcnt); for (i = 0; i < childcnt; i++) { matching_dev = ntoskrnl_finddev(children[i], paddr, res); if (matching_dev != NULL) { free(children, M_TEMP); return(matching_dev); } } /* Won't somebody please think of the children! */ if (children != NULL) free(children, M_TEMP); return(NULL); } /* * Workitems are unlike DPCs, in that they run in a user-mode thread * context rather than at DISPATCH_LEVEL in kernel context. In our * case we run them in kernel context anyway. 
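 *
 * A hedged sketch of how these worker threads are brought up at module
 * load time (the real initialization lives in the subsystem init path;
 * the loop and thread names below are illustrative):
 *
 *	struct proc *p;
 *	int i;
 *
 *	for (i = 0; i < WORKITEM_THREADS; i++)
 *		kthread_create(ntoskrnl_workitem_thread, wq_queues + i,
 *		    &p, RFHIGHPID, NDIS_KSTACK_PAGES,
 *		    "windows workitem %d", i);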
*/ static void ntoskrnl_workitem_thread(arg) void *arg; { kdpc_queue *kq; list_entry *l; io_workitem *iw; uint8_t irql; kq = arg; InitializeListHead(&kq->kq_disp); kq->kq_td = curthread; kq->kq_exit = 0; KeInitializeSpinLock(&kq->kq_lock); KeInitializeEvent(&kq->kq_proc, EVENT_TYPE_SYNC, FALSE); while (1) { KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL); KeAcquireSpinLock(&kq->kq_lock, &irql); if (kq->kq_exit) { kq->kq_exit = 0; KeReleaseSpinLock(&kq->kq_lock, irql); break; } while (!IsListEmpty(&kq->kq_disp)) { l = RemoveHeadList(&kq->kq_disp); iw = CONTAINING_RECORD(l, io_workitem, iw_listentry); InitializeListHead((&iw->iw_listentry)); if (iw->iw_func == NULL) continue; KeReleaseSpinLock(&kq->kq_lock, irql); MSCALL2(iw->iw_func, iw->iw_dobj, iw->iw_ctx); KeAcquireSpinLock(&kq->kq_lock, &irql); } KeReleaseSpinLock(&kq->kq_lock, irql); } #if __FreeBSD_version < 502113 mtx_lock(&Giant); #endif kthread_exit(0); return; /* notreached */ } static void ntoskrnl_destroy_workitem_threads(void) { kdpc_queue *kq; int i; for (i = 0; i < WORKITEM_THREADS; i++) { kq = wq_queues + i; kq->kq_exit = 1; KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE); while (kq->kq_exit) tsleep(kq->kq_td->td_proc, PWAIT, "waitiw", hz/10); } return; } io_workitem * IoAllocateWorkItem(dobj) device_object *dobj; { io_workitem *iw; iw = uma_zalloc(iw_zone, M_NOWAIT); if (iw == NULL) return(NULL); InitializeListHead(&iw->iw_listentry); iw->iw_dobj = dobj; mtx_lock(&ntoskrnl_dispatchlock); iw->iw_idx = wq_idx; WORKIDX_INC(wq_idx); mtx_unlock(&ntoskrnl_dispatchlock); return(iw); } void IoFreeWorkItem(iw) io_workitem *iw; { uma_zfree(iw_zone, iw); return; } void IoQueueWorkItem(iw, iw_func, qtype, ctx) io_workitem *iw; io_workitem_func iw_func; uint32_t qtype; void *ctx; { kdpc_queue *kq; list_entry *l; io_workitem *cur; uint8_t irql; kq = wq_queues + iw->iw_idx; KeAcquireSpinLock(&kq->kq_lock, &irql); /* * Traverse the list and make sure this workitem hasn't * already been inserted. Queuing the same workitem * twice will hose the list but good. */ l = kq->kq_disp.nle_flink; while (l != &kq->kq_disp) { cur = CONTAINING_RECORD(l, io_workitem, iw_listentry); if (cur == iw) { /* Already queued -- do nothing. */ KeReleaseSpinLock(&kq->kq_lock, irql); return; } l = l->nle_flink; } iw->iw_func = iw_func; iw->iw_ctx = ctx; InsertTailList((&kq->kq_disp), (&iw->iw_listentry)); KeReleaseSpinLock(&kq->kq_lock, irql); KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE); return; } static void ntoskrnl_workitem(dobj, arg) device_object *dobj; void *arg; { io_workitem *iw; work_queue_item *w; work_item_func f; iw = arg; w = (work_queue_item *)dobj; f = (work_item_func)w->wqi_func; uma_zfree(iw_zone, iw); MSCALL2(f, w, w->wqi_ctx); return; } /* * The ExQueueWorkItem() API is deprecated in Windows XP. Microsoft * warns that it's unsafe and to use IoQueueWorkItem() instead. The * problem with ExQueueWorkItem() is that it can't guard against * the condition where a driver submits a job to the work queue and * is then unloaded before the job is able to run. IoQueueWorkItem() * acquires a reference to the device's device_object via the * object manager and retains it until after the job has completed, * which prevents the driver from being unloaded before the job * runs. (We don't currently support this behavior, though hopefully * that will change once the object manager API is fleshed out a bit.) 
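 *
 * A hedged usage sketch of the preferred interface ("dobj",
 * "example_func" and "ctx" are invented, and the queue-type argument
 * is not used by this implementation): allocate a workitem against a
 * device object once, then queue a callback that will run in one of
 * the worker threads above.
 *
 *	io_workitem *iw;
 *
 *	iw = IoAllocateWorkItem(dobj);
 *	if (iw != NULL)
 *		IoQueueWorkItem(iw, example_func, 0, ctx);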
* * Having said all that, the ExQueueWorkItem() API remains, because * there are still other parts of Windows that use it, including * NDIS itself: NdisScheduleWorkItem() calls ExQueueWorkItem(). * We fake up the ExQueueWorkItem() API on top of our implementation * of IoQueueWorkItem(). Workitem thread #3 is reserved exclusively * for ExQueueWorkItem() jobs, and we pass a pointer to the work * queue item (provided by the caller) in to IoAllocateWorkItem() * instead of the device_object. We need to save this pointer so * we can apply a sanity check: as with the DPC queue and other * workitem queues, we can't allow the same work queue item to * be queued twice. If it's already pending, we silently return */ void ExQueueWorkItem(w, qtype) work_queue_item *w; uint32_t qtype; { io_workitem *iw; io_workitem_func iwf; kdpc_queue *kq; list_entry *l; io_workitem *cur; uint8_t irql; /* * We need to do a special sanity test to make sure * the ExQueueWorkItem() API isn't used to queue * the same workitem twice. Rather than checking the * io_workitem pointer itself, we test the attached * device object, which is really a pointer to the * legacy work queue item structure. */ kq = wq_queues + WORKITEM_LEGACY_THREAD; KeAcquireSpinLock(&kq->kq_lock, &irql); l = kq->kq_disp.nle_flink; while (l != &kq->kq_disp) { cur = CONTAINING_RECORD(l, io_workitem, iw_listentry); if (cur->iw_dobj == (device_object *)w) { /* Already queued -- do nothing. */ KeReleaseSpinLock(&kq->kq_lock, irql); return; } l = l->nle_flink; } KeReleaseSpinLock(&kq->kq_lock, irql); iw = IoAllocateWorkItem((device_object *)w); if (iw == NULL) return; iw->iw_idx = WORKITEM_LEGACY_THREAD; iwf = (io_workitem_func)ntoskrnl_findwrap((funcptr)ntoskrnl_workitem); IoQueueWorkItem(iw, iwf, qtype, iw); return; } static void RtlZeroMemory(dst, len) void *dst; size_t len; { bzero(dst, len); return; } static void RtlCopyMemory(dst, src, len) void *dst; const void *src; size_t len; { bcopy(src, dst, len); return; } static size_t RtlCompareMemory(s1, s2, len) const void *s1; const void *s2; size_t len; { size_t i, total = 0; uint8_t *m1, *m2; m1 = __DECONST(char *, s1); m2 = __DECONST(char *, s2); for (i = 0; i < len; i++) { if (m1[i] == m2[i]) total++; } return(total); } void RtlInitAnsiString(dst, src) ansi_string *dst; char *src; { ansi_string *a; a = dst; if (a == NULL) return; if (src == NULL) { a->as_len = a->as_maxlen = 0; a->as_buf = NULL; } else { a->as_buf = src; a->as_len = a->as_maxlen = strlen(src); } return; } void RtlInitUnicodeString(dst, src) unicode_string *dst; uint16_t *src; { unicode_string *u; int i; u = dst; if (u == NULL) return; if (src == NULL) { u->us_len = u->us_maxlen = 0; u->us_buf = NULL; } else { i = 0; while(src[i] != 0) i++; u->us_buf = src; u->us_len = u->us_maxlen = i * 2; } return; } ndis_status RtlUnicodeStringToInteger(ustr, base, val) unicode_string *ustr; uint32_t base; uint32_t *val; { uint16_t *uchr; int len, neg = 0; char abuf[64]; char *astr; uchr = ustr->us_buf; len = ustr->us_len; bzero(abuf, sizeof(abuf)); if ((char)((*uchr) & 0xFF) == '-') { neg = 1; uchr++; len -= 2; } else if ((char)((*uchr) & 0xFF) == '+') { neg = 0; uchr++; len -= 2; } if (base == 0) { if ((char)((*uchr) & 0xFF) == 'b') { base = 2; uchr++; len -= 2; } else if ((char)((*uchr) & 0xFF) == 'o') { base = 8; uchr++; len -= 2; } else if ((char)((*uchr) & 0xFF) == 'x') { base = 16; uchr++; len -= 2; } else base = 10; } astr = abuf; if (neg) { strcpy(astr, "-"); astr++; } ntoskrnl_unicode_to_ascii(uchr, astr, len); *val = strtoul(abuf, NULL, 
base); return(STATUS_SUCCESS); } void RtlFreeUnicodeString(ustr) unicode_string *ustr; { if (ustr->us_buf == NULL) return; ExFreePool(ustr->us_buf); ustr->us_buf = NULL; return; } void RtlFreeAnsiString(astr) ansi_string *astr; { if (astr->as_buf == NULL) return; ExFreePool(astr->as_buf); astr->as_buf = NULL; return; } static int atoi(str) const char *str; { return (int)strtol(str, (char **)NULL, 10); } static long atol(str) const char *str; { return strtol(str, (char **)NULL, 10); } static int rand(void) { struct timeval tv; microtime(&tv); srandom(tv.tv_usec); return((int)random()); } static void srand(seed) unsigned int seed; { srandom(seed); return; } static uint8_t IoIsWdmVersionAvailable(major, minor) uint8_t major; uint8_t minor; { if (major == WDM_MAJOR && minor == WDM_MINOR_WINXP) return(TRUE); return(FALSE); } static ndis_status IoGetDeviceProperty(devobj, regprop, buflen, prop, reslen) device_object *devobj; uint32_t regprop; uint32_t buflen; void *prop; uint32_t *reslen; { driver_object *drv; uint16_t **name; drv = devobj->do_drvobj; switch (regprop) { case DEVPROP_DRIVER_KEYNAME: name = prop; *name = drv->dro_drivername.us_buf; *reslen = drv->dro_drivername.us_len; break; default: return(STATUS_INVALID_PARAMETER_2); break; } return(STATUS_SUCCESS); } static void KeInitializeMutex(kmutex, level) kmutant *kmutex; uint32_t level; { InitializeListHead((&kmutex->km_header.dh_waitlisthead)); kmutex->km_abandoned = FALSE; kmutex->km_apcdisable = 1; kmutex->km_header.dh_sigstate = 1; kmutex->km_header.dh_type = DISP_TYPE_MUTANT; kmutex->km_header.dh_size = sizeof(kmutant) / sizeof(uint32_t); kmutex->km_ownerthread = NULL; return; } static uint32_t KeReleaseMutex(kmutex, kwait) kmutant *kmutex; uint8_t kwait; { uint32_t prevstate; mtx_lock(&ntoskrnl_dispatchlock); prevstate = kmutex->km_header.dh_sigstate; if (kmutex->km_ownerthread != curthread) { mtx_unlock(&ntoskrnl_dispatchlock); return(STATUS_MUTANT_NOT_OWNED); } kmutex->km_header.dh_sigstate++; kmutex->km_abandoned = FALSE; if (kmutex->km_header.dh_sigstate == 1) { kmutex->km_ownerthread = NULL; ntoskrnl_waittest(&kmutex->km_header, IO_NO_INCREMENT); } mtx_unlock(&ntoskrnl_dispatchlock); return(prevstate); } static uint32_t KeReadStateMutex(kmutex) kmutant *kmutex; { return(kmutex->km_header.dh_sigstate); } void KeInitializeEvent(kevent, type, state) nt_kevent *kevent; uint32_t type; uint8_t state; { InitializeListHead((&kevent->k_header.dh_waitlisthead)); kevent->k_header.dh_sigstate = state; if (type == EVENT_TYPE_NOTIFY) kevent->k_header.dh_type = DISP_TYPE_NOTIFICATION_EVENT; else kevent->k_header.dh_type = DISP_TYPE_SYNCHRONIZATION_EVENT; kevent->k_header.dh_size = sizeof(nt_kevent) / sizeof(uint32_t); return; } uint32_t KeResetEvent(kevent) nt_kevent *kevent; { uint32_t prevstate; mtx_lock(&ntoskrnl_dispatchlock); prevstate = kevent->k_header.dh_sigstate; kevent->k_header.dh_sigstate = FALSE; mtx_unlock(&ntoskrnl_dispatchlock); return(prevstate); } uint32_t KeSetEvent(kevent, increment, kwait) nt_kevent *kevent; uint32_t increment; uint8_t kwait; { uint32_t prevstate; wait_block *w; nt_dispatch_header *dh; struct thread *td; wb_ext *we; mtx_lock(&ntoskrnl_dispatchlock); prevstate = kevent->k_header.dh_sigstate; dh = &kevent->k_header; if (IsListEmpty(&dh->dh_waitlisthead)) /* * If there's nobody in the waitlist, just set * the state to signalled. */ dh->dh_sigstate = 1; else { /* * Get the first waiter. 
If this is a synchronization * event, just wake up that one thread (don't bother * setting the state to signalled since we're supposed * to automatically clear synchronization events anyway). * * If it's a notification event, or the the first * waiter is doing a WAITTYPE_ALL wait, go through * the full wait satisfaction process. */ w = CONTAINING_RECORD(dh->dh_waitlisthead.nle_flink, wait_block, wb_waitlist); we = w->wb_ext; td = we->we_td; if (kevent->k_header.dh_type == DISP_TYPE_NOTIFICATION_EVENT || w->wb_waittype == WAITTYPE_ALL) { if (prevstate == 0) { dh->dh_sigstate = 1; ntoskrnl_waittest(dh, increment); } } else { w->wb_awakened |= TRUE; cv_broadcastpri(&we->we_cv, w->wb_oldpri - (increment * 4)); } } mtx_unlock(&ntoskrnl_dispatchlock); return(prevstate); } void KeClearEvent(kevent) nt_kevent *kevent; { kevent->k_header.dh_sigstate = FALSE; return; } uint32_t KeReadStateEvent(kevent) nt_kevent *kevent; { return(kevent->k_header.dh_sigstate); } /* * The object manager in Windows is responsible for managing * references and access to various types of objects, including * device_objects, events, threads, timers and so on. However, * there's a difference in the way objects are handled in user * mode versus kernel mode. * * In user mode (i.e. Win32 applications), all objects are * managed by the object manager. For example, when you create * a timer or event object, you actually end up with an * object_header (for the object manager's bookkeeping * purposes) and an object body (which contains the actual object * structure, e.g. ktimer, kevent, etc...). This allows Windows * to manage resource quotas and to enforce access restrictions * on basically every kind of system object handled by the kernel. * * However, in kernel mode, you only end up using the object * manager some of the time. For example, in a driver, you create * a timer object by simply allocating the memory for a ktimer * structure and initializing it with KeInitializeTimer(). Hence, * the timer has no object_header and no reference counting or * security/resource checks are done on it. The assumption in * this case is that if you're running in kernel mode, you know * what you're doing, and you're already at an elevated privilege * anyway. * * There are some exceptions to this. The two most important ones * for our purposes are device_objects and threads. We need to use * the object manager to do reference counting on device_objects, * and for threads, you can only get a pointer to a thread's * dispatch header by using ObReferenceObjectByHandle() on the * handle returned by PsCreateSystemThread(). 
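 *
 * A hedged sketch of the thread case (error handling trimmed, names
 * invented; note that this implementation ignores the access and type
 * arguments): reference the handle returned by PsCreateSystemThread(),
 * wait for the thread to exit, then drop the reference.
 *
 *	nt_objref *thr;
 *
 *	if (ObReferenceObjectByHandle(handle, 0, NULL, 0,
 *	    (void **)&thr, NULL) == STATUS_SUCCESS) {
 *		KeWaitForSingleObject(&thr->no_dh, 0, 0, FALSE, NULL);
 *		ObfDereferenceObject(thr);
 *	}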
*/ static ndis_status ObReferenceObjectByHandle(handle, reqaccess, otype, accessmode, object, handleinfo) ndis_handle handle; uint32_t reqaccess; void *otype; uint8_t accessmode; void **object; void **handleinfo; { nt_objref *nr; nr = malloc(sizeof(nt_objref), M_DEVBUF, M_NOWAIT|M_ZERO); if (nr == NULL) return(STATUS_INSUFFICIENT_RESOURCES); InitializeListHead((&nr->no_dh.dh_waitlisthead)); nr->no_obj = handle; nr->no_dh.dh_type = DISP_TYPE_THREAD; nr->no_dh.dh_sigstate = 0; nr->no_dh.dh_size = (uint8_t)(sizeof(struct thread) / sizeof(uint32_t)); TAILQ_INSERT_TAIL(&ntoskrnl_reflist, nr, link); *object = nr; return(STATUS_SUCCESS); } static void ObfDereferenceObject(object) void *object; { nt_objref *nr; nr = object; TAILQ_REMOVE(&ntoskrnl_reflist, nr, link); free(nr, M_DEVBUF); return; } static uint32_t ZwClose(handle) ndis_handle handle; { return(STATUS_SUCCESS); } static uint32_t WmiQueryTraceInformation(traceclass, traceinfo, infolen, reqlen, buf) uint32_t traceclass; void *traceinfo; uint32_t infolen; uint32_t reqlen; void *buf; { return(STATUS_NOT_FOUND); } static uint32_t WmiTraceMessage(uint64_t loghandle, uint32_t messageflags, void *guid, uint16_t messagenum, ...) { return(STATUS_SUCCESS); } static uint32_t IoWMIRegistrationControl(dobj, action) device_object *dobj; uint32_t action; { return(STATUS_SUCCESS); } /* * This is here just in case the thread returns without calling * PsTerminateSystemThread(). */ static void ntoskrnl_thrfunc(arg) void *arg; { thread_context *thrctx; uint32_t (*tfunc)(void *); void *tctx; uint32_t rval; thrctx = arg; tfunc = thrctx->tc_thrfunc; tctx = thrctx->tc_thrctx; free(thrctx, M_TEMP); rval = MSCALL1(tfunc, tctx); PsTerminateSystemThread(rval); return; /* notreached */ } static ndis_status PsCreateSystemThread(handle, reqaccess, objattrs, phandle, clientid, thrfunc, thrctx) ndis_handle *handle; uint32_t reqaccess; void *objattrs; ndis_handle phandle; void *clientid; void *thrfunc; void *thrctx; { int error; char tname[128]; thread_context *tc; struct proc *p; tc = malloc(sizeof(thread_context), M_TEMP, M_NOWAIT); if (tc == NULL) return(STATUS_INSUFFICIENT_RESOURCES); tc->tc_thrctx = thrctx; tc->tc_thrfunc = thrfunc; sprintf(tname, "windows kthread %d", ntoskrnl_kth); error = kthread_create(ntoskrnl_thrfunc, tc, &p, RFHIGHPID, NDIS_KSTACK_PAGES, tname); if (error) { free(tc, M_TEMP); return(STATUS_INSUFFICIENT_RESOURCES); } *handle = p; ntoskrnl_kth++; return(STATUS_SUCCESS); } /* * In Windows, the exit of a thread is an event that you're allowed * to wait on, assuming you've obtained a reference to the thread using * ObReferenceObjectByHandle(). Unfortunately, the only way we can * simulate this behavior is to register each thread we create in a * reference list, and if someone holds a reference to us, we poke * them. */ static ndis_status PsTerminateSystemThread(status) ndis_status status; { struct nt_objref *nr; mtx_lock(&ntoskrnl_dispatchlock); TAILQ_FOREACH(nr, &ntoskrnl_reflist, link) { if (nr->no_obj != curthread->td_proc) continue; nr->no_dh.dh_sigstate = 1; ntoskrnl_waittest(&nr->no_dh, IO_NO_INCREMENT); break; } mtx_unlock(&ntoskrnl_dispatchlock); ntoskrnl_kth--; #if __FreeBSD_version < 502113 mtx_lock(&Giant); #endif kthread_exit(0); return(0); /* notreached */ } static uint32_t DbgPrint(char *fmt, ...) 
{ va_list ap; if (bootverbose) { va_start(ap, fmt); vprintf(fmt, ap); } return(STATUS_SUCCESS); } static void DbgBreakPoint(void) { #if __FreeBSD_version < 502113 Debugger("DbgBreakPoint(): breakpoint"); #else - kdb_enter("DbgBreakPoint(): breakpoint"); + kdb_enter_why(KDB_WHY_NDIS, "DbgBreakPoint(): breakpoint"); #endif } static void ntoskrnl_timercall(arg) void *arg; { ktimer *timer; struct timeval tv; kdpc *dpc; mtx_lock(&ntoskrnl_dispatchlock); timer = arg; #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_fires++; #endif ntoskrnl_remove_timer(timer); /* * This should never happen, but complain * if it does. */ if (timer->k_header.dh_inserted == FALSE) { mtx_unlock(&ntoskrnl_dispatchlock); printf("NTOS: timer %p fired even though " "it was canceled\n", timer); return; } /* Mark the timer as no longer being on the timer queue. */ timer->k_header.dh_inserted = FALSE; /* Now signal the object and satisfy any waits on it. */ timer->k_header.dh_sigstate = 1; ntoskrnl_waittest(&timer->k_header, IO_NO_INCREMENT); /* * If this is a periodic timer, re-arm it * so it will fire again. We do this before * calling any deferred procedure calls because * it's possible the DPC might cancel the timer, * in which case it would be wrong for us to * re-arm it again afterwards. */ if (timer->k_period) { tv.tv_sec = 0; tv.tv_usec = timer->k_period * 1000; timer->k_header.dh_inserted = TRUE; ntoskrnl_insert_timer(timer, tvtohz(&tv)); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_reloads++; #endif } dpc = timer->k_dpc; mtx_unlock(&ntoskrnl_dispatchlock); /* If there's a DPC associated with the timer, queue it up. */ if (dpc != NULL) KeInsertQueueDpc(dpc, NULL, NULL); return; } #ifdef NTOSKRNL_DEBUG_TIMERS static int sysctl_show_timers(SYSCTL_HANDLER_ARGS) { int ret; ret = 0; ntoskrnl_show_timers(); return (sysctl_handle_int(oidp, &ret, 0, req)); } static void ntoskrnl_show_timers() { int i = 0; list_entry *l; mtx_lock_spin(&ntoskrnl_calllock); l = ntoskrnl_calllist.nle_flink; while(l != &ntoskrnl_calllist) { i++; l = l->nle_flink; } mtx_unlock_spin(&ntoskrnl_calllock); printf("\n"); printf("%d timers available (out of %d)\n", i, NTOSKRNL_TIMEOUTS); printf("timer sets: %qu\n", ntoskrnl_timer_sets); printf("timer reloads: %qu\n", ntoskrnl_timer_reloads); printf("timer cancels: %qu\n", ntoskrnl_timer_cancels); printf("timer fires: %qu\n", ntoskrnl_timer_fires); printf("\n"); return; } #endif /* * Must be called with dispatcher lock held. */ static void ntoskrnl_insert_timer(timer, ticks) ktimer *timer; int ticks; { callout_entry *e; list_entry *l; struct callout *c; /* * Try and allocate a timer. 
*/ mtx_lock_spin(&ntoskrnl_calllock); if (IsListEmpty(&ntoskrnl_calllist)) { mtx_unlock_spin(&ntoskrnl_calllock); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_show_timers(); #endif panic("out of timers!"); } l = RemoveHeadList(&ntoskrnl_calllist); mtx_unlock_spin(&ntoskrnl_calllock); e = CONTAINING_RECORD(l, callout_entry, ce_list); c = &e->ce_callout; timer->k_callout = c; callout_init(c, CALLOUT_MPSAFE); callout_reset(c, ticks, ntoskrnl_timercall, timer); return; } static void ntoskrnl_remove_timer(timer) ktimer *timer; { callout_entry *e; e = (callout_entry *)timer->k_callout; callout_stop(timer->k_callout); mtx_lock_spin(&ntoskrnl_calllock); InsertHeadList((&ntoskrnl_calllist), (&e->ce_list)); mtx_unlock_spin(&ntoskrnl_calllock); return; } void KeInitializeTimer(timer) ktimer *timer; { if (timer == NULL) return; KeInitializeTimerEx(timer, EVENT_TYPE_NOTIFY); return; } void KeInitializeTimerEx(timer, type) ktimer *timer; uint32_t type; { if (timer == NULL) return; bzero((char *)timer, sizeof(ktimer)); InitializeListHead((&timer->k_header.dh_waitlisthead)); timer->k_header.dh_sigstate = FALSE; timer->k_header.dh_inserted = FALSE; if (type == EVENT_TYPE_NOTIFY) timer->k_header.dh_type = DISP_TYPE_NOTIFICATION_TIMER; else timer->k_header.dh_type = DISP_TYPE_SYNCHRONIZATION_TIMER; timer->k_header.dh_size = sizeof(ktimer) / sizeof(uint32_t); return; } /* * DPC subsystem. A Windows Defered Procedure Call has the following * properties: * - It runs at DISPATCH_LEVEL. * - It can have one of 3 importance values that control when it * runs relative to other DPCs in the queue. * - On SMP systems, it can be set to run on a specific processor. * In order to satisfy the last property, we create a DPC thread for * each CPU in the system and bind it to that CPU. Each thread * maintains three queues with different importance levels, which * will be processed in order from lowest to highest. * * In Windows, interrupt handlers run as DPCs. (Not to be confused * with ISRs, which run in interrupt context and can preempt DPCs.) * ISRs are given the highest importance so that they'll take * precedence over timers and other things. */ static void ntoskrnl_dpc_thread(arg) void *arg; { kdpc_queue *kq; kdpc *d; list_entry *l; uint8_t irql; kq = arg; InitializeListHead(&kq->kq_disp); kq->kq_td = curthread; kq->kq_exit = 0; kq->kq_running = FALSE; KeInitializeSpinLock(&kq->kq_lock); KeInitializeEvent(&kq->kq_proc, EVENT_TYPE_SYNC, FALSE); KeInitializeEvent(&kq->kq_done, EVENT_TYPE_SYNC, FALSE); /* * Elevate our priority. DPCs are used to run interrupt * handlers, and they should trigger as soon as possible * once scheduled by an ISR. 
*/ thread_lock(curthread); #ifdef NTOSKRNL_MULTIPLE_DPCS #if __FreeBSD_version >= 502102 sched_bind(curthread, kq->kq_cpu); #endif #endif sched_prio(curthread, PRI_MIN_KERN); #if __FreeBSD_version < 600000 curthread->td_base_pri = PRI_MIN_KERN; #endif thread_unlock(curthread); while (1) { KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL); KeAcquireSpinLock(&kq->kq_lock, &irql); if (kq->kq_exit) { kq->kq_exit = 0; KeReleaseSpinLock(&kq->kq_lock, irql); break; } kq->kq_running = TRUE; while (!IsListEmpty(&kq->kq_disp)) { l = RemoveHeadList((&kq->kq_disp)); d = CONTAINING_RECORD(l, kdpc, k_dpclistentry); InitializeListHead((&d->k_dpclistentry)); KeReleaseSpinLockFromDpcLevel(&kq->kq_lock); MSCALL4(d->k_deferedfunc, d, d->k_deferredctx, d->k_sysarg1, d->k_sysarg2); KeAcquireSpinLockAtDpcLevel(&kq->kq_lock); } kq->kq_running = FALSE; KeReleaseSpinLock(&kq->kq_lock, irql); KeSetEvent(&kq->kq_done, IO_NO_INCREMENT, FALSE); } #if __FreeBSD_version < 502113 mtx_lock(&Giant); #endif kthread_exit(0); return; /* notreached */ } static void ntoskrnl_destroy_dpc_threads(void) { kdpc_queue *kq; kdpc dpc; int i; kq = kq_queues; #ifdef NTOSKRNL_MULTIPLE_DPCS for (i = 0; i < mp_ncpus; i++) { #else for (i = 0; i < 1; i++) { #endif kq += i; kq->kq_exit = 1; KeInitializeDpc(&dpc, NULL, NULL); KeSetTargetProcessorDpc(&dpc, i); KeInsertQueueDpc(&dpc, NULL, NULL); while (kq->kq_exit) tsleep(kq->kq_td->td_proc, PWAIT, "dpcw", hz/10); } return; } static uint8_t ntoskrnl_insert_dpc(head, dpc) list_entry *head; kdpc *dpc; { list_entry *l; kdpc *d; l = head->nle_flink; while (l != head) { d = CONTAINING_RECORD(l, kdpc, k_dpclistentry); if (d == dpc) return(FALSE); l = l->nle_flink; } if (dpc->k_importance == KDPC_IMPORTANCE_LOW) InsertTailList((head), (&dpc->k_dpclistentry)); else InsertHeadList((head), (&dpc->k_dpclistentry)); return (TRUE); } void KeInitializeDpc(dpc, dpcfunc, dpcctx) kdpc *dpc; void *dpcfunc; void *dpcctx; { if (dpc == NULL) return; dpc->k_deferedfunc = dpcfunc; dpc->k_deferredctx = dpcctx; dpc->k_num = KDPC_CPU_DEFAULT; dpc->k_importance = KDPC_IMPORTANCE_MEDIUM; InitializeListHead((&dpc->k_dpclistentry)); return; } uint8_t KeInsertQueueDpc(dpc, sysarg1, sysarg2) kdpc *dpc; void *sysarg1; void *sysarg2; { kdpc_queue *kq; uint8_t r; uint8_t irql; if (dpc == NULL) return(FALSE); kq = kq_queues; #ifdef NTOSKRNL_MULTIPLE_DPCS KeRaiseIrql(DISPATCH_LEVEL, &irql); /* * By default, the DPC is queued to run on the same CPU * that scheduled it. 
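 * A driver can override that by targeting the DPC at a particular CPU
 * before queueing it.  A minimal usage sketch (my_dpc_func and my_ctx
 * are hypothetical; with only a single DPC thread compiled in, the
 * target CPU is recorded but not used for queue selection):
 *
 *	KeInitializeDpc(&dpc, my_dpc_func, my_ctx);
 *	KeSetTargetProcessorDpc(&dpc, 1);
 *	KeInsertQueueDpc(&dpc, NULL, NULL);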
*/ if (dpc->k_num == KDPC_CPU_DEFAULT) kq += curthread->td_oncpu; else kq += dpc->k_num; KeAcquireSpinLockAtDpcLevel(&kq->kq_lock); #else KeAcquireSpinLock(&kq->kq_lock, &irql); #endif r = ntoskrnl_insert_dpc(&kq->kq_disp, dpc); if (r == TRUE) { dpc->k_sysarg1 = sysarg1; dpc->k_sysarg2 = sysarg2; } KeReleaseSpinLock(&kq->kq_lock, irql); if (r == FALSE) return(r); KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE); return(r); } uint8_t KeRemoveQueueDpc(dpc) kdpc *dpc; { kdpc_queue *kq; uint8_t irql; if (dpc == NULL) return(FALSE); #ifdef NTOSKRNL_MULTIPLE_DPCS KeRaiseIrql(DISPATCH_LEVEL, &irql); kq = kq_queues + dpc->k_num; KeAcquireSpinLockAtDpcLevel(&kq->kq_lock); #else kq = kq_queues; KeAcquireSpinLock(&kq->kq_lock, &irql); #endif if (dpc->k_dpclistentry.nle_flink == &dpc->k_dpclistentry) { KeReleaseSpinLockFromDpcLevel(&kq->kq_lock); KeLowerIrql(irql); return(FALSE); } RemoveEntryList((&dpc->k_dpclistentry)); InitializeListHead((&dpc->k_dpclistentry)); KeReleaseSpinLock(&kq->kq_lock, irql); return(TRUE); } void KeSetImportanceDpc(dpc, imp) kdpc *dpc; uint32_t imp; { if (imp != KDPC_IMPORTANCE_LOW && imp != KDPC_IMPORTANCE_MEDIUM && imp != KDPC_IMPORTANCE_HIGH) return; dpc->k_importance = (uint8_t)imp; return; } void KeSetTargetProcessorDpc(dpc, cpu) kdpc *dpc; uint8_t cpu; { if (cpu > mp_ncpus) return; dpc->k_num = cpu; return; } void KeFlushQueuedDpcs(void) { kdpc_queue *kq; int i; /* * Poke each DPC queue and wait * for them to drain. */ #ifdef NTOSKRNL_MULTIPLE_DPCS for (i = 0; i < mp_ncpus; i++) { #else for (i = 0; i < 1; i++) { #endif kq = kq_queues + i; KeSetEvent(&kq->kq_proc, IO_NO_INCREMENT, FALSE); KeWaitForSingleObject(&kq->kq_done, 0, 0, TRUE, NULL); } return; } uint32_t KeGetCurrentProcessorNumber(void) { return((uint32_t)curthread->td_oncpu); } uint8_t KeSetTimerEx(timer, duetime, period, dpc) ktimer *timer; int64_t duetime; uint32_t period; kdpc *dpc; { struct timeval tv; uint64_t curtime; uint8_t pending; if (timer == NULL) return(FALSE); mtx_lock(&ntoskrnl_dispatchlock); if (timer->k_header.dh_inserted == TRUE) { ntoskrnl_remove_timer(timer); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_cancels++; #endif timer->k_header.dh_inserted = FALSE; pending = TRUE; } else pending = FALSE; timer->k_duetime = duetime; timer->k_period = period; timer->k_header.dh_sigstate = FALSE; timer->k_dpc = dpc; if (duetime < 0) { tv.tv_sec = - (duetime) / 10000000; tv.tv_usec = (- (duetime) / 10) - (tv.tv_sec * 1000000); } else { ntoskrnl_time(&curtime); if (duetime < curtime) tv.tv_sec = tv.tv_usec = 0; else { tv.tv_sec = ((duetime) - curtime) / 10000000; tv.tv_usec = ((duetime) - curtime) / 10 - (tv.tv_sec * 1000000); } } timer->k_header.dh_inserted = TRUE; ntoskrnl_insert_timer(timer, tvtohz(&tv)); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_sets++; #endif mtx_unlock(&ntoskrnl_dispatchlock); return(pending); } uint8_t KeSetTimer(timer, duetime, dpc) ktimer *timer; int64_t duetime; kdpc *dpc; { return (KeSetTimerEx(timer, duetime, 0, dpc)); } /* * The Windows DDK documentation seems to say that cancelling * a timer that has a DPC will result in the DPC also being * cancelled, but this isn't really the case. 
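 * KeCancelTimer() below only stops the timer itself; if the timer has
 * already expired and queued its DPC, that DPC still runs.  A teardown
 * sketch (an assumption about typical driver usage, not taken from
 * this change) that also disposes of the DPC:
 *
 *	if (KeCancelTimer(&timer) == FALSE) {
 *		KeRemoveQueueDpc(&dpc);
 *		KeFlushQueuedDpcs();
 *	}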
*/ uint8_t KeCancelTimer(timer) ktimer *timer; { uint8_t pending; if (timer == NULL) return(FALSE); mtx_lock(&ntoskrnl_dispatchlock); pending = timer->k_header.dh_inserted; if (timer->k_header.dh_inserted == TRUE) { timer->k_header.dh_inserted = FALSE; ntoskrnl_remove_timer(timer); #ifdef NTOSKRNL_DEBUG_TIMERS ntoskrnl_timer_cancels++; #endif } mtx_unlock(&ntoskrnl_dispatchlock); return(pending); } uint8_t KeReadStateTimer(timer) ktimer *timer; { return(timer->k_header.dh_sigstate); } static void dummy() { printf ("ntoskrnl dummy called...\n"); return; } image_patch_table ntoskrnl_functbl[] = { IMPORT_SFUNC(RtlZeroMemory, 2), IMPORT_SFUNC(RtlCopyMemory, 3), IMPORT_SFUNC(RtlCompareMemory, 3), IMPORT_SFUNC(RtlEqualUnicodeString, 3), IMPORT_SFUNC(RtlCopyUnicodeString, 2), IMPORT_SFUNC(RtlUnicodeStringToAnsiString, 3), IMPORT_SFUNC(RtlAnsiStringToUnicodeString, 3), IMPORT_SFUNC(RtlInitAnsiString, 2), IMPORT_SFUNC_MAP(RtlInitString, RtlInitAnsiString, 2), IMPORT_SFUNC(RtlInitUnicodeString, 2), IMPORT_SFUNC(RtlFreeAnsiString, 1), IMPORT_SFUNC(RtlFreeUnicodeString, 1), IMPORT_SFUNC(RtlUnicodeStringToInteger, 3), IMPORT_CFUNC(sprintf, 0), IMPORT_CFUNC(vsprintf, 0), IMPORT_CFUNC_MAP(_snprintf, snprintf, 0), IMPORT_CFUNC_MAP(_vsnprintf, vsnprintf, 0), IMPORT_CFUNC(DbgPrint, 0), IMPORT_SFUNC(DbgBreakPoint, 0), IMPORT_CFUNC(strncmp, 0), IMPORT_CFUNC(strcmp, 0), IMPORT_CFUNC_MAP(stricmp, strcasecmp, 0), IMPORT_CFUNC(strncpy, 0), IMPORT_CFUNC(strcpy, 0), IMPORT_CFUNC(strlen, 0), IMPORT_CFUNC_MAP(toupper, ntoskrnl_toupper, 0), IMPORT_CFUNC_MAP(tolower, ntoskrnl_tolower, 0), IMPORT_CFUNC_MAP(strstr, ntoskrnl_strstr, 0), IMPORT_CFUNC_MAP(strchr, index, 0), IMPORT_CFUNC_MAP(strrchr, rindex, 0), IMPORT_CFUNC(memcpy, 0), IMPORT_CFUNC_MAP(memmove, ntoskrnl_memmove, 0), IMPORT_CFUNC_MAP(memset, ntoskrnl_memset, 0), IMPORT_CFUNC_MAP(memchr, ntoskrnl_memchr, 0), IMPORT_SFUNC(IoAllocateDriverObjectExtension, 4), IMPORT_SFUNC(IoGetDriverObjectExtension, 2), IMPORT_FFUNC(IofCallDriver, 2), IMPORT_FFUNC(IofCompleteRequest, 2), IMPORT_SFUNC(IoAcquireCancelSpinLock, 1), IMPORT_SFUNC(IoReleaseCancelSpinLock, 1), IMPORT_SFUNC(IoCancelIrp, 1), IMPORT_SFUNC(IoConnectInterrupt, 11), IMPORT_SFUNC(IoDisconnectInterrupt, 1), IMPORT_SFUNC(IoCreateDevice, 7), IMPORT_SFUNC(IoDeleteDevice, 1), IMPORT_SFUNC(IoGetAttachedDevice, 1), IMPORT_SFUNC(IoAttachDeviceToDeviceStack, 2), IMPORT_SFUNC(IoDetachDevice, 1), IMPORT_SFUNC(IoBuildSynchronousFsdRequest, 7), IMPORT_SFUNC(IoBuildAsynchronousFsdRequest, 6), IMPORT_SFUNC(IoBuildDeviceIoControlRequest, 9), IMPORT_SFUNC(IoAllocateIrp, 2), IMPORT_SFUNC(IoReuseIrp, 2), IMPORT_SFUNC(IoMakeAssociatedIrp, 2), IMPORT_SFUNC(IoFreeIrp, 1), IMPORT_SFUNC(IoInitializeIrp, 3), IMPORT_SFUNC(KeAcquireInterruptSpinLock, 1), IMPORT_SFUNC(KeReleaseInterruptSpinLock, 2), IMPORT_SFUNC(KeSynchronizeExecution, 3), IMPORT_SFUNC(KeWaitForSingleObject, 5), IMPORT_SFUNC(KeWaitForMultipleObjects, 8), IMPORT_SFUNC(_allmul, 4), IMPORT_SFUNC(_alldiv, 4), IMPORT_SFUNC(_allrem, 4), IMPORT_RFUNC(_allshr, 0), IMPORT_RFUNC(_allshl, 0), IMPORT_SFUNC(_aullmul, 4), IMPORT_SFUNC(_aulldiv, 4), IMPORT_SFUNC(_aullrem, 4), IMPORT_RFUNC(_aullshr, 0), IMPORT_RFUNC(_aullshl, 0), IMPORT_CFUNC(atoi, 0), IMPORT_CFUNC(atol, 0), IMPORT_CFUNC(rand, 0), IMPORT_CFUNC(srand, 0), IMPORT_SFUNC(WRITE_REGISTER_USHORT, 2), IMPORT_SFUNC(READ_REGISTER_USHORT, 1), IMPORT_SFUNC(WRITE_REGISTER_ULONG, 2), IMPORT_SFUNC(READ_REGISTER_ULONG, 1), IMPORT_SFUNC(READ_REGISTER_UCHAR, 1), IMPORT_SFUNC(WRITE_REGISTER_UCHAR, 2), 
IMPORT_SFUNC(ExInitializePagedLookasideList, 7), IMPORT_SFUNC(ExDeletePagedLookasideList, 1), IMPORT_SFUNC(ExInitializeNPagedLookasideList, 7), IMPORT_SFUNC(ExDeleteNPagedLookasideList, 1), IMPORT_FFUNC(InterlockedPopEntrySList, 1), IMPORT_FFUNC(InterlockedPushEntrySList, 2), IMPORT_SFUNC(ExQueryDepthSList, 1), IMPORT_FFUNC_MAP(ExpInterlockedPopEntrySList, InterlockedPopEntrySList, 1), IMPORT_FFUNC_MAP(ExpInterlockedPushEntrySList, InterlockedPushEntrySList, 2), IMPORT_FFUNC(ExInterlockedPopEntrySList, 2), IMPORT_FFUNC(ExInterlockedPushEntrySList, 3), IMPORT_SFUNC(ExAllocatePoolWithTag, 3), IMPORT_SFUNC(ExFreePool, 1), #ifdef __i386__ IMPORT_FFUNC(KefAcquireSpinLockAtDpcLevel, 1), IMPORT_FFUNC(KefReleaseSpinLockFromDpcLevel,1), IMPORT_FFUNC(KeAcquireSpinLockRaiseToDpc, 1), #else /* * For AMD64, we can get away with just mapping * KeAcquireSpinLockRaiseToDpc() directly to KfAcquireSpinLock() * because the calling conventions end up being the same. * On i386, we have to be careful because KfAcquireSpinLock() * is _fastcall but KeAcquireSpinLockRaiseToDpc() isn't. */ IMPORT_SFUNC(KeAcquireSpinLockAtDpcLevel, 1), IMPORT_SFUNC(KeReleaseSpinLockFromDpcLevel, 1), IMPORT_SFUNC_MAP(KeAcquireSpinLockRaiseToDpc, KfAcquireSpinLock, 1), #endif IMPORT_SFUNC_MAP(KeReleaseSpinLock, KfReleaseSpinLock, 1), IMPORT_FFUNC(InterlockedIncrement, 1), IMPORT_FFUNC(InterlockedDecrement, 1), IMPORT_FFUNC(InterlockedExchange, 2), IMPORT_FFUNC(ExInterlockedAddLargeStatistic, 2), IMPORT_SFUNC(IoAllocateMdl, 5), IMPORT_SFUNC(IoFreeMdl, 1), IMPORT_SFUNC(MmAllocateContiguousMemory, 2), IMPORT_SFUNC(MmAllocateContiguousMemorySpecifyCache, 5), IMPORT_SFUNC(MmFreeContiguousMemory, 1), IMPORT_SFUNC(MmFreeContiguousMemorySpecifyCache, 3), IMPORT_SFUNC_MAP(MmGetPhysicalAddress, pmap_kextract, 1), IMPORT_SFUNC(MmSizeOfMdl, 1), IMPORT_SFUNC(MmMapLockedPages, 2), IMPORT_SFUNC(MmMapLockedPagesSpecifyCache, 6), IMPORT_SFUNC(MmUnmapLockedPages, 2), IMPORT_SFUNC(MmBuildMdlForNonPagedPool, 1), IMPORT_SFUNC(MmIsAddressValid, 1), IMPORT_SFUNC(MmMapIoSpace, 3 + 1), IMPORT_SFUNC(MmUnmapIoSpace, 2), IMPORT_SFUNC(KeInitializeSpinLock, 1), IMPORT_SFUNC(IoIsWdmVersionAvailable, 2), IMPORT_SFUNC(IoGetDeviceProperty, 5), IMPORT_SFUNC(IoAllocateWorkItem, 1), IMPORT_SFUNC(IoFreeWorkItem, 1), IMPORT_SFUNC(IoQueueWorkItem, 4), IMPORT_SFUNC(ExQueueWorkItem, 2), IMPORT_SFUNC(ntoskrnl_workitem, 2), IMPORT_SFUNC(KeInitializeMutex, 2), IMPORT_SFUNC(KeReleaseMutex, 2), IMPORT_SFUNC(KeReadStateMutex, 1), IMPORT_SFUNC(KeInitializeEvent, 3), IMPORT_SFUNC(KeSetEvent, 3), IMPORT_SFUNC(KeResetEvent, 1), IMPORT_SFUNC(KeClearEvent, 1), IMPORT_SFUNC(KeReadStateEvent, 1), IMPORT_SFUNC(KeInitializeTimer, 1), IMPORT_SFUNC(KeInitializeTimerEx, 2), IMPORT_SFUNC(KeSetTimer, 3), IMPORT_SFUNC(KeSetTimerEx, 4), IMPORT_SFUNC(KeCancelTimer, 1), IMPORT_SFUNC(KeReadStateTimer, 1), IMPORT_SFUNC(KeInitializeDpc, 3), IMPORT_SFUNC(KeInsertQueueDpc, 3), IMPORT_SFUNC(KeRemoveQueueDpc, 1), IMPORT_SFUNC(KeSetImportanceDpc, 2), IMPORT_SFUNC(KeSetTargetProcessorDpc, 2), IMPORT_SFUNC(KeFlushQueuedDpcs, 0), IMPORT_SFUNC(KeGetCurrentProcessorNumber, 1), IMPORT_SFUNC(ObReferenceObjectByHandle, 6), IMPORT_FFUNC(ObfDereferenceObject, 1), IMPORT_SFUNC(ZwClose, 1), IMPORT_SFUNC(PsCreateSystemThread, 7), IMPORT_SFUNC(PsTerminateSystemThread, 1), IMPORT_SFUNC(IoWMIRegistrationControl, 2), IMPORT_SFUNC(WmiQueryTraceInformation, 5), IMPORT_CFUNC(WmiTraceMessage, 0), /* * This last entry is a catch-all for any function we haven't * implemented yet. 
The PE import list patching routine will * use it for any function that doesn't have an explicit match * in this table. */ { NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL }, /* End of list. */ { NULL, NULL, NULL } }; Index: stable/7/sys/dev/acpica/Osd/OsdDebug.c =================================================================== --- stable/7/sys/dev/acpica/Osd/OsdDebug.c (revision 177733) +++ stable/7/sys/dev/acpica/Osd/OsdDebug.c (revision 177734) @@ -1,109 +1,109 @@ /*- * Copyright (c) 2000 Michael Smith * Copyright (c) 2000 BSDi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * 6.8 : Debugging support */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include UINT32 AcpiOsGetLine(char *Buffer) { #ifdef DDB char *cp; db_readline(Buffer, 80); for (cp = Buffer; *cp != 0; cp++) if (*cp == '\n') *cp = 0; return (AE_OK); #else printf("AcpiOsGetLine called but no input support"); return (AE_NOT_EXIST); #endif /* DDB */ } ACPI_STATUS AcpiOsSignal(UINT32 Function, void *Info) { ACPI_SIGNAL_FATAL_INFO *fatal; switch (Function) { case ACPI_SIGNAL_FATAL: fatal = (ACPI_SIGNAL_FATAL_INFO *)Info; printf("ACPI fatal signal, type 0x%x code 0x%x argument 0x%x", fatal->Type, fatal->Code, fatal->Argument); #ifdef ACPI_DEBUG - kdb_enter("AcpiOsSignal"); + kdb_enter_why(KDB_WHY_ACPI, "AcpiOsSignal"); #endif break; case ACPI_SIGNAL_BREAKPOINT: #ifdef ACPI_DEBUG - kdb_enter((char *)Info); + kdb_enter_why(KDB_WHY_ACPI, (char *)Info); #endif break; default: return (AE_BAD_PARAMETER); } return (AE_OK); } #ifdef ACPI_DEBUGGER void acpi_EnterDebugger(void) { ACPI_PARSE_OBJECT obj; static int initted = 0; if (!initted) { printf("Initialising ACPICA debugger...\n"); AcpiDbInitialize(); initted = 1; } printf("Entering ACPICA debugger...\n"); AcpiDbUserCommands('A', &obj); } #endif /* ACPI_DEBUGGER */ Index: stable/7/sys/dev/dcons/dcons_os.c =================================================================== --- stable/7/sys/dev/dcons/dcons_os.c (revision 177733) +++ stable/7/sys/dev/dcons/dcons_os.c (revision 177734) @@ -1,763 +1,765 @@ /*- * Copyright (C) 2003,2004 * Hidetoshi Shimokawa. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * * This product includes software developed by Hidetoshi Shimokawa. * * 4. Neither the name of the author nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #if __FreeBSD_version >= 502122 #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef __DragonFly__ #include "dcons.h" #include "dcons_os.h" #else #include #include #endif #include #include #include #include #include #include #include "opt_comconsole.h" #include "opt_dcons.h" #include "opt_kdb.h" #include "opt_gdb.h" #include "opt_ddb.h" #ifndef DCONS_POLL_HZ #define DCONS_POLL_HZ 100 #endif #ifndef DCONS_BUF_SIZE #define DCONS_BUF_SIZE (16*1024) #endif #ifndef DCONS_FORCE_CONSOLE #define DCONS_FORCE_CONSOLE 0 /* Mostly for FreeBSD-4/DragonFly */ #endif #ifndef DCONS_FORCE_GDB #define DCONS_FORCE_GDB 1 #endif #if __FreeBSD_version >= 500101 #define CONS_NODEV 1 #if __FreeBSD_version < 502122 static struct consdev gdbconsdev; #endif #endif static d_open_t dcons_open; static d_close_t dcons_close; #if defined(__DragonFly__) || __FreeBSD_version < 500104 static d_ioctl_t dcons_ioctl; #endif static struct cdevsw dcons_cdevsw = { #ifdef __DragonFly__ #define CDEV_MAJOR 184 "dcons", CDEV_MAJOR, D_TTY, NULL, 0, dcons_open, dcons_close, ttyread, ttywrite, dcons_ioctl, ttypoll, nommap, nostrategy, nodump, nopsize, #elif __FreeBSD_version >= 500104 .d_version = D_VERSION, .d_open = dcons_open, .d_close = dcons_close, .d_name = "dcons", .d_flags = D_TTY | D_NEEDGIANT, #else #define CDEV_MAJOR 184 /* open */ dcons_open, /* close */ dcons_close, /* read */ ttyread, /* write */ ttywrite, /* ioctl */ dcons_ioctl, /* poll */ ttypoll, /* mmap */ nommap, /* strategy */ nostrategy, /* name */ "dcons", /* major */ CDEV_MAJOR, /* dump */ nodump, /* psize */ nopsize, /* flags */ D_TTY, #endif }; #ifndef KLD_MODULE static char bssbuf[DCONS_BUF_SIZE]; /* buf in bss */ #endif /* global data */ static struct dcons_global dg; struct dcons_global *dcons_conf; static int poll_hz = 
DCONS_POLL_HZ; static struct dcons_softc sc[DCONS_NPORT]; SYSCTL_NODE(_kern, OID_AUTO, dcons, CTLFLAG_RD, 0, "Dumb Console"); SYSCTL_INT(_kern_dcons, OID_AUTO, poll_hz, CTLFLAG_RW, &poll_hz, 0, "dcons polling rate"); static int drv_init = 0; static struct callout dcons_callout; struct dcons_buf *dcons_buf; /* for local dconschat */ #ifdef __DragonFly__ #define DEV dev_t #define THREAD d_thread_t #elif __FreeBSD_version < 500000 #define DEV dev_t #define THREAD struct proc #else #define DEV struct cdev * #define THREAD struct thread #endif static void dcons_tty_start(struct tty *); static int dcons_tty_param(struct tty *, struct termios *); static void dcons_timeout(void *); static int dcons_drv_init(int); static cn_probe_t dcons_cnprobe; static cn_init_t dcons_cninit; static cn_term_t dcons_cnterm; static cn_getc_t dcons_cngetc; static cn_putc_t dcons_cnputc; CONSOLE_DRIVER(dcons); #if defined(GDB) && (__FreeBSD_version >= 502122) static gdb_probe_f dcons_dbg_probe; static gdb_init_f dcons_dbg_init; static gdb_term_f dcons_dbg_term; static gdb_getc_f dcons_dbg_getc; static gdb_putc_f dcons_dbg_putc; GDB_DBGPORT(dcons, dcons_dbg_probe, dcons_dbg_init, dcons_dbg_term, dcons_dbg_getc, dcons_dbg_putc); extern struct gdb_dbgport *gdb_cur; #endif #if (defined(GDB) || defined(DDB)) && defined(ALT_BREAK_TO_DEBUGGER) static int dcons_check_break(struct dcons_softc *dc, int c) { if (c < 0) return (c); #if __FreeBSD_version >= 502122 if (kdb_alt_break(c, &dc->brk_state)) { if ((dc->flags & DC_GDB) != 0) { #ifdef GDB if (gdb_cur == &dcons_gdb_dbgport) { kdb_dbbe_select("gdb"); - kdb_enter("Break sequence on dcons gdb port"); + kdb_enter_why(KDB_WHY_BREAK, + "Break sequence on dcons gdb port"); } #endif } else - kdb_enter("Break sequence on dcons console port"); + kdb_enter_why(KDB_WHY_BREAK, + "Break sequence on dcons console port"); } #else switch (dc->brk_state) { case STATE1: if (c == KEY_TILDE) dc->brk_state = STATE2; else dc->brk_state = STATE0; break; case STATE2: dc->brk_state = STATE0; if (c == KEY_CTRLB) { #if DCONS_FORCE_GDB if (dc->flags & DC_GDB) boothowto |= RB_GDB; #endif breakpoint(); } } if (c == KEY_CR) dc->brk_state = STATE1; #endif return (c); } #else #define dcons_check_break(dc, c) (c) #endif static int dcons_os_checkc_nopoll(struct dcons_softc *dc) { int c; if (dg.dma_tag != NULL) bus_dmamap_sync(dg.dma_tag, dg.dma_map, BUS_DMASYNC_POSTREAD); c = dcons_check_break(dc, dcons_checkc(dc)); if (dg.dma_tag != NULL) bus_dmamap_sync(dg.dma_tag, dg.dma_map, BUS_DMASYNC_PREREAD); return (c); } static int dcons_os_checkc(struct dcons_softc *dc) { EVENTHANDLER_INVOKE(dcons_poll, 0); return (dcons_os_checkc_nopoll(dc)); } #if defined(GDB) || !defined(CONS_NODEV) static int dcons_os_getc(struct dcons_softc *dc) { int c; while ((c = dcons_os_checkc(dc)) == -1); return (c & 0xff); } #endif static void dcons_os_putc(struct dcons_softc *dc, int c) { if (dg.dma_tag != NULL) bus_dmamap_sync(dg.dma_tag, dg.dma_map, BUS_DMASYNC_POSTWRITE); dcons_putc(dc, c); if (dg.dma_tag != NULL) bus_dmamap_sync(dg.dma_tag, dg.dma_map, BUS_DMASYNC_PREWRITE); } static int dcons_open(DEV dev, int flag, int mode, THREAD *td) { struct tty *tp; int unit, error, s; unit = minor(dev); if (unit != 0) return (ENXIO); tp = dev->si_tty; tp->t_oproc = dcons_tty_start; tp->t_param = dcons_tty_param; tp->t_stop = nottystop; tp->t_dev = dev; error = 0; s = spltty(); if ((tp->t_state & TS_ISOPEN) == 0) { tp->t_state |= TS_CARR_ON; ttyconsolemode(tp, 0); } else if ((tp->t_state & TS_XCLUDE) && priv_check(td, PRIV_TTY_EXCLUSIVE)) { 
splx(s); return (EBUSY); } splx(s); #if __FreeBSD_version < 502113 error = (*linesw[tp->t_line].l_open)(dev, tp); #else error = ttyld_open(tp, dev); #endif return (error); } static int dcons_close(DEV dev, int flag, int mode, THREAD *td) { int unit; struct tty *tp; unit = minor(dev); if (unit != 0) return (ENXIO); tp = dev->si_tty; if (tp->t_state & TS_ISOPEN) { #if __FreeBSD_version < 502113 (*linesw[tp->t_line].l_close)(tp, flag); ttyclose(tp); #else ttyld_close(tp, flag); tty_close(tp); #endif } return (0); } #if defined(__DragonFly__) || __FreeBSD_version < 500104 static int dcons_ioctl(DEV dev, u_long cmd, caddr_t data, int flag, THREAD *td) { int unit; struct tty *tp; int error; unit = minor(dev); if (unit != 0) return (ENXIO); tp = dev->si_tty; error = (*linesw[tp->t_line].l_ioctl)(tp, cmd, data, flag, td); if (error != ENOIOCTL) return (error); error = ttioctl(tp, cmd, data, flag); if (error != ENOIOCTL) return (error); return (ENOTTY); } #endif static int dcons_tty_param(struct tty *tp, struct termios *t) { tp->t_ispeed = t->c_ispeed; tp->t_ospeed = t->c_ospeed; tp->t_cflag = t->c_cflag; return 0; } static void dcons_tty_start(struct tty *tp) { struct dcons_softc *dc; int s; dc = (struct dcons_softc *)tp->t_dev->si_drv1; s = spltty(); if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { ttwwakeup(tp); return; } tp->t_state |= TS_BUSY; while (tp->t_outq.c_cc != 0) dcons_os_putc(dc, getc(&tp->t_outq)); tp->t_state &= ~TS_BUSY; ttwwakeup(tp); splx(s); } static void dcons_timeout(void *v) { struct tty *tp; struct dcons_softc *dc; int i, c, polltime; for (i = 0; i < DCONS_NPORT; i ++) { dc = &sc[i]; tp = ((DEV)dc->dev)->si_tty; while ((c = dcons_os_checkc_nopoll(dc)) != -1) if (tp->t_state & TS_ISOPEN) #if __FreeBSD_version < 502113 (*linesw[tp->t_line].l_rint)(c, tp); #else ttyld_rint(tp, c); #endif } polltime = hz / poll_hz; if (polltime < 1) polltime = 1; callout_reset(&dcons_callout, polltime, dcons_timeout, tp); } static void dcons_cnprobe(struct consdev *cp) { #ifdef __DragonFly__ cp->cn_dev = make_dev(&dcons_cdevsw, DCONS_CON, UID_ROOT, GID_WHEEL, 0600, "dcons"); #elif __FreeBSD_version >= 501109 sprintf(cp->cn_name, "dcons"); #else cp->cn_dev = makedev(CDEV_MAJOR, DCONS_CON); #endif #if DCONS_FORCE_CONSOLE cp->cn_pri = CN_REMOTE; #else cp->cn_pri = CN_NORMAL; #endif } static void dcons_cninit(struct consdev *cp) { dcons_drv_init(0); #if CONS_NODEV cp->cn_arg #else cp->cn_dev->si_drv1 #endif = (void *)&sc[DCONS_CON]; /* share port0 with unit0 */ } static void dcons_cnterm(struct consdev *cp) { } #if CONS_NODEV static int dcons_cngetc(struct consdev *cp) { struct dcons_softc *dc = (struct dcons_softc *)cp->cn_arg; return (dcons_os_checkc(dc)); } static void dcons_cnputc(struct consdev *cp, int c) { struct dcons_softc *dc = (struct dcons_softc *)cp->cn_arg; dcons_os_putc(dc, c); } #else static int dcons_cngetc(DEV dev) { struct dcons_softc *dc = (struct dcons_softc *)dev->si_drv1; return (dcons_os_getc(dc)); } static int dcons_cncheckc(DEV dev) { struct dcons_softc *dc = (struct dcons_softc *)dev->si_drv1; return (dcons_os_checkc(dc)); } static void dcons_cnputc(DEV dev, int c) { struct dcons_softc *dc = (struct dcons_softc *)dev->si_drv1; dcons_os_putc(dc, c); } #endif static int dcons_drv_init(int stage) { #if defined(__i386__) || defined(__amd64__) quad_t addr, size; #endif if (drv_init) return(drv_init); drv_init = -1; bzero(&dg, sizeof(dg)); dcons_conf = &dg; dg.cdev = &dcons_consdev; dg.buf = NULL; dg.size = DCONS_BUF_SIZE; #if defined(__i386__) || defined(__amd64__) if 
(getenv_quad("dcons.addr", &addr) > 0 && getenv_quad("dcons.size", &size) > 0) { #ifdef __i386__ vm_paddr_t pa; /* * Allow read/write access to dcons buffer. */ for (pa = trunc_page(addr); pa < addr + size; pa += PAGE_SIZE) *vtopte(KERNBASE + pa) |= PG_RW; invltlb(); #endif /* XXX P to V */ dg.buf = (struct dcons_buf *)(vm_offset_t)(KERNBASE + addr); dg.size = size; if (dcons_load_buffer(dg.buf, dg.size, sc) < 0) dg.buf = NULL; } #endif if (dg.buf != NULL) goto ok; #ifndef KLD_MODULE if (stage == 0) { /* XXX or cold */ /* * DCONS_FORCE_CONSOLE == 1 and statically linked. * called from cninit(). can't use contigmalloc yet . */ dg.buf = (struct dcons_buf *) bssbuf; dcons_init(dg.buf, dg.size, sc); } else #endif { /* * DCONS_FORCE_CONSOLE == 0 or kernel module case. * if the module is loaded after boot, * bssbuf could be non-continuous. */ dg.buf = (struct dcons_buf *) contigmalloc(dg.size, M_DEVBUF, 0, 0x10000, 0xffffffff, PAGE_SIZE, 0ul); dcons_init(dg.buf, dg.size, sc); } ok: dcons_buf = dg.buf; #if __FreeBSD_version < 502122 #if defined(DDB) && DCONS_FORCE_GDB #if CONS_NODEV gdbconsdev.cn_arg = (void *)&sc[DCONS_GDB]; #if __FreeBSD_version >= 501109 sprintf(gdbconsdev.cn_name, "dgdb"); #endif gdb_arg = &gdbconsdev; #elif defined(__DragonFly__) gdbdev = make_dev(&dcons_cdevsw, DCONS_GDB, UID_ROOT, GID_WHEEL, 0600, "dgdb"); #else gdbdev = makedev(CDEV_MAJOR, DCONS_GDB); #endif gdb_getc = dcons_cngetc; gdb_putc = dcons_cnputc; #endif #endif drv_init = 1; return 0; } static int dcons_attach_port(int port, char *name, int flags) { struct dcons_softc *dc; struct tty *tp; DEV dev; dc = &sc[port]; dc->flags = flags; dev = make_dev(&dcons_cdevsw, port, UID_ROOT, GID_WHEEL, 0600, name); dc->dev = (void *)dev; tp = ttyalloc(); dev->si_drv1 = (void *)dc; dev->si_tty = tp; tp->t_oproc = dcons_tty_start; tp->t_param = dcons_tty_param; tp->t_stop = nottystop; tp->t_dev = dc->dev; return(0); } static int dcons_attach(void) { int polltime; #ifdef __DragonFly__ cdevsw_add(&dcons_cdevsw, -1, 0); #endif dcons_attach_port(DCONS_CON, "dcons", 0); dcons_attach_port(DCONS_GDB, "dgdb", DC_GDB); #if __FreeBSD_version < 500000 callout_init(&dcons_callout); #else callout_init(&dcons_callout, 0); #endif polltime = hz / poll_hz; if (polltime < 1) polltime = 1; callout_reset(&dcons_callout, polltime, dcons_timeout, NULL); return(0); } static int dcons_detach(int port) { struct tty *tp; struct dcons_softc *dc; dc = &sc[port]; tp = ((DEV)dc->dev)->si_tty; if (tp->t_state & TS_ISOPEN) { printf("dcons: still opened\n"); #if __FreeBSD_version < 502113 (*linesw[tp->t_line].l_close)(tp, 0); tp->t_gen++; ttyclose(tp); ttwakeup(tp); ttwwakeup(tp); #else ttyld_close(tp, 0); tty_close(tp); #endif } /* XXX * must wait until all device are closed. 
*/ #ifdef __DragonFly__ tsleep((void *)dc, 0, "dcodtc", hz/4); #else tsleep((void *)dc, PWAIT, "dcodtc", hz/4); #endif destroy_dev(dc->dev); return(0); } /* cnXXX works only for FreeBSD-5 */ static int dcons_modevent(module_t mode, int type, void *data) { int err = 0, ret; switch (type) { case MOD_LOAD: ret = dcons_drv_init(1); dcons_attach(); #if __FreeBSD_version >= 500000 if (ret == 0) { dcons_cnprobe(&dcons_consdev); dcons_cninit(&dcons_consdev); cnadd(&dcons_consdev); } #endif break; case MOD_UNLOAD: printf("dcons: unload\n"); callout_stop(&dcons_callout); #if __FreeBSD_version < 502122 #if defined(DDB) && DCONS_FORCE_GDB #if CONS_NODEV gdb_arg = NULL; #else gdbdev = NULL; #endif #endif #endif #if __FreeBSD_version >= 500000 cnremove(&dcons_consdev); #endif dcons_detach(DCONS_CON); dcons_detach(DCONS_GDB); dg.buf->magic = 0; contigfree(dg.buf, DCONS_BUF_SIZE, M_DEVBUF); break; case MOD_SHUTDOWN: #if 0 /* Keep connection after halt */ dg.buf->magic = 0; #endif break; default: err = EOPNOTSUPP; break; } return(err); } #if defined(GDB) && (__FreeBSD_version >= 502122) /* Debugger interface */ static int dcons_dbg_probe(void) { int dcons_gdb; if (getenv_int("dcons_gdb", &dcons_gdb) == 0) return (-1); return (dcons_gdb); } static void dcons_dbg_init(void) { } static void dcons_dbg_term(void) { } static void dcons_dbg_putc(int c) { struct dcons_softc *dc = &sc[DCONS_GDB]; dcons_os_putc(dc, c); } static int dcons_dbg_getc(void) { struct dcons_softc *dc = &sc[DCONS_GDB]; return (dcons_os_getc(dc)); } #endif DEV_MODULE(dcons, dcons_modevent, NULL); MODULE_VERSION(dcons, DCONS_VERSION); Index: stable/7/sys/dev/ofw/ofw_console.c =================================================================== --- stable/7/sys/dev/ofw/ofw_console.c (revision 177733) +++ stable/7/sys/dev/ofw/ofw_console.c (revision 177734) @@ -1,305 +1,306 @@ /*- * Copyright (C) 2001 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_comconsole.h" #include "opt_ofw.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifndef OFWCONS_POLL_HZ #define OFWCONS_POLL_HZ 4 /* 50-100 works best on Ultra2 */ #endif #define OFBURSTLEN 128 /* max number of bytes to write in one chunk */ static d_open_t ofw_dev_open; static d_close_t ofw_dev_close; static struct cdevsw ofw_cdevsw = { .d_version = D_VERSION, .d_open = ofw_dev_open, .d_close = ofw_dev_close, .d_name = "ofw", .d_flags = D_TTY | D_NEEDGIANT, }; static struct tty *ofw_tp = NULL; static int polltime; static struct callout_handle ofw_timeouthandle = CALLOUT_HANDLE_INITIALIZER(&ofw_timeouthandle); #if defined(KDB) && defined(ALT_BREAK_TO_DEBUGGER) static int alt_break_state; #endif static void ofw_tty_start(struct tty *); static int ofw_tty_param(struct tty *, struct termios *); static void ofw_tty_stop(struct tty *, int); static void ofw_timeout(void *); static cn_probe_t ofw_cnprobe; static cn_init_t ofw_cninit; static cn_term_t ofw_cnterm; static cn_getc_t ofw_cngetc; static cn_putc_t ofw_cnputc; CONSOLE_DRIVER(ofw); static void cn_drvinit(void *unused) { phandle_t options; char output[32]; struct cdev *dev; if (ofw_consdev.cn_pri != CN_DEAD && ofw_consdev.cn_name[0] != '\0') { if ((options = OF_finddevice("/options")) == -1 || OF_getprop(options, "output-device", output, sizeof(output)) == -1) return; /* * XXX: This is a hack and it may result in two /dev/ttya * XXX: devices on platforms where the sab driver works. */ dev = make_dev(&ofw_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "%s", output); make_dev_alias(dev, "ofwcons"); } } SYSINIT(cndev, SI_SUB_CONFIGURE, SI_ORDER_MIDDLE, cn_drvinit, NULL) static int stdin; static int stdout; static int ofw_dev_open(struct cdev *dev, int flag, int mode, struct thread *td) { struct tty *tp; int unit; int error, setuptimeout; error = 0; setuptimeout = 0; unit = minor(dev); /* * XXX: BAD, should happen at attach time */ if (dev->si_tty == NULL) { ofw_tp = ttyalloc(); dev->si_tty = ofw_tp; ofw_tp->t_dev = dev; } tp = dev->si_tty; tp->t_oproc = ofw_tty_start; tp->t_param = ofw_tty_param; tp->t_stop = ofw_tty_stop; tp->t_dev = dev; if ((tp->t_state & TS_ISOPEN) == 0) { tp->t_state |= TS_CARR_ON; ttyconsolemode(tp, 0); setuptimeout = 1; } else if ((tp->t_state & TS_XCLUDE) && priv_check(td, PRIV_TTY_EXCLUSIVE)) { return (EBUSY); } error = ttyld_open(tp, dev); if (error == 0 && setuptimeout) { polltime = hz / OFWCONS_POLL_HZ; if (polltime < 1) { polltime = 1; } ofw_timeouthandle = timeout(ofw_timeout, tp, polltime); } return (error); } static int ofw_dev_close(struct cdev *dev, int flag, int mode, struct thread *td) { int unit; struct tty *tp; unit = minor(dev); tp = dev->si_tty; if (unit != 0) { return (ENXIO); } /* XXX Should be replaced with callout_stop(9) */ untimeout(ofw_timeout, tp, ofw_timeouthandle); ttyld_close(tp, flag); tty_close(tp); return (0); } static int ofw_tty_param(struct tty *tp, struct termios *t) { return (0); } static void ofw_tty_start(struct tty *tp) { struct clist *cl; int len; u_char buf[OFBURSTLEN]; if (tp->t_state & (TS_TIMEOUT | TS_BUSY | TS_TTSTOP)) return; tp->t_state |= TS_BUSY; cl = &tp->t_outq; len = q_to_b(cl, buf, OFBURSTLEN); OF_write(stdout, buf, len); tp->t_state &= ~TS_BUSY; ttwwakeup(tp); } static void ofw_tty_stop(struct tty *tp, int flag) { if (tp->t_state & TS_BUSY) { if ((tp->t_state & TS_TTSTOP) == 0) { tp->t_state |= TS_FLUSH; } } } static void ofw_timeout(void *v) { struct tty *tp; int 
c; tp = (struct tty *)v; while ((c = ofw_cngetc(NULL)) != -1) { if (tp->t_state & TS_ISOPEN) { ttyld_rint(tp, c); } } ofw_timeouthandle = timeout(ofw_timeout, tp, polltime); } static void ofw_cnprobe(struct consdev *cp) { int chosen; if ((chosen = OF_finddevice("/chosen")) == -1) { cp->cn_pri = CN_DEAD; return; } if (OF_getprop(chosen, "stdin", &stdin, sizeof(stdin)) == -1) { cp->cn_pri = CN_DEAD; return; } if (OF_getprop(chosen, "stdout", &stdout, sizeof(stdout)) == -1) { cp->cn_pri = CN_DEAD; return; } cp->cn_pri = CN_LOW; } static void ofw_cninit(struct consdev *cp) { /* XXX: This is the alias, but that should be good enough */ sprintf(cp->cn_name, "ofwcons"); cp->cn_tp = ofw_tp; } static void ofw_cnterm(struct consdev *cp) { } static int ofw_cngetc(struct consdev *cp) { unsigned char ch; if (OF_read(stdin, &ch, 1) > 0) { #if defined(KDB) && defined(ALT_BREAK_TO_DEBUGGER) if (kdb_alt_break(ch, &alt_break_state)) - kdb_enter("Break sequence on console"); + kdb_enter_why(KDB_WHY_BREAK, + "Break sequence on console"); #endif return (ch); } return (-1); } static void ofw_cnputc(struct consdev *cp, int c) { char cbuf; if (c == '\n') { cbuf = '\r'; OF_write(stdout, &cbuf, 1); } cbuf = c; OF_write(stdout, &cbuf, 1); } Index: stable/7/sys/dev/sio/sio.c =================================================================== --- stable/7/sys/dev/sio/sio.c (revision 177733) +++ stable/7/sys/dev/sio/sio.c (revision 177734) @@ -1,2612 +1,2614 @@ /*- * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)com.c 7.5 (Berkeley) 5/16/91 * from: i386/isa sio.c,v 1.234 */ #include __FBSDID("$FreeBSD$"); #include "opt_comconsole.h" #include "opt_compat.h" #include "opt_gdb.h" #include "opt_kdb.h" #include "opt_sio.h" /* * Serial driver, based on 386BSD-0.1 com driver. * Mostly rewritten to use pseudo-DMA. * Works for National Semiconductor NS8250-NS16550AF UARTs. * COM driver, based on HP dca driver. 
* * Changes for PC Card integration: * - Added PC Card driver table and handlers */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COM_ESP #include #endif #include #define LOTS_OF_EVENTS 64 /* helps separate urgent events from input */ #ifdef COM_MULTIPORT /* checks in flags for multiport and which is multiport "master chip" * for a given card */ #define COM_ISMULTIPORT(flags) ((flags) & 0x01) #define COM_MPMASTER(flags) (((flags) >> 8) & 0x0ff) #define COM_NOTAST4(flags) ((flags) & 0x04) #else #define COM_ISMULTIPORT(flags) (0) #endif /* COM_MULTIPORT */ #define COM_C_IIR_TXRDYBUG 0x80000 #define COM_CONSOLE(flags) ((flags) & 0x10) #define COM_DEBUGGER(flags) ((flags) & 0x80) #define COM_FIFOSIZE(flags) (((flags) & 0xff000000) >> 24) #define COM_FORCECONSOLE(flags) ((flags) & 0x20) #define COM_IIR_TXRDYBUG(flags) ((flags) & COM_C_IIR_TXRDYBUG) #define COM_LLCONSOLE(flags) ((flags) & 0x40) #define COM_LOSESOUTINTS(flags) ((flags) & 0x08) #define COM_NOFIFO(flags) ((flags) & 0x02) #define COM_NOPROBE(flags) ((flags) & 0x40000) #define COM_NOSCR(flags) ((flags) & 0x100000) #define COM_PPSCTS(flags) ((flags) & 0x10000) #define COM_ST16650A(flags) ((flags) & 0x20000) #define COM_TI16754(flags) ((flags) & 0x200000) #define sio_getreg(com, off) \ (bus_space_read_1((com)->bst, (com)->bsh, (off))) #define sio_setreg(com, off, value) \ (bus_space_write_1((com)->bst, (com)->bsh, (off), (value))) /* * com state bits. * (CS_BUSY | CS_TTGO) and (CS_BUSY | CS_TTGO | CS_ODEVREADY) must be higher * than the other bits so that they can be tested as a group without masking * off the low bits. * * The following com and tty flags correspond closely: * CS_BUSY = TS_BUSY (maintained by comstart(), siopoll() and * comstop()) * CS_TTGO = ~TS_TTSTOP (maintained by comparam() and comstart()) * CS_CTS_OFLOW = CCTS_OFLOW (maintained by comparam()) * CS_RTS_IFLOW = CRTS_IFLOW (maintained by comparam()) * TS_FLUSH is not used. * XXX I think TIOCSETA doesn't clear TS_TTSTOP when it clears IXON. * XXX CS_*FLOW should be CF_*FLOW in com->flags (control flags not state). */ #define CS_BUSY 0x80 /* output in progress */ #define CS_TTGO 0x40 /* output not stopped by XOFF */ #define CS_ODEVREADY 0x20 /* external device h/w ready (CTS) */ #define CS_CHECKMSR 1 /* check of MSR scheduled */ #define CS_CTS_OFLOW 2 /* use CTS output flow control */ #define CS_ODONE 4 /* output completed */ #define CS_RTS_IFLOW 8 /* use RTS input flow control */ #define CSE_BUSYCHECK 1 /* siobusycheck() scheduled */ static char const * const error_desc[] = { #define CE_OVERRUN 0 "silo overflow", #define CE_INTERRUPT_BUF_OVERFLOW 1 "interrupt-level buffer overflow", #define CE_TTY_BUF_OVERFLOW 2 "tty-level buffer overflow", }; #define CE_NTYPES 3 #define CE_RECORD(com, errnum) (++(com)->delta_error_counts[errnum]) /* types. 
XXX - should be elsewhere */ typedef u_int Port_t; /* hardware port */ typedef u_char bool_t; /* boolean */ /* queue of linear buffers */ struct lbq { u_char *l_head; /* next char to process */ u_char *l_tail; /* one past the last char to process */ struct lbq *l_next; /* next in queue */ bool_t l_queued; /* nonzero if queued */ }; /* com device structure */ struct com_s { u_char state; /* miscellaneous flag bits */ u_char cfcr_image; /* copy of value written to CFCR */ #ifdef COM_ESP bool_t esp; /* is this unit a hayes esp board? */ #endif u_char extra_state; /* more flag bits, separate for order trick */ u_char fifo_image; /* copy of value written to FIFO */ bool_t hasfifo; /* nonzero for 16550 UARTs */ bool_t loses_outints; /* nonzero if device loses output interrupts */ u_char mcr_image; /* copy of value written to MCR */ #ifdef COM_MULTIPORT bool_t multiport; /* is this unit part of a multiport device? */ #endif /* COM_MULTIPORT */ bool_t no_irq; /* nonzero if irq is not attached */ bool_t gone; /* hardware disappeared */ bool_t poll; /* nonzero if polling is required */ bool_t poll_output; /* nonzero if polling for output is required */ bool_t st16650a; /* nonzero if Startech 16650A compatible */ int unit; /* unit number */ u_int flags; /* copy of device flags */ u_int tx_fifo_size; /* * The high level of the driver never reads status registers directly * because there would be too many side effects to handle conveniently. * Instead, it reads copies of the registers stored here by the * interrupt handler. */ u_char last_modem_status; /* last MSR read by intr handler */ u_char prev_modem_status; /* last MSR handled by high level */ u_char *ibuf; /* start of input buffer */ u_char *ibufend; /* end of input buffer */ u_char *ibufold; /* old input buffer, to be freed */ u_char *ihighwater; /* threshold in input buffer */ u_char *iptr; /* next free spot in input buffer */ int ibufsize; /* size of ibuf (not include error bytes) */ int ierroff; /* offset of error bytes in ibuf */ struct lbq obufq; /* head of queue of output buffers */ struct lbq obufs[2]; /* output buffers */ bus_space_tag_t bst; bus_space_handle_t bsh; Port_t data_port; /* i/o ports */ #ifdef COM_ESP Port_t esp_port; #endif Port_t int_ctl_port; Port_t int_id_port; Port_t modem_ctl_port; Port_t line_status_port; Port_t modem_status_port; struct tty *tp; /* cross reference */ struct pps_state pps; int pps_bit; #ifdef ALT_BREAK_TO_DEBUGGER int alt_brk_state; #endif u_long bytes_in; /* statistics */ u_long bytes_out; u_int delta_error_counts[CE_NTYPES]; u_long error_counts[CE_NTYPES]; u_long rclk; struct resource *irqres; struct resource *ioportres; int ioportrid; void *cookie; /* * Data area for output buffers. Someday we should build the output * buffer queue without copying data. 
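 * The two 256-byte buffers below are the pseudo-DMA staging area:
 * output is copied out of the tty queue into obuf1/obuf2, each filled
 * buffer is linked onto obufq as a struct lbq, and the interrupt path
 * pushes bytes from l_head toward l_tail.  A simplified drain loop
 * (a sketch that ignores FIFO depth and queue rotation, not the real
 * siointr1() logic) would look like:
 *
 *	while (com->obufq.l_head < com->obufq.l_tail)
 *		sio_setreg(com, com_data, *com->obufq.l_head++);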
*/ u_char obuf1[256]; u_char obuf2[256]; }; #ifdef COM_ESP static int espattach(struct com_s *com, Port_t esp_port); #endif static void combreak(struct tty *tp, int sig); static timeout_t siobusycheck; static u_int siodivisor(u_long rclk, speed_t speed); static void comclose(struct tty *tp); static int comopen(struct tty *tp, struct cdev *dev); static void sioinput(struct com_s *com); static void siointr1(struct com_s *com); static int siointr(void *arg); static int commodem(struct tty *tp, int sigon, int sigoff); static int comparam(struct tty *tp, struct termios *t); static void siopoll(void *); static void siosettimeout(void); static int siosetwater(struct com_s *com, speed_t speed); static void comstart(struct tty *tp); static void comstop(struct tty *tp, int rw); static timeout_t comwakeup; char sio_driver_name[] = "sio"; static struct mtx sio_lock; static int sio_inited; /* table and macro for fast conversion from a unit number to its com struct */ devclass_t sio_devclass; #define com_addr(unit) ((struct com_s *) \ devclass_get_softc(sio_devclass, unit)) /* XXX */ int comconsole = -1; static volatile speed_t comdefaultrate = CONSPEED; static u_long comdefaultrclk = DEFAULT_RCLK; SYSCTL_ULONG(_machdep, OID_AUTO, conrclk, CTLFLAG_RW, &comdefaultrclk, 0, ""); static speed_t gdbdefaultrate = GDBSPEED; SYSCTL_UINT(_machdep, OID_AUTO, gdbspeed, CTLFLAG_RW, &gdbdefaultrate, GDBSPEED, ""); static u_int com_events; /* input chars + weighted output completions */ static Port_t siocniobase; static int siocnunit = -1; static void *sio_slow_ih; static void *sio_fast_ih; static int sio_timeout; static int sio_timeouts_until_log; static struct callout_handle sio_timeout_handle = CALLOUT_HANDLE_INITIALIZER(&sio_timeout_handle); static int sio_numunits; #ifdef GDB static Port_t siogdbiobase = 0; #endif #ifdef COM_ESP /* XXX configure this properly. */ /* XXX quite broken for new-bus. */ static Port_t likely_com_ports[] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, }; static Port_t likely_esp_ports[] = { 0x140, 0x180, 0x280, 0 }; #endif /* * handle sysctl read/write requests for console speed * * In addition to setting comdefaultrate for I/O through /dev/console, * also set the initial and lock values for the /dev/ttyXX device * if there is one associated with the console. Finally, if the /dev/tty * device has already been open, change the speed on the open running port * itself. */ static int sysctl_machdep_comdefaultrate(SYSCTL_HANDLER_ARGS) { int error, s; speed_t newspeed; struct com_s *com; struct tty *tp; newspeed = comdefaultrate; error = sysctl_handle_opaque(oidp, &newspeed, sizeof newspeed, req); if (error || !req->newptr) return (error); comdefaultrate = newspeed; if (comconsole < 0) /* serial console not selected? 
*/ return (0); com = com_addr(comconsole); if (com == NULL) return (ENXIO); tp = com->tp; if (tp == NULL) return (ENXIO); /* * set the initial and lock rates for /dev/ttydXX and /dev/cuaXX * (note, the lock rates really are boolean -- if non-zero, disallow * speed changes) */ tp->t_init_in.c_ispeed = tp->t_init_in.c_ospeed = tp->t_lock_in.c_ispeed = tp->t_lock_in.c_ospeed = tp->t_init_out.c_ispeed = tp->t_init_out.c_ospeed = tp->t_lock_out.c_ispeed = tp->t_lock_out.c_ospeed = comdefaultrate; if (tp->t_state & TS_ISOPEN) { tp->t_termios.c_ispeed = tp->t_termios.c_ospeed = comdefaultrate; s = spltty(); error = comparam(tp, &tp->t_termios); splx(s); } return error; } SYSCTL_PROC(_machdep, OID_AUTO, conspeed, CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_machdep_comdefaultrate, "I", ""); TUNABLE_INT("machdep.conspeed", __DEVOLATILE(int *, &comdefaultrate)); #define SET_FLAG(dev, bit) device_set_flags(dev, device_get_flags(dev) | (bit)) #define CLR_FLAG(dev, bit) device_set_flags(dev, device_get_flags(dev) & ~(bit)) /* * Unload the driver and clear the table. * XXX this is mostly wrong. * XXX TODO: * This is usually called when the card is ejected, but * can be caused by a kldunload of a controller driver. * The idea is to reset the driver's view of the device * and ensure that any driver entry points such as * read and write do not hang. */ int siodetach(device_t dev) { struct com_s *com; com = (struct com_s *) device_get_softc(dev); if (com == NULL) { device_printf(dev, "NULL com in siounload\n"); return (0); } com->gone = TRUE; if (com->tp) ttyfree(com->tp); if (com->irqres) { bus_teardown_intr(dev, com->irqres, com->cookie); bus_release_resource(dev, SYS_RES_IRQ, 0, com->irqres); } if (com->ioportres) bus_release_resource(dev, SYS_RES_IOPORT, com->ioportrid, com->ioportres); if (com->ibuf != NULL) free(com->ibuf, M_DEVBUF); device_set_softc(dev, NULL); free(com, M_DEVBUF); return (0); } int sioprobe(dev, xrid, rclk, noprobe) device_t dev; int xrid; u_long rclk; int noprobe; { #if 0 static bool_t already_init; device_t xdev; #endif struct com_s *com; u_int divisor; bool_t failures[10]; int fn; device_t idev; Port_t iobase; intrmask_t irqmap[4]; intrmask_t irqs; u_char mcr_image; int result; u_long xirq; u_int flags = device_get_flags(dev); int rid; struct resource *port; rid = xrid; port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, IO_COMSIZE, RF_ACTIVE); if (!port) return (ENXIO); com = malloc(sizeof(*com), M_DEVBUF, M_NOWAIT | M_ZERO); if (com == NULL) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENOMEM); } device_set_softc(dev, com); com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); if (rclk == 0) rclk = DEFAULT_RCLK; com->rclk = rclk; while (sio_inited != 2) if (atomic_cmpset_int(&sio_inited, 0, 1)) { mtx_init(&sio_lock, sio_driver_name, NULL, (comconsole != -1) ? MTX_SPIN | MTX_QUIET : MTX_SPIN); atomic_store_rel_int(&sio_inited, 2); } #if 0 /* * XXX this is broken - when we are first called, there are no * previously configured IO ports. We could hard code * 0x3f8, 0x2f8, 0x3e8, 0x2e8 etc but that's probably worse. * This code has been doing nothing since the conversion since * "count" is zero the first time around. */ if (!already_init) { /* * Turn off MCR_IENABLE for all likely serial ports. An unused * port with its MCR_IENABLE gate open will inhibit interrupts * from any used port that shares the interrupt vector. * XXX the gate enable is elsewhere for some multiports. 
*/ device_t *devs; int count, i, xioport; devclass_get_devices(sio_devclass, &devs, &count); for (i = 0; i < count; i++) { xdev = devs[i]; if (device_is_enabled(xdev) && bus_get_resource(xdev, SYS_RES_IOPORT, 0, &xioport, NULL) == 0) outb(xioport + com_mcr, 0); } free(devs, M_TEMP); already_init = TRUE; } #endif if (COM_LLCONSOLE(flags)) { printf("sio%d: reserved for low-level i/o\n", device_get_unit(dev)); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); device_set_softc(dev, NULL); free(com, M_DEVBUF); return (ENXIO); } /* * If the device is on a multiport card and has an AST/4 * compatible interrupt control register, initialize this * register and prepare to leave MCR_IENABLE clear in the mcr. * Otherwise, prepare to set MCR_IENABLE in the mcr. * Point idev to the device struct giving the correct id_irq. * This is the struct for the master device if there is one. */ idev = dev; mcr_image = MCR_IENABLE; #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { Port_t xiobase; u_long io; idev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); if (idev == NULL) { printf("sio%d: master device %d not configured\n", device_get_unit(dev), COM_MPMASTER(flags)); idev = dev; } if (!COM_NOTAST4(flags)) { if (bus_get_resource(idev, SYS_RES_IOPORT, 0, &io, NULL) == 0) { xiobase = io; if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) == 0) outb(xiobase + com_scr, 0x80); else outb(xiobase + com_scr, 0); } mcr_image = 0; } } #endif /* COM_MULTIPORT */ if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) != 0) mcr_image = 0; bzero(failures, sizeof failures); iobase = rman_get_start(port); /* * We don't want to get actual interrupts, just masked ones. * Interrupts from this line should already be masked in the ICU, * but mask them in the processor as well in case there are some * (misconfigured) shared interrupts. */ mtx_lock_spin(&sio_lock); /* EXTRA DELAY? */ /* * For the TI16754 chips, set prescaler to 1 (4 is often the * default after-reset value) as otherwise it's impossible to * get highest baudrates. */ if (COM_TI16754(flags)) { u_char cfcr, efr; cfcr = sio_getreg(com, com_cfcr); sio_setreg(com, com_cfcr, CFCR_EFR_ENABLE); efr = sio_getreg(com, com_efr); /* Unlock extended features to turn off prescaler. */ sio_setreg(com, com_efr, efr | EFR_EFE); /* Disable EFR. */ sio_setreg(com, com_cfcr, (cfcr != CFCR_EFR_ENABLE) ? cfcr : 0); /* Turn off prescaler. */ sio_setreg(com, com_mcr, sio_getreg(com, com_mcr) & ~MCR_PRESCALE); sio_setreg(com, com_cfcr, CFCR_EFR_ENABLE); sio_setreg(com, com_efr, efr); sio_setreg(com, com_cfcr, cfcr); } /* * Initialize the speed and the word size and wait long enough to * drain the maximum of 16 bytes of junk in device output queues. * The speed is undefined after a master reset and must be set * before relying on anything related to output. There may be * junk after a (very fast) soft reboot and (apparently) after * master reset. * XXX what about the UART bug avoided by waiting in comparam()? * We don't want to to wait long enough to drain at 2 bps. */ if (iobase == siocniobase) DELAY((16 + 1) * 1000000 / (comdefaultrate / 10)); else { sio_setreg(com, com_cfcr, CFCR_DLAB | CFCR_8BITS); divisor = siodivisor(rclk, SIO_TEST_SPEED); sio_setreg(com, com_dlbl, divisor & 0xff); sio_setreg(com, com_dlbh, divisor >> 8); sio_setreg(com, com_cfcr, CFCR_8BITS); DELAY((16 + 1) * 1000000 / (SIO_TEST_SPEED / 10)); } /* * Enable the interrupt gate and disable device interupts. 
This * should leave the device driving the interrupt line low and * guarantee an edge trigger if an interrupt can be generated. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); sio_setreg(com, com_ier, 0); DELAY(1000); /* XXX */ irqmap[0] = isa_irq_pending(); /* * Attempt to set loopback mode so that we can send a null byte * without annoying any external device. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image | MCR_LOOPBACK); /* * Attempt to generate an output interrupt. On 8250's, setting * IER_ETXRDY generates an interrupt independent of the current * setting and independent of whether the THR is empty. On 16450's, * setting IER_ETXRDY generates an interrupt independent of the * current setting. On 16550A's, setting IER_ETXRDY only * generates an interrupt when IER_ETXRDY is not already set. */ sio_setreg(com, com_ier, IER_ETXRDY); /* * On some 16x50 incompatibles, setting IER_ETXRDY doesn't generate * an interrupt. They'd better generate one for actually doing * output. Loopback may be broken on the same incompatibles but * it's unlikely to do more than allow the null byte out. */ sio_setreg(com, com_data, 0); if (iobase == siocniobase) DELAY((1 + 2) * 1000000 / (comdefaultrate / 10)); else DELAY((1 + 2) * 1000000 / (SIO_TEST_SPEED / 10)); /* * Turn off loopback mode so that the interrupt gate works again * (MCR_IENABLE was hidden). This should leave the device driving * an interrupt line high. It doesn't matter if the interrupt * line oscillates while we are not looking at it, since interrupts * are disabled. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); /* * It seems my Xircom CBEM56G Cardbus modem wants to be reset * to 8 bits *again*, or else probe test 0 will fail. * gwk@sgi.com, 4/19/2001 */ sio_setreg(com, com_cfcr, CFCR_8BITS); /* * Some PCMCIA cards (Palido 321s, DC-1S, ...) have the "TXRDY bug", * so we probe for a buggy IIR_TXRDY implementation even in the * noprobe case. We don't probe for it in the !noprobe case because * noprobe is always set for PCMCIA cards and the problem is not * known to affect any other cards. */ if (noprobe) { /* Read IIR a few times. */ for (fn = 0; fn < 2; fn ++) { DELAY(10000); failures[6] = sio_getreg(com, com_iir); } /* IIR_TXRDY should be clear. Is it? */ result = 0; if (failures[6] & IIR_TXRDY) { /* * No. We seem to have the bug. Does our fix for * it work? */ sio_setreg(com, com_ier, 0); if (sio_getreg(com, com_iir) & IIR_NOPEND) { /* Yes. We discovered the TXRDY bug! */ SET_FLAG(dev, COM_C_IIR_TXRDYBUG); } else { /* No. Just fail. XXX */ result = ENXIO; sio_setreg(com, com_mcr, 0); } } else { /* Yes. No bug. */ CLR_FLAG(dev, COM_C_IIR_TXRDYBUG); } sio_setreg(com, com_ier, 0); sio_setreg(com, com_cfcr, CFCR_8BITS); mtx_unlock_spin(&sio_lock); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); if (iobase == siocniobase) result = 0; if (result != 0) { device_set_softc(dev, NULL); free(com, M_DEVBUF); } return (result == 0 ? BUS_PROBE_DEFAULT + 1 : result); } /* * Check that * o the CFCR, IER and MCR in UART hold the values written to them * (the values happen to be all distinct - this is good for * avoiding false positive tests from bus echoes). * o an output interrupt is generated and its vector is correct. * o the interrupt goes away when the IIR in the UART is read. */ /* EXTRA DELAY? 
*/ failures[0] = sio_getreg(com, com_cfcr) - CFCR_8BITS; failures[1] = sio_getreg(com, com_ier) - IER_ETXRDY; failures[2] = sio_getreg(com, com_mcr) - mcr_image; DELAY(10000); /* Some internal modems need this time */ irqmap[1] = isa_irq_pending(); failures[4] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_TXRDY; DELAY(1000); /* XXX */ irqmap[2] = isa_irq_pending(); failures[6] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; /* * Turn off all device interrupts and check that they go off properly. * Leave MCR_IENABLE alone. For ports without a master port, it gates * the OUT2 output of the UART to * the ICU input. Closing the gate would give a floating ICU input * (unless there is another device driving it) and spurious interrupts. * (On the system that this was first tested on, the input floats high * and gives a (masked) interrupt as soon as the gate is closed.) */ sio_setreg(com, com_ier, 0); sio_setreg(com, com_cfcr, CFCR_8BITS); /* dummy to avoid bus echo */ failures[7] = sio_getreg(com, com_ier); DELAY(1000); /* XXX */ irqmap[3] = isa_irq_pending(); failures[9] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; mtx_unlock_spin(&sio_lock); irqs = irqmap[1] & ~irqmap[0]; if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) == 0 && ((1 << xirq) & irqs) == 0) { printf( "sio%d: configured irq %ld not in bitmap of probed irqs %#x\n", device_get_unit(dev), xirq, irqs); printf( "sio%d: port may not be enabled\n", device_get_unit(dev)); } if (bootverbose) printf("sio%d: irq maps: %#x %#x %#x %#x\n", device_get_unit(dev), irqmap[0], irqmap[1], irqmap[2], irqmap[3]); result = 0; for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) { sio_setreg(com, com_mcr, 0); result = ENXIO; if (bootverbose) { printf("sio%d: probe failed test(s):", device_get_unit(dev)); for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) printf(" %d", fn); printf("\n"); } break; } bus_release_resource(dev, SYS_RES_IOPORT, rid, port); if (iobase == siocniobase) result = 0; if (result != 0) { device_set_softc(dev, NULL); free(com, M_DEVBUF); } return (result == 0 ? BUS_PROBE_DEFAULT + 1 : result); } #ifdef COM_ESP static int espattach(com, esp_port) struct com_s *com; Port_t esp_port; { u_char dips; u_char val; /* * Check the ESP-specific I/O port to see if we're an ESP * card. If not, return failure immediately. */ if ((inb(esp_port) & 0xf3) == 0) { printf(" port 0x%x is not an ESP board?\n", esp_port); return (0); } /* * We've got something that claims to be a Hayes ESP card. * Let's hope so. */ /* Get the dip-switch configuration */ outb(esp_port + ESP_CMD1, ESP_GETDIPS); dips = inb(esp_port + ESP_STATUS1); /* * Bits 0,1 of dips say which COM port we are. */ if (rman_get_start(com->ioportres) == likely_com_ports[dips & 0x03]) printf(" : ESP"); else { printf(" esp_port has com %d\n", dips & 0x03); return (0); } /* * Check for ESP version 2.0 or later: bits 4,5,6 = 010. */ outb(esp_port + ESP_CMD1, ESP_GETTEST); val = inb(esp_port + ESP_STATUS1); /* clear reg 1 */ val = inb(esp_port + ESP_STATUS2); if ((val & 0x70) < 0x20) { printf("-old (%o)", val & 0x70); return (0); } /* * Check for ability to emulate 16550: bit 7 == 1 */ if ((dips & 0x80) == 0) { printf(" slave"); return (0); } /* * Okay, we seem to be a Hayes ESP card. Whee. 
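 *
 * Worked example of the dip-switch decoding above, using a hypothetical
 * value rather than anything from ESP documentation: if ESP_GETDIPS
 * returned dips == 0x81, bits 0-1 are 01, so the card must sit at
 * likely_com_ports[1]; bits 4-6 of the second status byte must read at
 * least 010 (0x20) for a 2.0-or-later card, and bit 7 of dips being set
 * is what allows the 16550 emulation this driver relies on.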
*/ com->esp = TRUE; com->esp_port = esp_port; return (1); } #endif /* COM_ESP */ int sioattach(dev, xrid, rclk) device_t dev; int xrid; u_long rclk; { struct com_s *com; #ifdef COM_ESP Port_t *espp; #endif Port_t iobase; int unit; u_int flags; int rid; struct resource *port; int ret; int error; struct tty *tp; rid = xrid; port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, IO_COMSIZE, RF_ACTIVE); if (!port) return (ENXIO); iobase = rman_get_start(port); unit = device_get_unit(dev); com = device_get_softc(dev); flags = device_get_flags(dev); if (unit >= sio_numunits) sio_numunits = unit + 1; /* * sioprobe() has initialized the device registers as follows: * o cfcr = CFCR_8BITS. * It is most important that CFCR_DLAB is off, so that the * data port is not hidden when we enable interrupts. * o ier = 0. * Interrupts are only enabled when the line is open. * o mcr = MCR_IENABLE, or 0 if the port has AST/4 compatible * interrupt control register or the config specifies no irq. * Keeping MCR_DTR and MCR_RTS off might stop the external * device from sending before we are ready. */ bzero(com, sizeof *com); com->unit = unit; com->ioportres = port; com->ioportrid = rid; com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); com->cfcr_image = CFCR_8BITS; com->loses_outints = COM_LOSESOUTINTS(flags) != 0; com->no_irq = bus_get_resource(dev, SYS_RES_IRQ, 0, NULL, NULL) != 0; com->tx_fifo_size = 1; com->obufs[0].l_head = com->obuf1; com->obufs[1].l_head = com->obuf2; com->data_port = iobase + com_data; com->int_ctl_port = iobase + com_ier; com->int_id_port = iobase + com_iir; com->modem_ctl_port = iobase + com_mcr; com->mcr_image = inb(com->modem_ctl_port); com->line_status_port = iobase + com_lsr; com->modem_status_port = iobase + com_msr; tp = com->tp = ttyalloc(); tp->t_oproc = comstart; tp->t_param = comparam; tp->t_stop = comstop; tp->t_modem = commodem; tp->t_break = combreak; tp->t_close = comclose; tp->t_open = comopen; tp->t_sc = com; if (rclk == 0) rclk = DEFAULT_RCLK; com->rclk = rclk; if (unit == comconsole) ttyconsolemode(tp, comdefaultrate); error = siosetwater(com, tp->t_init_in.c_ispeed); mtx_unlock_spin(&sio_lock); if (error) { /* * Leave i/o resources allocated if this is a `cn'-level * console, so that other devices can't snarf them. 
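 *
 * Aside on the *_port assignments above (the usual 8250/16550 register
 * map, stated here only for orientation, not defined in this file):
 * com_data through com_msr correspond to iobase+0 .. iobase+6, the
 * divisor latch bytes overlay offsets 0 and 1 while CFCR_DLAB is set,
 * and the scratch register sits at offset 7.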
*/ if (iobase != siocniobase) bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENOMEM); } /* attempt to determine UART type */ printf("sio%d: type", unit); if (!COM_ISMULTIPORT(flags) && !COM_IIR_TXRDYBUG(flags) && !COM_NOSCR(flags)) { u_char scr; u_char scr1; u_char scr2; scr = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0xa5); scr1 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0x5a); scr2 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, scr); if (scr1 != 0xa5 || scr2 != 0x5a) { printf(" 8250 or not responding"); goto determined_type; } } sio_setreg(com, com_fifo, FIFO_ENABLE | FIFO_RX_HIGH); DELAY(100); switch (inb(com->int_id_port) & IIR_FIFO_MASK) { case FIFO_RX_LOW: printf(" 16450"); break; case FIFO_RX_MEDL: printf(" 16450?"); break; case FIFO_RX_MEDH: printf(" 16550?"); break; case FIFO_RX_HIGH: if (COM_NOFIFO(flags)) { printf(" 16550A fifo disabled"); break; } com->hasfifo = TRUE; if (COM_ST16650A(flags)) { printf(" ST16650A"); com->st16650a = TRUE; com->tx_fifo_size = 32; break; } if (COM_TI16754(flags)) { printf(" TI16754"); com->tx_fifo_size = 64; break; } printf(" 16550A"); #ifdef COM_ESP for (espp = likely_esp_ports; *espp != 0; espp++) if (espattach(com, *espp)) { com->tx_fifo_size = 1024; break; } if (com->esp) break; #endif com->tx_fifo_size = COM_FIFOSIZE(flags); if (com->tx_fifo_size == 0) com->tx_fifo_size = 16; else printf(" lookalike with %u bytes FIFO", com->tx_fifo_size); break; } #ifdef COM_ESP if (com->esp) { /* * Set 16550 compatibility mode. * We don't use the ESP_MODE_SCALE bit to increase the * fifo trigger levels because we can't handle large * bursts of input. * XXX flow control should be set in comparam(), not here. */ outb(com->esp_port + ESP_CMD1, ESP_SETMODE); outb(com->esp_port + ESP_CMD2, ESP_MODE_RTS | ESP_MODE_FIFO); /* Set RTS/CTS flow control. */ outb(com->esp_port + ESP_CMD1, ESP_SETFLOWTYPE); outb(com->esp_port + ESP_CMD2, ESP_FLOW_RTS); outb(com->esp_port + ESP_CMD2, ESP_FLOW_CTS); /* Set flow-control levels. 
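 * The two 16-bit values written below are an inference from the numbers
 * rather than from ESP documentation at hand: 768 is presumably the
 * receive-FIFO fill level at which the card asserts flow-off and 512
 * the level at which it releases it again, both out of the 1024-byte
 * FIFO sized above.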
*/ outb(com->esp_port + ESP_CMD1, ESP_SETRXFLOW); outb(com->esp_port + ESP_CMD2, HIBYTE(768)); outb(com->esp_port + ESP_CMD2, LOBYTE(768)); outb(com->esp_port + ESP_CMD2, HIBYTE(512)); outb(com->esp_port + ESP_CMD2, LOBYTE(512)); } #endif /* COM_ESP */ sio_setreg(com, com_fifo, 0); determined_type: ; #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { device_t masterdev; com->multiport = TRUE; printf(" (multiport"); if (unit == COM_MPMASTER(flags)) printf(" master"); printf(")"); masterdev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); com->no_irq = (masterdev == NULL || bus_get_resource(masterdev, SYS_RES_IRQ, 0, NULL, NULL) != 0); } #endif /* COM_MULTIPORT */ if (unit == comconsole) printf(", console"); if (COM_IIR_TXRDYBUG(flags)) printf(" with a buggy IIR_TXRDY implementation"); printf("\n"); if (sio_fast_ih == NULL) { swi_add(&tty_intr_event, "sio", siopoll, NULL, SWI_TTY, 0, &sio_fast_ih); swi_add(&clk_intr_event, "sio", siopoll, NULL, SWI_CLOCK, 0, &sio_slow_ih); } com->flags = flags; com->pps.ppscap = PPS_CAPTUREASSERT | PPS_CAPTURECLEAR; tp->t_pps = &com->pps; if (COM_PPSCTS(flags)) com->pps_bit = MSR_CTS; else com->pps_bit = MSR_DCD; pps_init(&com->pps); rid = 0; com->irqres = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (com->irqres) { ret = bus_setup_intr(dev, com->irqres, INTR_TYPE_TTY, siointr, NULL, com, &com->cookie); if (ret) { ret = bus_setup_intr(dev, com->irqres, INTR_TYPE_TTY, NULL, (driver_intr_t *)siointr, com, &com->cookie); if (ret == 0) device_printf(dev, "unable to activate interrupt in fast mode - using normal mode\n"); } if (ret) device_printf(dev, "could not activate interrupt\n"); #if defined(KDB) && (defined(BREAK_TO_DEBUGGER) || \ defined(ALT_BREAK_TO_DEBUGGER)) /* * Enable interrupts for early break-to-debugger support * on the console. */ if (ret == 0 && unit == comconsole) outb(siocniobase + com_ier, IER_ERXRDY | IER_ERLS | IER_EMSC); #endif } /* We're ready, open the doors... */ ttycreate(tp, TS_CALLOUT, "d%r", unit); return (0); } static int comopen(struct tty *tp, struct cdev *dev) { struct com_s *com; int i; com = tp->t_sc; com->poll = com->no_irq; com->poll_output = com->loses_outints; if (com->hasfifo) { /* * (Re)enable and drain fifos. * * Certain SMC chips cause problems if the fifos * are enabled while input is ready. Turn off the * fifo if necessary to clear the input. We test * the input ready bit after enabling the fifos * since we've already enabled them in comparam() * and to handle races between enabling and fresh * input. */ for (i = 0; i < 500; i++) { sio_setreg(com, com_fifo, FIFO_RCV_RST | FIFO_XMT_RST | com->fifo_image); /* * XXX the delays are for superstitious * historical reasons. It must be less than * the character time at the maximum * supported speed (87 usec at 115200 bps * 8N1). Otherwise we might loop endlessly * if data is streaming in. We used to use * delays of 100. That usually worked * because DELAY(100) used to usually delay * for about 85 usec instead of 100. */ DELAY(50); if (!(inb(com->line_status_port) & LSR_RXRDY)) break; sio_setreg(com, com_fifo, 0); DELAY(50); (void) inb(com->data_port); } if (i == 500) return (EIO); } mtx_lock_spin(&sio_lock); (void) inb(com->line_status_port); (void) inb(com->data_port); com->prev_modem_status = com->last_modem_status = inb(com->modem_status_port); outb(com->int_ctl_port, IER_ERXRDY | IER_ERLS | IER_EMSC | (COM_IIR_TXRDYBUG(com->flags) ? 0 : IER_ETXRDY)); mtx_unlock_spin(&sio_lock); siosettimeout(); /* XXX: should be generic ? 
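 *
 * Rough bound for the fifo-drain loop earlier in comopen(), derived
 * from the constants in the loop rather than stated in the original
 * source: each full iteration spends two DELAY(50) waits plus a couple
 * of register accesses, so the 500-iteration limit gives up after
 * roughly 500 * 100us = 50ms of input that refuses to drain and
 * returns EIO.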
*/ if (com->prev_modem_status & MSR_DCD || ISCALLOUT(dev)) ttyld_modem(tp, 1); return (0); } static void comclose(tp) struct tty *tp; { int s; struct com_s *com; s = spltty(); com = tp->t_sc; com->poll = FALSE; com->poll_output = FALSE; sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); #if defined(KDB) && (defined(BREAK_TO_DEBUGGER) || \ defined(ALT_BREAK_TO_DEBUGGER)) /* * Leave interrupts enabled and don't clear DTR if this is the * console. This allows us to detect break-to-debugger events * while the console device is closed. */ if (com->unit != comconsole) #endif { sio_setreg(com, com_ier, 0); if (tp->t_cflag & HUPCL /* * XXX we will miss any carrier drop between here and the * next open. Perhaps we should watch DCD even when the * port is closed; it is not sufficient to check it at * the next open because it might go up and down while * we're not watching. */ || (!tp->t_actout && !(com->prev_modem_status & MSR_DCD) && !(tp->t_init_in.c_cflag & CLOCAL)) || !(tp->t_state & TS_ISOPEN)) { (void)commodem(tp, 0, SER_DTR); ttydtrwaitstart(tp); } } if (com->hasfifo) { /* * Disable fifos so that they are off after controlled * reboots. Some BIOSes fail to detect 16550s when the * fifos are enabled. */ sio_setreg(com, com_fifo, 0); } tp->t_actout = FALSE; wakeup(&tp->t_actout); wakeup(TSA_CARR_ON(tp)); /* restart any wopeners */ siosettimeout(); splx(s); } static void siobusycheck(chan) void *chan; { struct com_s *com; int s; com = (struct com_s *)chan; /* * Clear TS_BUSY if low-level output is complete. * spl locking is sufficient because siointr1() does not set CS_BUSY. * If siointr1() clears CS_BUSY after we look at it, then we'll get * called again. Reading the line status port outside of siointr1() * is safe because CS_BUSY is clear so there are no output interrupts * to lose. */ s = spltty(); if (com->state & CS_BUSY) com->extra_state &= ~CSE_BUSYCHECK; /* False alarm. */ else if ((inb(com->line_status_port) & (LSR_TSRE | LSR_TXRDY)) == (LSR_TSRE | LSR_TXRDY)) { com->tp->t_state &= ~TS_BUSY; ttwwakeup(com->tp); com->extra_state &= ~CSE_BUSYCHECK; } else timeout(siobusycheck, com, hz / 100); splx(s); } static u_int siodivisor(rclk, speed) u_long rclk; speed_t speed; { long actual_speed; u_int divisor; int error; if (speed == 0) return (0); #if UINT_MAX > (ULONG_MAX - 1) / 8 if (speed > (ULONG_MAX - 1) / 8) return (0); #endif divisor = (rclk / (8UL * speed) + 1) / 2; if (divisor == 0 || divisor >= 65536) return (0); actual_speed = rclk / (16UL * divisor); /* 10 times error in percent: */ error = ((actual_speed - (long)speed) * 2000 / (long)speed + 1) / 2; /* 3.0% maximum error tolerance: */ if (error < -30 || error > 30) return (0); return (divisor); } /* * Call this function with the sio_lock mutex held. It will return with the * lock still held. */ static void sioinput(com) struct com_s *com; { u_char *buf; int incc; u_char line_status; int recv_data; struct tty *tp; buf = com->ibuf; tp = com->tp; if (!(tp->t_state & TS_ISOPEN) || !(tp->t_cflag & CREAD)) { com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; return; } if (tp->t_state & TS_CAN_BYPASS_L_RINT) { /* * Avoid the grotesquely inefficient lineswitch routine * (ttyinput) in "raw" mode. It usually takes about 450 * instructions (that's without canonical processing or echo!). * slinput is reasonably fast (usually 40 instructions plus * call overhead). */ do { /* * This may look odd, but it is using save-and-enable * semantics instead of the save-and-disable semantics * that are used everywhere else. 
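 *
 * Aside on siodivisor() above (worked example, not from the original
 * source): with the common 1.8432 MHz reference clock and a request for
 * 9600 bps, divisor = (1843200 / (8 * 9600) + 1) / 2 = 12, the classic
 * 16x divisor 1843200 / (16 * 9600); the error term is then 0 tenths of
 * a percent, and a speed whose nearest divisor lands more than 30
 * tenths (3.0%) away is rejected by returning 0.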
*/ mtx_unlock_spin(&sio_lock); incc = com->iptr - buf; if (tp->t_rawq.c_cc + incc > tp->t_ihiwat && (com->state & CS_RTS_IFLOW || tp->t_iflag & IXOFF) && !(tp->t_state & TS_TBLOCK)) ttyblock(tp); com->delta_error_counts[CE_TTY_BUF_OVERFLOW] += b_to_q((char *)buf, incc, &tp->t_rawq); buf += incc; tk_nin += incc; tk_rawcc += incc; tp->t_rawcc += incc; ttwakeup(tp); if (tp->t_state & TS_TTSTOP && (tp->t_iflag & IXANY || tp->t_cc[VSTART] == tp->t_cc[VSTOP])) { tp->t_state &= ~TS_TTSTOP; tp->t_lflag &= ~FLUSHO; comstart(tp); } mtx_lock_spin(&sio_lock); } while (buf < com->iptr); } else { do { /* * This may look odd, but it is using save-and-enable * semantics instead of the save-and-disable semantics * that are used everywhere else. */ mtx_unlock_spin(&sio_lock); line_status = buf[com->ierroff]; recv_data = *buf++; if (line_status & (LSR_BI | LSR_FE | LSR_OE | LSR_PE)) { if (line_status & LSR_BI) recv_data |= TTY_BI; if (line_status & LSR_FE) recv_data |= TTY_FE; if (line_status & LSR_OE) recv_data |= TTY_OE; if (line_status & LSR_PE) recv_data |= TTY_PE; } ttyld_rint(tp, recv_data); mtx_lock_spin(&sio_lock); } while (buf < com->iptr); } com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; /* * There is now room for another low-level buffer full of input, * so enable RTS if it is now disabled and there is room in the * high-level buffer. */ if ((com->state & CS_RTS_IFLOW) && !(com->mcr_image & MCR_RTS) && !(tp->t_state & TS_TBLOCK)) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } static int siointr(arg) void *arg; { struct com_s *com; #ifndef COM_MULTIPORT com = (struct com_s *)arg; mtx_lock_spin(&sio_lock); siointr1(com); mtx_unlock_spin(&sio_lock); #else /* COM_MULTIPORT */ bool_t possibly_more_intrs; int unit; /* * Loop until there is no activity on any port. This is necessary * to get an interrupt edge more than to avoid another interrupt. * If the IRQ signal is just an OR of the IRQ signals from several * devices, then the edge from one may be lost because another is * on. */ mtx_lock_spin(&sio_lock); do { possibly_more_intrs = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); /* * XXX COM_LOCK(); * would it work here, or be counter-productive? 
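 *
 * Concrete illustration of the edge problem described above, with a
 * hypothetical configuration: if sio0 and sio1 are wired to the same
 * (effectively ORed) interrupt line, an event on sio1 that arrives
 * while sio0 is still asserting the line produces no new edge and would
 * be lost if each port were serviced only once, so the surrounding
 * do-while rescans every unit until all of them report IIR_NOPEND.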
*/ if (com != NULL && !com->gone && (inb(com->int_id_port) & IIR_IMASK) != IIR_NOPEND) { siointr1(com); possibly_more_intrs = TRUE; } /* XXX COM_UNLOCK(); */ } } while (possibly_more_intrs); mtx_unlock_spin(&sio_lock); #endif /* COM_MULTIPORT */ return(FILTER_HANDLED); } static struct timespec siots[8]; static int siotso; static int volatile siotsunit = -1; static int sysctl_siots(SYSCTL_HANDLER_ARGS) { char buf[128]; long long delta; size_t len; int error, i, tso; for (i = 1, tso = siotso; i < tso; i++) { delta = (long long)(siots[i].tv_sec - siots[i - 1].tv_sec) * 1000000000 + (siots[i].tv_nsec - siots[i - 1].tv_nsec); len = sprintf(buf, "%lld\n", delta); if (delta >= 110000) len += sprintf(buf + len - 1, ": *** %ld.%09ld\n", (long)siots[i].tv_sec, siots[i].tv_nsec) - 1; if (i == tso - 1) buf[len - 1] = '\0'; error = SYSCTL_OUT(req, buf, len); if (error != 0) return (error); uio_yield(); } return (0); } SYSCTL_PROC(_machdep, OID_AUTO, siots, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, sysctl_siots, "A", "sio timestamps"); static void siointr1(com) struct com_s *com; { u_char int_ctl; u_char int_ctl_new; u_char line_status; u_char modem_status; u_char *ioptr; u_char recv_data; if (COM_IIR_TXRDYBUG(com->flags)) { int_ctl = inb(com->int_ctl_port); int_ctl_new = int_ctl; } else { int_ctl = 0; int_ctl_new = 0; } while (!com->gone) { if (com->pps.ppsparam.mode & PPS_CAPTUREBOTH) { modem_status = inb(com->modem_status_port); if ((modem_status ^ com->last_modem_status) & com->pps_bit) { pps_capture(&com->pps); pps_event(&com->pps, (modem_status & com->pps_bit) ? PPS_CAPTUREASSERT : PPS_CAPTURECLEAR); } } line_status = inb(com->line_status_port); /* input event? (check first to help avoid overruns) */ while (line_status & LSR_RCV_MASK) { /* break/unnattached error bits or real input? */ if (!(line_status & LSR_RXRDY)) recv_data = 0; else recv_data = inb(com->data_port); #ifdef KDB #ifdef ALT_BREAK_TO_DEBUGGER if (com->unit == comconsole && kdb_alt_break(recv_data, &com->alt_brk_state) != 0) - kdb_enter("Break sequence on console"); + kdb_enter_why(KDB_WHY_BREAK, + "Break sequence on console"); #endif /* ALT_BREAK_TO_DEBUGGER */ #endif /* KDB */ if (line_status & (LSR_BI | LSR_FE | LSR_PE)) { /* * Don't store BI if IGNBRK or FE/PE if IGNPAR. * Otherwise, push the work to a higher level * (to handle PARMRK) if we're bypassing. * Otherwise, convert BI/FE and PE+INPCK to 0. * * This makes bypassing work right in the * usual "raw" case (IGNBRK set, and IGNPAR * and INPCK clear). * * Note: BI together with FE/PE means just BI. 
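 *
 * A summary of the decisions made below, derived from the code rather
 * than from any standard:
 *
 *	status bit	relevant iflag	action
 *	LSR_BI		IGNBRK set	drop the character
 *	LSR_BI		IGNBRK clear	keep; 0 when bypassing
 *	LSR_FE		IGNPAR set	drop the character
 *	LSR_FE		IGNPAR clear	keep; 0 when bypassing
 *	LSR_PE		IGNPAR set	drop the character
 *	LSR_PE		IGNPAR clear	keep; 0 when bypassing and INPCK
 *
 * A break on the console may instead enter the debugger when
 * BREAK_TO_DEBUGGER is configured.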
*/ if (line_status & LSR_BI) { #if defined(KDB) && defined(BREAK_TO_DEBUGGER) if (com->unit == comconsole) { - kdb_enter("Line break on console"); + kdb_enter_why(KDB_WHY_BREAK, + "Line break on console"); goto cont; } #endif if (com->tp == NULL || com->tp->t_iflag & IGNBRK) goto cont; } else { if (com->tp == NULL || com->tp->t_iflag & IGNPAR) goto cont; } if (com->tp->t_state & TS_CAN_BYPASS_L_RINT && (line_status & (LSR_BI | LSR_FE) || com->tp->t_iflag & INPCK)) recv_data = 0; } ++com->bytes_in; if (com->tp != NULL && com->tp->t_hotchar != 0 && recv_data == com->tp->t_hotchar) swi_sched(sio_fast_ih, 0); ioptr = com->iptr; if (ioptr >= com->ibufend) CE_RECORD(com, CE_INTERRUPT_BUF_OVERFLOW); else { if (com->tp != NULL && com->tp->t_do_timestamp) microtime(&com->tp->t_timestamp); ++com_events; swi_sched(sio_slow_ih, SWI_DELAY); #if 0 /* for testing input latency vs efficiency */ if (com->iptr - com->ibuf == 8) swi_sched(sio_fast_ih, 0); #endif ioptr[0] = recv_data; ioptr[com->ierroff] = line_status; com->iptr = ++ioptr; if (ioptr == com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); if (line_status & LSR_OE) CE_RECORD(com, CE_OVERRUN); } cont: if (line_status & LSR_TXRDY && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) goto txrdy; /* * "& 0x7F" is to avoid the gcc-1.40 generating a slow * jump from the top of the loop to here */ line_status = inb(com->line_status_port) & 0x7F; } /* modem status change? (always check before doing output) */ modem_status = inb(com->modem_status_port); if (modem_status != com->last_modem_status) { /* * Schedule high level to handle DCD changes. Note * that we don't use the delta bits anywhere. Some * UARTs mess them up, and it's easy to remember the * previous bits and calculate the delta. */ com->last_modem_status = modem_status; if (!(com->state & CS_CHECKMSR)) { com_events += LOTS_OF_EVENTS; com->state |= CS_CHECKMSR; swi_sched(sio_fast_ih, 0); } /* handle CTS change immediately for crisp flow ctl */ if (com->state & CS_CTS_OFLOW) { if (modem_status & MSR_CTS) com->state |= CS_ODEVREADY; else com->state &= ~CS_ODEVREADY; } } txrdy: /* output queued and everything ready? */ if (line_status & LSR_TXRDY && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) { ioptr = com->obufq.l_head; if (com->tx_fifo_size > 1 && com->unit != siotsunit) { u_int ocount; ocount = com->obufq.l_tail - ioptr; if (ocount > com->tx_fifo_size) ocount = com->tx_fifo_size; com->bytes_out += ocount; do outb(com->data_port, *ioptr++); while (--ocount != 0); } else { outb(com->data_port, *ioptr++); ++com->bytes_out; if (com->unit == siotsunit && siotso < sizeof siots / sizeof siots[0]) nanouptime(&siots[siotso++]); } com->obufq.l_head = ioptr; if (COM_IIR_TXRDYBUG(com->flags)) int_ctl_new = int_ctl | IER_ETXRDY; if (ioptr >= com->obufq.l_tail) { struct lbq *qp; qp = com->obufq.l_next; qp->l_queued = FALSE; qp = qp->l_next; if (qp != NULL) { com->obufq.l_head = qp->l_head; com->obufq.l_tail = qp->l_tail; com->obufq.l_next = qp; } else { /* output just completed */ if (COM_IIR_TXRDYBUG(com->flags)) int_ctl_new = int_ctl & ~IER_ETXRDY; com->state &= ~CS_BUSY; } if (!(com->state & CS_ODONE)) { com_events += LOTS_OF_EVENTS; com->state |= CS_ODONE; /* handle at high level ASAP */ swi_sched(sio_fast_ih, 0); } } if (COM_IIR_TXRDYBUG(com->flags) && int_ctl != int_ctl_new) outb(com->int_ctl_port, int_ctl_new); } /* finished? 
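 *
 * Back-of-the-envelope figure for the burst output above, not from the
 * original source: a 16550A with tx_fifo_size 16 lets one LSR_TXRDY
 * interrupt push up to 16 bytes with consecutive outb()s, so at 115200
 * bps (about 11520 characters per second at 10 bits each) the transmit
 * interrupt rate drops from roughly 11500/s for a single-byte 8250 to
 * roughly 720/s.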
*/ #ifndef COM_MULTIPORT if ((inb(com->int_id_port) & IIR_IMASK) == IIR_NOPEND) #endif /* COM_MULTIPORT */ return; } } /* software interrupt handler for SWI_TTY */ static void siopoll(void *dummy) { int unit; if (com_events == 0) return; repeat: for (unit = 0; unit < sio_numunits; ++unit) { struct com_s *com; int incc; struct tty *tp; com = com_addr(unit); if (com == NULL) continue; tp = com->tp; if (tp == NULL || com->gone) { /* * Discard any events related to never-opened or * going-away devices. */ mtx_lock_spin(&sio_lock); incc = com->iptr - com->ibuf; com->iptr = com->ibuf; if (com->state & CS_CHECKMSR) { incc += LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; } com_events -= incc; mtx_unlock_spin(&sio_lock); continue; } if (com->iptr != com->ibuf) { mtx_lock_spin(&sio_lock); sioinput(com); mtx_unlock_spin(&sio_lock); } if (com->state & CS_CHECKMSR) { u_char delta_modem_status; mtx_lock_spin(&sio_lock); delta_modem_status = com->last_modem_status ^ com->prev_modem_status; com->prev_modem_status = com->last_modem_status; com_events -= LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; mtx_unlock_spin(&sio_lock); if (delta_modem_status & MSR_DCD) ttyld_modem(tp, com->prev_modem_status & MSR_DCD); } if (com->state & CS_ODONE) { mtx_lock_spin(&sio_lock); com_events -= LOTS_OF_EVENTS; com->state &= ~CS_ODONE; mtx_unlock_spin(&sio_lock); if (!(com->state & CS_BUSY) && !(com->extra_state & CSE_BUSYCHECK)) { timeout(siobusycheck, com, hz / 100); com->extra_state |= CSE_BUSYCHECK; } ttyld_start(tp); } if (com_events == 0) break; } if (com_events >= LOTS_OF_EVENTS) goto repeat; } static void combreak(tp, sig) struct tty *tp; int sig; { struct com_s *com; com = tp->t_sc; if (sig) sio_setreg(com, com_cfcr, com->cfcr_image |= CFCR_SBREAK); else sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); } static int comparam(tp, t) struct tty *tp; struct termios *t; { u_int cfcr; int cflag; struct com_s *com; u_int divisor; u_char dlbh; u_char dlbl; u_char efr_flowbits; int s; com = tp->t_sc; if (com == NULL) return (ENODEV); /* check requested parameters */ if (t->c_ispeed != (t->c_ospeed != 0 ? t->c_ospeed : tp->t_ospeed)) return (EINVAL); divisor = siodivisor(com->rclk, t->c_ispeed); if (divisor == 0) return (EINVAL); /* parameters are OK, convert them to the com struct and the device */ s = spltty(); if (t->c_ospeed == 0) (void)commodem(tp, 0, SER_DTR); /* hang up line */ else (void)commodem(tp, SER_DTR, 0); cflag = t->c_cflag; switch (cflag & CSIZE) { case CS5: cfcr = CFCR_5BITS; break; case CS6: cfcr = CFCR_6BITS; break; case CS7: cfcr = CFCR_7BITS; break; default: cfcr = CFCR_8BITS; break; } if (cflag & PARENB) { cfcr |= CFCR_PENAB; if (!(cflag & PARODD)) cfcr |= CFCR_PEVEN; } if (cflag & CSTOPB) cfcr |= CFCR_STOPB; if (com->hasfifo) { /* * Use a fifo trigger level low enough so that the input * latency from the fifo is less than about 16 msec and * the total latency is less than about 30 msec. These * latencies are reasonable for humans. Serial comms * protocols shouldn't expect anything better since modem * latencies are larger. * * The fifo trigger level cannot be set at RX_HIGH for high * speed connections without further work on reducing * interrupt disablement times in other parts of the system, * without producing silo overflow errors. */ com->fifo_image = com->unit == siotsunit ? 0 : t->c_ispeed <= 4800 ? FIFO_ENABLE : FIFO_ENABLE | FIFO_RX_MEDH; #ifdef COM_ESP /* * The Hayes ESP card needs the fifo DMA mode bit set * in compatibility mode. 
If not, it will interrupt * for each character received. */ if (com->esp) com->fifo_image |= FIFO_DMA_MODE; #endif sio_setreg(com, com_fifo, com->fifo_image); } /* * This returns with interrupts disabled so that we can complete * the speed change atomically. Keeping interrupts disabled is * especially important while com_data is hidden. */ (void) siosetwater(com, t->c_ispeed); sio_setreg(com, com_cfcr, cfcr | CFCR_DLAB); /* * Only set the divisor registers if they would change, since on * some 16550 incompatibles (UMC8669F), setting them while input * is arriving loses sync until data stops arriving. */ dlbl = divisor & 0xFF; if (sio_getreg(com, com_dlbl) != dlbl) sio_setreg(com, com_dlbl, dlbl); dlbh = divisor >> 8; if (sio_getreg(com, com_dlbh) != dlbh) sio_setreg(com, com_dlbh, dlbh); efr_flowbits = 0; if (cflag & CRTS_IFLOW) { com->state |= CS_RTS_IFLOW; efr_flowbits |= EFR_AUTORTS; /* * If CS_RTS_IFLOW just changed from off to on, the change * needs to be propagated to MCR_RTS. This isn't urgent, * so do it later by calling comstart() instead of repeating * a lot of code from comstart() here. */ } else if (com->state & CS_RTS_IFLOW) { com->state &= ~CS_RTS_IFLOW; /* * CS_RTS_IFLOW just changed from on to off. Force MCR_RTS * on here, since comstart() won't do it later. */ outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } /* * Set up state to handle output flow control. * XXX - worth handling MDMBUF (DCD) flow control at the lowest level? * Now has 10+ msec latency, while CTS flow has 50- usec latency. */ com->state |= CS_ODEVREADY; com->state &= ~CS_CTS_OFLOW; if (cflag & CCTS_OFLOW) { com->state |= CS_CTS_OFLOW; efr_flowbits |= EFR_AUTOCTS; if (!(com->last_modem_status & MSR_CTS)) com->state &= ~CS_ODEVREADY; } if (com->st16650a) { sio_setreg(com, com_lcr, LCR_EFR_ENABLE); sio_setreg(com, com_efr, (sio_getreg(com, com_efr) & ~(EFR_AUTOCTS | EFR_AUTORTS)) | efr_flowbits); } sio_setreg(com, com_cfcr, com->cfcr_image = cfcr); /* XXX shouldn't call functions while intrs are disabled. */ ttyldoptim(tp); mtx_unlock_spin(&sio_lock); splx(s); comstart(tp); if (com->ibufold != NULL) { free(com->ibufold, M_DEVBUF); com->ibufold = NULL; } return (0); } /* * This function must be called with the sio_lock mutex released and will * return with it obtained. */ static int siosetwater(com, speed) struct com_s *com; speed_t speed; { int cp4ticks; u_char *ibuf; int ibufsize; struct tty *tp; /* * Make the buffer size large enough to handle a softtty interrupt * latency of about 2 ticks without loss of throughput or data * (about 3 ticks if input flow control is not used or not honoured, * but a bit less for CS5-CS7 modes). */ cp4ticks = speed / 10 / hz * 4; for (ibufsize = 128; ibufsize < cp4ticks;) ibufsize <<= 1; if (ibufsize == com->ibufsize) { mtx_lock_spin(&sio_lock); return (0); } /* * Allocate input buffer. The extra factor of 2 in the size is * to allow for an error byte for each input byte. */ ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT); if (ibuf == NULL) { mtx_lock_spin(&sio_lock); return (ENOMEM); } /* Initialize non-critical variables. */ com->ibufold = com->ibuf; com->ibufsize = ibufsize; tp = com->tp; if (tp != NULL) { tp->t_ififosize = 2 * ibufsize; tp->t_ispeedwat = (speed_t)-1; tp->t_ospeedwat = (speed_t)-1; } /* * Read current input buffer, if any. Continue with interrupts * disabled. */ mtx_lock_spin(&sio_lock); if (com->iptr != com->ibuf) sioinput(com); /*- * Initialize critical variables, including input buffer watermarks. 
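 * As a worked example of the sizing above (illustrative numbers, not
 * from the original source): at 115200 bps with hz = 100, cp4ticks =
 * 115200 / 10 / 100 * 4 = 460, so ibufsize is rounded up to 512 and the
 * high watermark set below lands 3 * 512 / 4 = 384 bytes into the
 * buffer.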
* The external device is asked to stop sending when the buffer * exactly reaches high water, or when the high level requests it. * The high level is notified immediately (rather than at a later * clock tick) when this watermark is reached. * The buffer size is chosen so the watermark should almost never * be reached. * The low watermark is invisibly 0 since the buffer is always * emptied all at once. */ com->iptr = com->ibuf = ibuf; com->ibufend = ibuf + ibufsize; com->ierroff = ibufsize; com->ihighwater = ibuf + 3 * ibufsize / 4; return (0); } static void comstart(tp) struct tty *tp; { struct com_s *com; int s; com = tp->t_sc; if (com == NULL) return; s = spltty(); mtx_lock_spin(&sio_lock); if (tp->t_state & TS_TTSTOP) com->state &= ~CS_TTGO; else com->state |= CS_TTGO; if (tp->t_state & TS_TBLOCK) { if (com->mcr_image & MCR_RTS && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); } else { if (!(com->mcr_image & MCR_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } mtx_unlock_spin(&sio_lock); if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { ttwwakeup(tp); splx(s); return; } if (tp->t_outq.c_cc != 0) { struct lbq *qp; struct lbq *next; if (!com->obufs[0].l_queued) { com->obufs[0].l_tail = com->obuf1 + q_to_b(&tp->t_outq, com->obuf1, sizeof com->obuf1); com->obufs[0].l_next = NULL; com->obufs[0].l_queued = TRUE; mtx_lock_spin(&sio_lock); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[0]; } else { com->obufq.l_head = com->obufs[0].l_head; com->obufq.l_tail = com->obufs[0].l_tail; com->obufq.l_next = &com->obufs[0]; com->state |= CS_BUSY; } mtx_unlock_spin(&sio_lock); } if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) { com->obufs[1].l_tail = com->obuf2 + q_to_b(&tp->t_outq, com->obuf2, sizeof com->obuf2); com->obufs[1].l_next = NULL; com->obufs[1].l_queued = TRUE; mtx_lock_spin(&sio_lock); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[1]; } else { com->obufq.l_head = com->obufs[1].l_head; com->obufq.l_tail = com->obufs[1].l_tail; com->obufq.l_next = &com->obufs[1]; com->state |= CS_BUSY; } mtx_unlock_spin(&sio_lock); } tp->t_state |= TS_BUSY; } mtx_lock_spin(&sio_lock); if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); /* fake interrupt to start output */ mtx_unlock_spin(&sio_lock); ttwwakeup(tp); splx(s); } static void comstop(tp, rw) struct tty *tp; int rw; { struct com_s *com; com = tp->t_sc; if (com == NULL || com->gone) return; mtx_lock_spin(&sio_lock); if (rw & FWRITE) { if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. */ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_XMT_RST | com->fifo_image); com->obufs[0].l_queued = FALSE; com->obufs[1].l_queued = FALSE; if (com->state & CS_ODONE) com_events -= LOTS_OF_EVENTS; com->state &= ~(CS_ODONE | CS_BUSY); com->tp->t_state &= ~TS_BUSY; } if (rw & FREAD) { if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. 
*/ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_RCV_RST | com->fifo_image); com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; } mtx_unlock_spin(&sio_lock); comstart(tp); } static int commodem(struct tty *tp, int sigon, int sigoff) { struct com_s *com; int bitand, bitor, msr; com = tp->t_sc; if (com->gone) return(0); if (sigon != 0 || sigoff != 0) { bitand = bitor = 0; if (sigoff & SER_DTR) bitand |= MCR_DTR; if (sigoff & SER_RTS) bitand |= MCR_RTS; if (sigon & SER_DTR) bitor |= MCR_DTR; if (sigon & SER_RTS) bitor |= MCR_RTS; bitand = ~bitand; mtx_lock_spin(&sio_lock); com->mcr_image &= bitand; com->mcr_image |= bitor; outb(com->modem_ctl_port, com->mcr_image); mtx_unlock_spin(&sio_lock); return (0); } else { bitor = 0; if (com->mcr_image & MCR_DTR) bitor |= SER_DTR; if (com->mcr_image & MCR_RTS) bitor |= SER_RTS; msr = com->prev_modem_status; if (msr & MSR_CTS) bitor |= SER_CTS; if (msr & MSR_DCD) bitor |= SER_DCD; if (msr & MSR_DSR) bitor |= SER_DSR; if (msr & MSR_DSR) bitor |= SER_DSR; if (msr & (MSR_RI | MSR_TERI)) bitor |= SER_RI; return (bitor); } } static void siosettimeout() { struct com_s *com; bool_t someopen; int unit; /* * Set our timeout period to 1 second if no polled devices are open. * Otherwise set it to max(1/200, 1/hz). * Enable timeouts iff some device is open. */ untimeout(comwakeup, (void *)NULL, sio_timeout_handle); sio_timeout = hz; someopen = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && com->tp != NULL && com->tp->t_state & TS_ISOPEN && !com->gone) { someopen = TRUE; if (com->poll || com->poll_output) { sio_timeout = hz > 200 ? hz / 200 : 1; break; } } } if (someopen) { sio_timeouts_until_log = hz / sio_timeout; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); } else { /* Flush error messages, if any. */ sio_timeouts_until_log = 1; comwakeup((void *)NULL); untimeout(comwakeup, (void *)NULL, sio_timeout_handle); } } static void comwakeup(chan) void *chan; { struct com_s *com; int unit; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); /* * Recover from lost output interrupts. * Poll any lines that don't use interrupts. */ for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && !com->gone && (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) { mtx_lock_spin(&sio_lock); siointr1(com); mtx_unlock_spin(&sio_lock); } } /* * Check for and log errors, but not too often. */ if (--sio_timeouts_until_log > 0) return; sio_timeouts_until_log = hz / sio_timeout; for (unit = 0; unit < sio_numunits; ++unit) { int errnum; com = com_addr(unit); if (com == NULL) continue; if (com->gone) continue; for (errnum = 0; errnum < CE_NTYPES; ++errnum) { u_int delta; u_long total; mtx_lock_spin(&sio_lock); delta = com->delta_error_counts[errnum]; com->delta_error_counts[errnum] = 0; mtx_unlock_spin(&sio_lock); if (delta == 0) continue; total = com->error_counts[errnum] += delta; log(LOG_ERR, "sio%d: %u more %s%s (total %lu)\n", unit, delta, error_desc[errnum], delta == 1 ? "" : "s", total); } } } /* * Following are all routines needed for SIO to act as console */ struct siocnstate { u_char dlbl; u_char dlbh; u_char ier; u_char cfcr; u_char mcr; }; /* * This is a function in order to not replicate "ttyd%d" more * places than absolutely necessary. 
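 *
 * (Aside on siosettimeout()/comwakeup() above, with illustrative
 * numbers: when some port is open in polled mode and hz = 1000,
 * sio_timeout becomes hz / 200 = 5 ticks, so comwakeup() runs about
 * every 5 ms; with only interrupt-driven ports open it runs once per
 * second, and with nothing open the callout is torn down entirely.)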
*/ static void siocnset(struct consdev *cd, int unit) { cd->cn_unit = unit; sprintf(cd->cn_name, "ttyd%d", unit); } static speed_t siocngetspeed(Port_t, u_long rclk); static void siocnclose(struct siocnstate *sp, Port_t iobase); static void siocnopen(struct siocnstate *sp, Port_t iobase, int speed); static void siocntxwait(Port_t iobase); static cn_probe_t sio_cnprobe; static cn_init_t sio_cninit; static cn_term_t sio_cnterm; static cn_getc_t sio_cngetc; static cn_putc_t sio_cnputc; CONSOLE_DRIVER(sio); static void siocntxwait(iobase) Port_t iobase; { int timo; /* * Wait for any pending transmission to finish. Required to avoid * the UART lockup bug when the speed is changed, and for normal * transmits. */ timo = 100000; while ((inb(iobase + com_lsr) & (LSR_TSRE | LSR_TXRDY)) != (LSR_TSRE | LSR_TXRDY) && --timo != 0) ; } /* * Read the serial port specified and try to figure out what speed * it's currently running at. We're assuming the serial port has * been initialized and is basicly idle. This routine is only intended * to be run at system startup. * * If the value read from the serial port doesn't make sense, return 0. */ static speed_t siocngetspeed(iobase, rclk) Port_t iobase; u_long rclk; { u_int divisor; u_char dlbh; u_char dlbl; u_char cfcr; cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); dlbl = inb(iobase + com_dlbl); dlbh = inb(iobase + com_dlbh); outb(iobase + com_cfcr, cfcr); divisor = dlbh << 8 | dlbl; /* XXX there should be more sanity checking. */ if (divisor == 0) return (CONSPEED); return (rclk / (16UL * divisor)); } static void siocnopen(sp, iobase, speed) struct siocnstate *sp; Port_t iobase; int speed; { u_int divisor; u_char dlbh; u_char dlbl; /* * Save all the device control registers except the fifo register * and set our default ones (cs8 -parenb speed=comdefaultrate). * We can't save the fifo register since it is read-only. */ sp->ier = inb(iobase + com_ier); outb(iobase + com_ier, 0); /* spltty() doesn't stop siointr() */ siocntxwait(iobase); sp->cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); sp->dlbl = inb(iobase + com_dlbl); sp->dlbh = inb(iobase + com_dlbh); /* * Only set the divisor registers if they would change, since on * some 16550 incompatibles (Startech), setting them clears the * data input register. This also reduces the effects of the * UMC8669F bug. */ divisor = siodivisor(comdefaultrclk, speed); dlbl = divisor & 0xFF; if (sp->dlbl != dlbl) outb(iobase + com_dlbl, dlbl); dlbh = divisor >> 8; if (sp->dlbh != dlbh) outb(iobase + com_dlbh, dlbh); outb(iobase + com_cfcr, CFCR_8BITS); sp->mcr = inb(iobase + com_mcr); /* * We don't want interrupts, but must be careful not to "disable" * them by clearing the MCR_IENABLE bit, since that might cause * an interrupt by floating the IRQ line. */ outb(iobase + com_mcr, (sp->mcr & MCR_IENABLE) | MCR_DTR | MCR_RTS); } static void siocnclose(sp, iobase) struct siocnstate *sp; Port_t iobase; { /* * Restore the device control registers. */ siocntxwait(iobase); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); if (sp->dlbl != inb(iobase + com_dlbl)) outb(iobase + com_dlbl, sp->dlbl); if (sp->dlbh != inb(iobase + com_dlbh)) outb(iobase + com_dlbh, sp->dlbh); outb(iobase + com_cfcr, sp->cfcr); /* * XXX damp oscillations of MCR_DTR and MCR_RTS by not restoring them. 
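 *
 * Aside on siocngetspeed() above, with illustrative numbers not taken
 * from the original source: it simply inverts the divisor relation, so
 * a latched divisor of 12 with the usual 1843200 Hz reference clock
 * reads back as 1843200 / (16 * 12) = 9600 bps, while a divisor of 0
 * (uninitialized hardware) falls back to CONSPEED.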
*/ outb(iobase + com_mcr, sp->mcr | MCR_DTR | MCR_RTS); outb(iobase + com_ier, sp->ier); } static void sio_cnprobe(cp) struct consdev *cp; { speed_t boot_speed; u_char cfcr; u_int divisor; int s, unit; struct siocnstate sp; /* * Find our first enabled console, if any. If it is a high-level * console device, then initialize it and return successfully. * If it is a low-level console device, then initialize it and * return unsuccessfully. It must be initialized in both cases * for early use by console drivers and debuggers. Initializing * the hardware is not necessary in all cases, since the i/o * routines initialize it on the fly, but it is necessary if * input might arrive while the hardware is switched back to an * uninitialized state. We can't handle multiple console devices * yet because our low-level routines don't take a device arg. * We trust the user to set the console flags properly so that we * don't need to probe. */ cp->cn_pri = CN_DEAD; for (unit = 0; unit < 16; unit++) { /* XXX need to know how many */ int flags; if (resource_disabled("sio", unit)) continue; if (resource_int_value("sio", unit, "flags", &flags)) continue; if (COM_CONSOLE(flags) || COM_DEBUGGER(flags)) { int port; Port_t iobase; if (resource_int_value("sio", unit, "port", &port)) continue; iobase = port; s = spltty(); if (boothowto & RB_SERIAL) { boot_speed = siocngetspeed(iobase, comdefaultrclk); if (boot_speed) comdefaultrate = boot_speed; } /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); divisor = siodivisor(comdefaultrclk, comdefaultrate); outb(iobase + com_dlbl, divisor & 0xff); outb(iobase + com_dlbh, divisor >> 8); outb(iobase + com_cfcr, cfcr); siocnopen(&sp, iobase, comdefaultrate); splx(s); if (COM_CONSOLE(flags) && !COM_LLCONSOLE(flags)) { siocnset(cp, unit); cp->cn_pri = COM_FORCECONSOLE(flags) || boothowto & RB_SERIAL ? 
CN_REMOTE : CN_NORMAL; siocniobase = iobase; siocnunit = unit; } #ifdef GDB if (COM_DEBUGGER(flags)) siogdbiobase = iobase; #endif } } } static void sio_cninit(cp) struct consdev *cp; { comconsole = cp->cn_unit; } static void sio_cnterm(cp) struct consdev *cp; { comconsole = -1; } static int sio_cngetc(struct consdev *cd) { int c; Port_t iobase; int s; struct siocnstate sp; speed_t speed; if (cd != NULL && cd->cn_unit == siocnunit) { iobase = siocniobase; speed = comdefaultrate; } else { #ifdef GDB iobase = siogdbiobase; speed = gdbdefaultrate; #else return (-1); #endif } s = spltty(); siocnopen(&sp, iobase, speed); if (inb(iobase + com_lsr) & LSR_RXRDY) c = inb(iobase + com_data); else c = -1; siocnclose(&sp, iobase); splx(s); return (c); } static void sio_cnputc(struct consdev *cd, int c) { int need_unlock; int s; struct siocnstate sp; Port_t iobase; speed_t speed; if (cd != NULL && cd->cn_unit == siocnunit) { iobase = siocniobase; speed = comdefaultrate; } else { #ifdef GDB iobase = siogdbiobase; speed = gdbdefaultrate; #else return; #endif } s = spltty(); need_unlock = 0; if (!kdb_active && sio_inited == 2 && !mtx_owned(&sio_lock)) { mtx_lock_spin(&sio_lock); need_unlock = 1; } siocnopen(&sp, iobase, speed); siocntxwait(iobase); outb(iobase + com_data, c); siocnclose(&sp, iobase); if (need_unlock) mtx_unlock_spin(&sio_lock); splx(s); } /* * Remote gdb(1) support. */ #if defined(GDB) #include static gdb_probe_f siogdbprobe; static gdb_init_f siogdbinit; static gdb_term_f siogdbterm; static gdb_getc_f siogdbgetc; static gdb_putc_f siogdbputc; GDB_DBGPORT(sio, siogdbprobe, siogdbinit, siogdbterm, siogdbgetc, siogdbputc); static int siogdbprobe(void) { return ((siogdbiobase != 0) ? 0 : -1); } static void siogdbinit(void) { } static void siogdbterm(void) { } static void siogdbputc(int c) { sio_cnputc(NULL, c); } static int siogdbgetc(void) { return (sio_cngetc(NULL)); } #endif Index: stable/7/sys/dev/syscons/syscons.c =================================================================== --- stable/7/sys/dev/syscons/syscons.c (revision 177733) +++ stable/7/sys/dev/syscons/syscons.c (revision 177734) @@ -1,3704 +1,3705 @@ /*- * Copyright (c) 1992-1998 Søren Schmidt * All rights reserved. * * This code is derived from software contributed to The DragonFly Project * by Sascha Wildner * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer, * without modification, immediately at the beginning of the file. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_syscons.h" #include "opt_splash.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(__sparc64__) || defined(__powerpc__) #include #else #include #endif #if defined( __i386__) || defined(__amd64__) #include #include #endif #include #include #include #include #define COLD 0 #define WARM 1 #define DEFAULT_BLANKTIME (5*60) /* 5 minutes */ #define MAX_BLANKTIME (7*24*60*60) /* 7 days!? */ #define KEYCODE_BS 0x0e /* "<-- Backspace" key, XXX */ typedef struct default_attr { int std_color; /* normal hardware color */ int rev_color; /* reverse hardware color */ } default_attr; static default_attr user_default = { SC_NORM_ATTR, SC_NORM_REV_ATTR, }; static default_attr kernel_default = { SC_KERNEL_CONS_ATTR, SC_KERNEL_CONS_REV_ATTR, }; static int sc_console_unit = -1; static int sc_saver_keyb_only = 1; static scr_stat *sc_console; static struct tty *sc_console_tty; static struct consdev *sc_consptr; static void *kernel_console_ts; static scr_stat main_console; static struct cdev *main_devs[MAXCONS]; static char init_done = COLD; static char shutdown_in_progress = FALSE; static char sc_malloc = FALSE; static int saver_mode = CONS_NO_SAVER; /* LKM/user saver */ static int run_scrn_saver = FALSE; /* should run the saver? */ static int enable_bell = TRUE; /* enable beeper */ #ifndef SC_DISABLE_REBOOT static int enable_reboot = TRUE; /* enable keyboard reboot */ #endif #ifndef SC_DISABLE_KDBKEY static int enable_kdbkey = TRUE; /* enable keyboard debug */ #endif static long scrn_blank_time = 0; /* screen saver timeout value */ #ifdef DEV_SPLASH static int scrn_blanked; /* # of blanked screen */ static int sticky_splash = FALSE; static void none_saver(sc_softc_t *sc, int blank) { } static void (*current_saver)(sc_softc_t *, int) = none_saver; #endif SYSCTL_NODE(_hw, OID_AUTO, syscons, CTLFLAG_RD, 0, "syscons"); SYSCTL_NODE(_hw_syscons, OID_AUTO, saver, CTLFLAG_RD, 0, "saver"); SYSCTL_INT(_hw_syscons_saver, OID_AUTO, keybonly, CTLFLAG_RW, &sc_saver_keyb_only, 0, "screen saver interrupted by input only"); SYSCTL_INT(_hw_syscons, OID_AUTO, bell, CTLFLAG_RW, &enable_bell, 0, "enable bell"); #ifndef SC_DISABLE_REBOOT SYSCTL_INT(_hw_syscons, OID_AUTO, kbd_reboot, CTLFLAG_RW|CTLFLAG_SECURE, &enable_reboot, 0, "enable keyboard reboot"); #endif #ifndef SC_DISABLE_KDBKEY SYSCTL_INT(_hw_syscons, OID_AUTO, kbd_debug, CTLFLAG_RW|CTLFLAG_SECURE, &enable_kdbkey, 0, "enable keyboard debug"); #endif #if !defined(SC_NO_FONT_LOADING) && defined(SC_DFLT_FONT) #include "font.h" #endif d_ioctl_t *sc_user_ioctl; static bios_values_t bios_value; static int enable_panic_key; SYSCTL_INT(_machdep, OID_AUTO, enable_panic_key, CTLFLAG_RW, &enable_panic_key, 0, "Enable panic via keypress specified in kbdmap(5)"); #define SC_CONSOLECTL 255 #define VIRTUAL_TTY(sc, x) (SC_DEV((sc), (x)) != NULL ? 
\ SC_DEV((sc), (x))->si_tty : NULL) #define ISTTYOPEN(tp) ((tp) && ((tp)->t_state & TS_ISOPEN)) static int debugger; /* prototypes */ static int sc_allocate_keyboard(sc_softc_t *sc, int unit); static struct tty *sc_alloc_tty(struct cdev *dev); static int scvidprobe(int unit, int flags, int cons); static int sckbdprobe(int unit, int flags, int cons); static void scmeminit(void *arg); static int scdevtounit(struct cdev *dev); static kbd_callback_func_t sckbdevent; static int scparam(struct tty *tp, struct termios *t); static void scstart(struct tty *tp); static void scinit(int unit, int flags); static scr_stat *sc_get_stat(struct cdev *devptr); static void scterm(int unit, int flags); static void scshutdown(void *arg, int howto); static u_int scgetc(sc_softc_t *sc, u_int flags); #define SCGETC_CN 1 #define SCGETC_NONBLOCK 2 static int sccngetch(int flags); static void sccnupdate(scr_stat *scp); static scr_stat *alloc_scp(sc_softc_t *sc, int vty); static void init_scp(sc_softc_t *sc, int vty, scr_stat *scp); static timeout_t scrn_timer; static int and_region(int *s1, int *e1, int s2, int e2); static void scrn_update(scr_stat *scp, int show_cursor); #ifdef DEV_SPLASH static int scsplash_callback(int event, void *arg); static void scsplash_saver(sc_softc_t *sc, int show); static int add_scrn_saver(void (*this_saver)(sc_softc_t *, int)); static int remove_scrn_saver(void (*this_saver)(sc_softc_t *, int)); static int set_scrn_saver_mode(scr_stat *scp, int mode, u_char *pal, int border); static int restore_scrn_saver_mode(scr_stat *scp, int changemode); static void stop_scrn_saver(sc_softc_t *sc, void (*saver)(sc_softc_t *, int)); static int wait_scrn_saver_stop(sc_softc_t *sc); #define scsplash_stick(stick) (sticky_splash = (stick)) #else /* !DEV_SPLASH */ #define scsplash_stick(stick) #endif /* DEV_SPLASH */ static int do_switch_scr(sc_softc_t *sc, int s); static int vt_proc_alive(scr_stat *scp); static int signal_vt_rel(scr_stat *scp); static int signal_vt_acq(scr_stat *scp); static int finish_vt_rel(scr_stat *scp, int release, int *s); static int finish_vt_acq(scr_stat *scp); static void exchange_scr(sc_softc_t *sc); static void update_cursor_image(scr_stat *scp); static void change_cursor_shape(scr_stat *scp, int flags, int base, int height); static int save_kbd_state(scr_stat *scp); static int update_kbd_state(scr_stat *scp, int state, int mask); static int update_kbd_leds(scr_stat *scp, int which); static timeout_t blink_screen; static cn_probe_t sc_cnprobe; static cn_init_t sc_cninit; static cn_term_t sc_cnterm; static cn_getc_t sc_cngetc; static cn_putc_t sc_cnputc; CONSOLE_DRIVER(sc); static d_open_t scopen; static d_close_t scclose; static d_read_t scread; static d_ioctl_t scioctl; static d_mmap_t scmmap; static struct cdevsw sc_cdevsw = { .d_version = D_VERSION, .d_open = scopen, .d_close = scclose, .d_read = scread, .d_ioctl = scioctl, .d_mmap = scmmap, .d_name = "sc", .d_flags = D_TTY | D_NEEDGIANT, }; int sc_probe_unit(int unit, int flags) { if (!scvidprobe(unit, flags, FALSE)) { if (bootverbose) printf("%s%d: no video adapter found.\n", SC_DRIVER_NAME, unit); return ENXIO; } /* syscons will be attached even when there is no keyboard */ sckbdprobe(unit, flags, FALSE); return 0; } /* probe video adapters, return TRUE if found */ static int scvidprobe(int unit, int flags, int cons) { /* * Access the video adapter driver through the back door! * Video adapter drivers need to be configured before syscons. 
* However, when syscons is being probed as the low-level console, * they have not been initialized yet. We force them to initialize * themselves here. XXX */ vid_configure(cons ? VIO_PROBE_ONLY : 0); return (vid_find_adapter("*", unit) >= 0); } /* probe the keyboard, return TRUE if found */ static int sckbdprobe(int unit, int flags, int cons) { /* access the keyboard driver through the backdoor! */ kbd_configure(cons ? KB_CONF_PROBE_ONLY : 0); return (kbd_find_keyboard("*", unit) >= 0); } static char *adapter_name(video_adapter_t *adp) { static struct { int type; char *name[2]; } names[] = { { KD_MONO, { "MDA", "MDA" } }, { KD_HERCULES, { "Hercules", "Hercules" } }, { KD_CGA, { "CGA", "CGA" } }, { KD_EGA, { "EGA", "EGA (mono)" } }, { KD_VGA, { "VGA", "VGA (mono)" } }, { KD_PC98, { "PC-98x1", "PC-98x1" } }, { KD_TGA, { "TGA", "TGA" } }, { -1, { "Unknown", "Unknown" } }, }; int i; for (i = 0; names[i].type != -1; ++i) if (names[i].type == adp->va_type) break; return names[i].name[(adp->va_flags & V_ADP_COLOR) ? 0 : 1]; } static struct tty * sc_alloc_tty(struct cdev *dev) { struct tty *tp; tp = dev->si_tty = ttyalloc(); ttyinitmode(tp, 1, 0); tp->t_oproc = scstart; tp->t_param = scparam; tp->t_stop = nottystop; tp->t_dev = dev; return (tp); } int sc_attach_unit(int unit, int flags) { sc_softc_t *sc; scr_stat *scp; #ifdef SC_PIXEL_MODE video_info_t info; #endif int vc; struct cdev *dev; flags &= ~SC_KERNEL_CONSOLE; if (sc_console_unit == unit) { /* * If this unit is being used as the system console, we need to * adjust some variables and buffers before and after scinit(). */ /* assert(sc_console != NULL) */ flags |= SC_KERNEL_CONSOLE; scmeminit(NULL); scinit(unit, flags); if (sc_console->tsw->te_size > 0) { /* assert(sc_console->ts != NULL); */ kernel_console_ts = sc_console->ts; sc_console->ts = malloc(sc_console->tsw->te_size, M_DEVBUF, M_WAITOK); bcopy(kernel_console_ts, sc_console->ts, sc_console->tsw->te_size); (*sc_console->tsw->te_default_attr)(sc_console, user_default.std_color, user_default.rev_color); } } else { scinit(unit, flags); } sc = sc_get_softc(unit, flags & SC_KERNEL_CONSOLE); sc->config = flags; scp = sc_get_stat(sc->dev[0]); if (sc_console == NULL) /* sc_console_unit < 0 */ sc_console = scp; #ifdef SC_PIXEL_MODE if ((sc->config & SC_VESA800X600) && ((*vidsw[sc->adapter]->get_info)(sc->adp, M_VESA_800x600, &info) == 0)) { #ifdef DEV_SPLASH if (sc->flags & SC_SPLASH_SCRN) splash_term(sc->adp); #endif sc_set_graphics_mode(scp, NULL, M_VESA_800x600); sc_set_pixel_mode(scp, NULL, COL, ROW, 16, 8); sc->initial_mode = M_VESA_800x600; #ifdef DEV_SPLASH /* put up the splash again! 
*/ if (sc->flags & SC_SPLASH_SCRN) splash_init(sc->adp, scsplash_callback, sc); #endif } #endif /* SC_PIXEL_MODE */ /* initialize cursor */ if (!ISGRAPHSC(scp)) update_cursor_image(scp); /* get screen update going */ scrn_timer(sc); /* set up the keyboard */ kbd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); update_kbd_state(scp, scp->status, LOCK_MASK); printf("%s%d: %s <%d virtual consoles, flags=0x%x>\n", SC_DRIVER_NAME, unit, adapter_name(sc->adp), sc->vtys, sc->config); if (bootverbose) { printf("%s%d:", SC_DRIVER_NAME, unit); if (sc->adapter >= 0) printf(" fb%d", sc->adapter); if (sc->keyboard >= 0) printf(", kbd%d", sc->keyboard); if (scp->tsw) printf(", terminal emulator: %s (%s)", scp->tsw->te_name, scp->tsw->te_desc); printf("\n"); } /* register a shutdown callback for the kernel console */ if (sc_console_unit == unit) EVENTHANDLER_REGISTER(shutdown_pre_sync, scshutdown, (void *)(uintptr_t)unit, SHUTDOWN_PRI_DEFAULT); for (vc = 0; vc < sc->vtys; vc++) { if (sc->dev[vc] == NULL) { sc->dev[vc] = make_dev(&sc_cdevsw, vc + unit * MAXCONS, UID_ROOT, GID_WHEEL, 0600, "ttyv%r", vc + unit * MAXCONS); sc_alloc_tty(sc->dev[vc]); if (vc == 0 && sc->dev == main_devs) SC_STAT(sc->dev[0]) = &main_console; } /* * The first vty already has struct tty and scr_stat initialized * in scinit(). The other vtys will have these structs when * first opened. */ } dev = make_dev(&sc_cdevsw, SC_CONSOLECTL, UID_ROOT, GID_WHEEL, 0600, "consolectl"); sc_console_tty = sc_alloc_tty(dev); ttyconsolemode(sc_console_tty, 0); SC_STAT(dev) = sc_console; return 0; } static void scmeminit(void *arg) { if (sc_malloc) return; sc_malloc = TRUE; /* * As soon as malloc() becomes functional, we had better allocate * various buffers for the kernel console. */ if (sc_console_unit < 0) /* sc_console == NULL */ return; /* copy the temporary buffer to the final buffer */ sc_alloc_scr_buffer(sc_console, FALSE, FALSE); #ifndef SC_NO_CUTPASTE sc_alloc_cut_buffer(sc_console, FALSE); #endif #ifndef SC_NO_HISTORY /* initialize history buffer & pointers */ sc_alloc_history_buffer(sc_console, 0, 0, FALSE); #endif } /* XXX */ SYSINIT(sc_mem, SI_SUB_KMEM, SI_ORDER_ANY, scmeminit, NULL); static int scdevtounit(struct cdev *dev) { int vty = SC_VTY(dev); if (vty == SC_CONSOLECTL) return ((sc_console != NULL) ? sc_console->sc->unit : -1); else if ((vty < 0) || (vty >= MAXCONS*sc_max_unit())) return -1; else return vty/MAXCONS; } static int scopen(struct cdev *dev, int flag, int mode, struct thread *td) { int unit = scdevtounit(dev); sc_softc_t *sc; struct tty *tp; scr_stat *scp; #ifndef __sparc64__ keyarg_t key; #endif int error; DPRINTF(5, ("scopen: dev:%s, unit:%d, vty:%d\n", devtoname(dev), unit, SC_VTY(dev))); tp = dev->si_tty; sc = sc_get_softc(unit, (sc_console_unit == unit) ? SC_KERNEL_CONSOLE : 0); if (sc == NULL) return ENXIO; if (!ISTTYOPEN(tp)) { tp->t_termios = tp->t_init_in; /* Use the current setting of the <-- key as default VERASE. 
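 *
 * (Aside on scdevtounit() above, using an illustrative value for
 * MAXCONS: vtys are numbered consecutively across units, so with a
 * MAXCONS of 16 the vty number 18 maps to unit 18 / 16 = 1, while
 * SC_CONSOLECTL is special-cased to the console unit.)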
*/ /* If the Delete key is preferable, an stty is necessary */ #ifndef __sparc64__ if (sc->kbd != NULL) { key.keynum = KEYCODE_BS; kbd_ioctl(sc->kbd, GIO_KEYMAPENT, (caddr_t)&key); tp->t_cc[VERASE] = key.key.map[0]; } #endif scparam(tp, &tp->t_termios); ttyld_modem(tp, 1); } else if (tp->t_state & TS_XCLUDE && priv_check(td, PRIV_TTY_EXCLUSIVE)) return(EBUSY); error = ttyld_open(tp, dev); scp = sc_get_stat(dev); if (scp == NULL) { scp = SC_STAT(dev) = alloc_scp(sc, SC_VTY(dev)); if (ISGRAPHSC(scp)) sc_set_pixel_mode(scp, NULL, COL, ROW, 16, 8); } if (!tp->t_winsize.ws_col && !tp->t_winsize.ws_row) { tp->t_winsize.ws_col = scp->xsize; tp->t_winsize.ws_row = scp->ysize; } return error; } static int scclose(struct cdev *dev, int flag, int mode, struct thread *td) { struct tty *tp = dev->si_tty; scr_stat *scp; int s; if (SC_VTY(dev) != SC_CONSOLECTL) { scp = sc_get_stat(tp->t_dev); /* were we in the middle of the VT switching process? */ DPRINTF(5, ("sc%d: scclose(), ", scp->sc->unit)); s = spltty(); if ((scp == scp->sc->cur_scp) && (scp->sc->unit == sc_console_unit)) cnavailable(sc_consptr, TRUE); if (finish_vt_rel(scp, TRUE, &s) == 0) /* force release */ DPRINTF(5, ("reset WAIT_REL, ")); if (finish_vt_acq(scp) == 0) /* force acknowledge */ DPRINTF(5, ("reset WAIT_ACQ, ")); #ifdef not_yet_done if (scp == &main_console) { scp->pid = 0; scp->proc = NULL; scp->smode.mode = VT_AUTO; } else { sc_vtb_destroy(&scp->vtb); #ifndef __sparc64__ sc_vtb_destroy(&scp->scr); #endif sc_free_history_buffer(scp, scp->ysize); SC_STAT(dev) = NULL; free(scp, M_DEVBUF); } #else scp->pid = 0; scp->proc = NULL; scp->smode.mode = VT_AUTO; #endif scp->kbd_mode = K_XLATE; if (scp == scp->sc->cur_scp) kbd_ioctl(scp->sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); DPRINTF(5, ("done.\n")); } spltty(); ttyld_close(tp, flag); tty_close(tp); spl0(); return(0); } static int scread(struct cdev *dev, struct uio *uio, int flag) { if (!sc_saver_keyb_only) sc_touch_scrn_saver(); return ttyread(dev, uio, flag); } static int sckbdevent(keyboard_t *thiskbd, int event, void *arg) { sc_softc_t *sc; struct tty *cur_tty; int c; size_t len; u_char *cp; sc = (sc_softc_t *)arg; /* assert(thiskbd == sc->kbd) */ switch (event) { case KBDIO_KEYINPUT: break; case KBDIO_UNLOADING: sc->kbd = NULL; sc->keyboard = -1; kbd_release(thiskbd, (void *)&sc->keyboard); return 0; default: return EINVAL; } /* * Loop while there is still input to get from the keyboard. * I don't think this is nessesary, and it doesn't fix * the Xaccel-2.1 keyboard hang, but it can't hurt. 
XXX */ while ((c = scgetc(sc, SCGETC_NONBLOCK)) != NOKEY) { cur_tty = VIRTUAL_TTY(sc, sc->cur_scp->index); if (!ISTTYOPEN(cur_tty)) { cur_tty = sc_console_tty; if (!ISTTYOPEN(cur_tty)) continue; } if ((*sc->cur_scp->tsw->te_input)(sc->cur_scp, c, cur_tty)) continue; switch (KEYFLAGS(c)) { case 0x0000: /* normal key */ ttyld_rint(cur_tty, KEYCHAR(c)); break; case FKEY: /* function key, return string */ cp = kbd_get_fkeystr(thiskbd, KEYCHAR(c), &len); if (cp != NULL) { while (len-- > 0) ttyld_rint(cur_tty, *cp++); } break; case MKEY: /* meta is active, prepend ESC */ ttyld_rint(cur_tty, 0x1b); ttyld_rint(cur_tty, KEYCHAR(c)); break; case BKEY: /* backtab fixed sequence (esc [ Z) */ ttyld_rint(cur_tty, 0x1b); ttyld_rint(cur_tty, '['); ttyld_rint(cur_tty, 'Z'); break; } } sc->cur_scp->status |= MOUSE_HIDDEN; return 0; } static int scparam(struct tty *tp, struct termios *t) { tp->t_ispeed = t->c_ispeed; tp->t_ospeed = t->c_ospeed; tp->t_cflag = t->c_cflag; return 0; } static int scioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) { int error; int i; struct tty *tp; sc_softc_t *sc; scr_stat *scp; int s; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) int ival; #endif tp = dev->si_tty; /* If there is a user_ioctl function call that first */ if (sc_user_ioctl) { error = (*sc_user_ioctl)(dev, cmd, data, flag, td); if (error != ENOIOCTL) return error; } error = sc_vid_ioctl(tp, cmd, data, flag, td); if (error != ENOIOCTL) return error; #ifndef SC_NO_HISTORY error = sc_hist_ioctl(tp, cmd, data, flag, td); if (error != ENOIOCTL) return error; #endif #ifndef SC_NO_SYSMOUSE error = sc_mouse_ioctl(tp, cmd, data, flag, td); if (error != ENOIOCTL) return error; #endif scp = sc_get_stat(tp->t_dev); /* assert(scp != NULL) */ /* scp is sc_console, if SC_VTY(dev) == SC_CONSOLECTL. */ sc = scp->sc; if (scp->tsw) { error = (*scp->tsw->te_ioctl)(scp, tp, cmd, data, flag, td); if (error != ENOIOCTL) return error; } switch (cmd) { /* process console hardware related ioctl's */ case GIO_ATTR: /* get current attributes */ /* this ioctl is not processed here, but in the terminal emulator */ return ENOTTY; case GIO_COLOR: /* is this a color console ? */ *(int *)data = (sc->adp->va_flags & V_ADP_COLOR) ? 
1 : 0; return 0; case CONS_BLANKTIME: /* set screen saver timeout (0 = no saver) */ if (*(int *)data < 0 || *(int *)data > MAX_BLANKTIME) return EINVAL; s = spltty(); scrn_blank_time = *(int *)data; run_scrn_saver = (scrn_blank_time != 0); splx(s); return 0; case CONS_CURSORTYPE: /* set cursor type (obsolete) */ s = spltty(); *(int *)data &= CONS_CURSOR_ATTRS; sc_change_cursor_shape(scp, *(int *)data, -1, -1); splx(s); return 0; case CONS_GETCURSORSHAPE: /* get cursor shape (new interface) */ if (((int *)data)[0] & CONS_LOCAL_CURSOR) { ((int *)data)[0] = scp->curr_curs_attr.flags; ((int *)data)[1] = scp->curr_curs_attr.base; ((int *)data)[2] = scp->curr_curs_attr.height; } else { ((int *)data)[0] = sc->curs_attr.flags; ((int *)data)[1] = sc->curs_attr.base; ((int *)data)[2] = sc->curs_attr.height; } return 0; case CONS_SETCURSORSHAPE: /* set cursor shape (new interface) */ s = spltty(); sc_change_cursor_shape(scp, ((int *)data)[0], ((int *)data)[1], ((int *)data)[2]); splx(s); return 0; case CONS_BELLTYPE: /* set bell type sound/visual */ if ((*(int *)data) & CONS_VISUAL_BELL) sc->flags |= SC_VISUAL_BELL; else sc->flags &= ~SC_VISUAL_BELL; if ((*(int *)data) & CONS_QUIET_BELL) sc->flags |= SC_QUIET_BELL; else sc->flags &= ~SC_QUIET_BELL; return 0; case CONS_GETINFO: /* get current (virtual) console info */ { vid_info_t *ptr = (vid_info_t*)data; if (ptr->size == sizeof(struct vid_info)) { ptr->m_num = sc->cur_scp->index; ptr->font_size = scp->font_size; ptr->mv_col = scp->xpos; ptr->mv_row = scp->ypos; ptr->mv_csz = scp->xsize; ptr->mv_rsz = scp->ysize; ptr->mv_hsz = (scp->history != NULL) ? scp->history->vtb_rows : 0; /* * The following fields are filled by the terminal emulator. XXX * * ptr->mv_norm.fore * ptr->mv_norm.back * ptr->mv_rev.fore * ptr->mv_rev.back */ ptr->mv_grfc.fore = 0; /* not supported */ ptr->mv_grfc.back = 0; /* not supported */ ptr->mv_ovscan = scp->border; if (scp == sc->cur_scp) save_kbd_state(scp); ptr->mk_keylock = scp->status & LOCK_MASK; return 0; } return EINVAL; } case CONS_GETVERS: /* get version number */ *(int*)data = 0x200; /* version 2.0 */ return 0; case CONS_IDLE: /* see if the screen has been idle */ /* * When the screen is in the GRAPHICS_MODE or UNKNOWN_MODE, * the user process may have been writing something on the * screen and syscons is not aware of it. Declare the screen * is NOT idle if it is in one of these modes. But there is * an exception to it; if a screen saver is running in the * graphics mode in the current screen, we should say that the * screen has been idle. */ *(int *)data = (sc->flags & SC_SCRN_IDLE) && (!ISGRAPHSC(sc->cur_scp) || (sc->cur_scp->status & SAVER_RUNNING)); return 0; case CONS_SAVERMODE: /* set saver mode */ switch(*(int *)data) { case CONS_NO_SAVER: case CONS_USR_SAVER: /* if a LKM screen saver is running, stop it first. */ scsplash_stick(FALSE); saver_mode = *(int *)data; s = spltty(); #ifdef DEV_SPLASH if ((error = wait_scrn_saver_stop(NULL))) { splx(s); return error; } #endif run_scrn_saver = TRUE; if (saver_mode == CONS_USR_SAVER) scp->status |= SAVER_RUNNING; else scp->status &= ~SAVER_RUNNING; scsplash_stick(TRUE); splx(s); break; case CONS_LKM_SAVER: s = spltty(); if ((saver_mode == CONS_USR_SAVER) && (scp->status & SAVER_RUNNING)) scp->status &= ~SAVER_RUNNING; saver_mode = *(int *)data; splx(s); break; default: return EINVAL; } return 0; case CONS_SAVERSTART: /* immediately start/stop the screen saver */ /* * Note that this ioctl does not guarantee the screen saver * actually starts or stops. 
It merely attempts to do so... */ s = spltty(); run_scrn_saver = (*(int *)data != 0); if (run_scrn_saver) sc->scrn_time_stamp -= scrn_blank_time; splx(s); return 0; case CONS_SCRSHOT: /* get a screen shot */ { int retval, hist_rsz; size_t lsize, csize; vm_offset_t frbp, hstp; unsigned lnum; scrshot_t *ptr = (scrshot_t *)data; void *outp = ptr->buf; if (ptr->x < 0 || ptr->y < 0 || ptr->xsize < 0 || ptr->ysize < 0) return EINVAL; s = spltty(); if (ISGRAPHSC(scp)) { splx(s); return EOPNOTSUPP; } hist_rsz = (scp->history != NULL) ? scp->history->vtb_rows : 0; if (((u_int)ptr->x + ptr->xsize) > scp->xsize || ((u_int)ptr->y + ptr->ysize) > (scp->ysize + hist_rsz)) { splx(s); return EINVAL; } lsize = scp->xsize * sizeof(u_int16_t); csize = ptr->xsize * sizeof(u_int16_t); /* Pointer to the last line of framebuffer */ frbp = scp->vtb.vtb_buffer + scp->ysize * lsize + ptr->x * sizeof(u_int16_t); /* Pointer to the last line of target buffer */ outp = (char *)outp + ptr->ysize * csize; /* Pointer to the last line of history buffer */ if (scp->history != NULL) hstp = scp->history->vtb_buffer + sc_vtb_tail(scp->history) * sizeof(u_int16_t) + ptr->x * sizeof(u_int16_t); else hstp = 0; retval = 0; for (lnum = 0; lnum < (ptr->y + ptr->ysize); lnum++) { if (lnum < scp->ysize) { frbp -= lsize; } else { hstp -= lsize; if (hstp < scp->history->vtb_buffer) hstp += scp->history->vtb_rows * lsize; frbp = hstp; } if (lnum < ptr->y) continue; outp = (char *)outp - csize; retval = copyout((void *)frbp, outp, csize); if (retval != 0) break; } splx(s); return retval; } case VT_SETMODE: /* set screen switcher mode */ { struct vt_mode *mode; struct proc *p1; mode = (struct vt_mode *)data; DPRINTF(5, ("%s%d: VT_SETMODE ", SC_DRIVER_NAME, sc->unit)); if (scp->smode.mode == VT_PROCESS) { p1 = pfind(scp->pid); if (scp->proc == p1 && scp->proc != td->td_proc) { if (p1) PROC_UNLOCK(p1); DPRINTF(5, ("error EPERM\n")); return EPERM; } if (p1) PROC_UNLOCK(p1); } s = spltty(); if (mode->mode == VT_AUTO) { scp->smode.mode = VT_AUTO; scp->proc = NULL; scp->pid = 0; DPRINTF(5, ("VT_AUTO, ")); if ((scp == sc->cur_scp) && (sc->unit == sc_console_unit)) cnavailable(sc_consptr, TRUE); /* were we in the middle of the vty switching process? */ if (finish_vt_rel(scp, TRUE, &s) == 0) DPRINTF(5, ("reset WAIT_REL, ")); if (finish_vt_acq(scp) == 0) DPRINTF(5, ("reset WAIT_ACQ, ")); } else { if (!ISSIGVALID(mode->relsig) || !ISSIGVALID(mode->acqsig) || !ISSIGVALID(mode->frsig)) { splx(s); DPRINTF(5, ("error EINVAL\n")); return EINVAL; } DPRINTF(5, ("VT_PROCESS %d, ", td->td_proc->p_pid)); bcopy(data, &scp->smode, sizeof(struct vt_mode)); scp->proc = td->td_proc; scp->pid = scp->proc->p_pid; if ((scp == sc->cur_scp) && (sc->unit == sc_console_unit)) cnavailable(sc_consptr, FALSE); } splx(s); DPRINTF(5, ("\n")); return 0; } case VT_GETMODE: /* get screen switcher mode */ bcopy(&scp->smode, data, sizeof(struct vt_mode)); return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('v', 4): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VT_RELDISP: /* screen switcher ioctl */ s = spltty(); /* * This must be the current vty which is in the VT_PROCESS * switching mode... */ if ((scp != sc->cur_scp) || (scp->smode.mode != VT_PROCESS)) { splx(s); return EINVAL; } /* ...and this process is controlling it. 
*/ if (scp->proc != td->td_proc) { splx(s); return EPERM; } error = EINVAL; switch(*(int *)data) { case VT_FALSE: /* user refuses to release screen, abort */ if ((error = finish_vt_rel(scp, FALSE, &s)) == 0) DPRINTF(5, ("%s%d: VT_FALSE\n", SC_DRIVER_NAME, sc->unit)); break; case VT_TRUE: /* user has released screen, go on */ if ((error = finish_vt_rel(scp, TRUE, &s)) == 0) DPRINTF(5, ("%s%d: VT_TRUE\n", SC_DRIVER_NAME, sc->unit)); break; case VT_ACKACQ: /* acquire acknowledged, switch completed */ if ((error = finish_vt_acq(scp)) == 0) DPRINTF(5, ("%s%d: VT_ACKACQ\n", SC_DRIVER_NAME, sc->unit)); break; default: break; } splx(s); return error; case VT_OPENQRY: /* return free virtual console */ for (i = sc->first_vty; i < sc->first_vty + sc->vtys; i++) { tp = VIRTUAL_TTY(sc, i); if (!ISTTYOPEN(tp)) { *(int *)data = i + 1; return 0; } } return EINVAL; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('v', 5): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VT_ACTIVATE: /* switch to screen *data */ i = (*(int *)data == 0) ? scp->index : (*(int *)data - 1); s = spltty(); error = sc_clean_up(sc->cur_scp); splx(s); if (error) return error; return sc_switch_scr(sc, i); #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('v', 6): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VT_WAITACTIVE: /* wait for switch to occur */ i = (*(int *)data == 0) ? scp->index : (*(int *)data - 1); if ((i < sc->first_vty) || (i >= sc->first_vty + sc->vtys)) return EINVAL; s = spltty(); error = sc_clean_up(sc->cur_scp); splx(s); if (error) return error; scp = sc_get_stat(SC_DEV(sc, i)); if (scp == NULL) return (ENXIO); if (scp == scp->sc->cur_scp) return 0; error = tsleep(&scp->smode, PZERO | PCATCH, "waitvt", 0); return error; case VT_GETACTIVE: /* get active vty # */ *(int *)data = sc->cur_scp->index + 1; return 0; case VT_GETINDEX: /* get this vty # */ *(int *)data = scp->index + 1; return 0; case VT_LOCKSWITCH: /* prevent vty switching */ if ((*(int *)data) & 0x01) sc->flags |= SC_SCRN_VTYLOCK; else sc->flags &= ~SC_SCRN_VTYLOCK; return 0; case KDENABIO: /* allow io operations */ error = priv_check(td, PRIV_IO); if (error != 0) return error; error = securelevel_gt(td->td_ucred, 0); if (error != 0) return error; #ifdef __i386__ td->td_frame->tf_eflags |= PSL_IOPL; #elif defined(__amd64__) td->td_frame->tf_rflags |= PSL_IOPL; #endif return 0; case KDDISABIO: /* disallow io operations (default) */ #ifdef __i386__ td->td_frame->tf_eflags &= ~PSL_IOPL; #elif defined(__amd64__) td->td_frame->tf_rflags &= ~PSL_IOPL; #endif return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 20): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSKBSTATE: /* set keyboard state (locks) */ if (*(int *)data & ~LOCK_MASK) return EINVAL; scp->status &= ~LOCK_MASK; scp->status |= *(int *)data; if (scp == sc->cur_scp) update_kbd_state(scp, scp->status, LOCK_MASK); return 0; case KDGKBSTATE: /* get keyboard state (locks) */ if (scp == sc->cur_scp) save_kbd_state(scp); *(int *)data = scp->status & LOCK_MASK; return 0; case KDGETREPEAT: /* get keyboard repeat & delay rates */ case KDSETREPEAT: /* set keyboard repeat & delay rates (new) */ error = kbd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #if 
defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 67): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSETRAD: /* set keyboard repeat & delay rates (old) */ if (*(int *)data & ~0x7f) return EINVAL; error = kbd_ioctl(sc->kbd, KDSETRAD, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 7): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSKBMODE: /* set keyboard mode */ switch (*(int *)data) { case K_XLATE: /* switch to XLT ascii mode */ case K_RAW: /* switch to RAW scancode mode */ case K_CODE: /* switch to CODE mode */ scp->kbd_mode = *(int *)data; if (scp == sc->cur_scp) kbd_ioctl(sc->kbd, KDSKBMODE, data); return 0; default: return EINVAL; } /* NOT REACHED */ case KDGKBMODE: /* get keyboard mode */ *(int *)data = scp->kbd_mode; return 0; case KDGKBINFO: error = kbd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 8): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDMKTONE: /* sound the bell */ if (*(int*)data) sc_bell(scp, (*(int*)data)&0xffff, (((*(int*)data)>>16)&0xffff)*hz/1000); else sc_bell(scp, scp->bell_pitch, scp->bell_duration); return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 63): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KIOCSOUND: /* make tone (*data) hz */ if (scp == sc->cur_scp) { if (*(int *)data) return sc_tone(*(int *)data); else return sc_tone(0); } return 0; case KDGKBTYPE: /* get keyboard type */ error = kbd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) { /* always return something? XXX */ *(int *)data = 0; } return 0; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('K', 66): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case KDSETLED: /* set keyboard LED status */ if (*(int *)data & ~LED_MASK) /* FIXME: LOCK_MASK? 
*/ return EINVAL; scp->status &= ~LED_MASK; scp->status |= *(int *)data; if (scp == sc->cur_scp) update_kbd_leds(scp, scp->status); return 0; case KDGETLED: /* get keyboard LED status */ if (scp == sc->cur_scp) save_kbd_state(scp); *(int *)data = scp->status & LED_MASK; return 0; case KBADDKBD: /* add/remove keyboard to/from mux */ case KBRELKBD: error = kbd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IO('c', 110): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case CONS_SETKBD: /* set the new keyboard */ { keyboard_t *newkbd; s = spltty(); newkbd = kbd_get_keyboard(*(int *)data); if (newkbd == NULL) { splx(s); return EINVAL; } error = 0; if (sc->kbd != newkbd) { i = kbd_allocate(newkbd->kb_name, newkbd->kb_unit, (void *)&sc->keyboard, sckbdevent, sc); /* i == newkbd->kb_index */ if (i >= 0) { if (sc->kbd != NULL) { save_kbd_state(sc->cur_scp); kbd_release(sc->kbd, (void *)&sc->keyboard); } sc->kbd = kbd_get_keyboard(i); /* sc->kbd == newkbd */ sc->keyboard = i; kbd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&sc->cur_scp->kbd_mode); update_kbd_state(sc->cur_scp, sc->cur_scp->status, LOCK_MASK); } else { error = EPERM; /* XXX */ } } splx(s); return error; } case CONS_RELKBD: /* release the current keyboard */ s = spltty(); error = 0; if (sc->kbd != NULL) { save_kbd_state(sc->cur_scp); error = kbd_release(sc->kbd, (void *)&sc->keyboard); if (error == 0) { sc->kbd = NULL; sc->keyboard = -1; } } splx(s); return error; case CONS_GETTERM: /* get the current terminal emulator info */ { sc_term_sw_t *sw; if (((term_info_t *)data)->ti_index == 0) { sw = scp->tsw; } else { sw = sc_term_match_by_number(((term_info_t *)data)->ti_index); } if (sw != NULL) { strncpy(((term_info_t *)data)->ti_name, sw->te_name, sizeof(((term_info_t *)data)->ti_name)); strncpy(((term_info_t *)data)->ti_desc, sw->te_desc, sizeof(((term_info_t *)data)->ti_desc)); ((term_info_t *)data)->ti_flags = 0; return 0; } else { ((term_info_t *)data)->ti_name[0] = '\0'; ((term_info_t *)data)->ti_desc[0] = '\0'; ((term_info_t *)data)->ti_flags = 0; return EINVAL; } } case CONS_SETTERM: /* set the current terminal emulator */ s = spltty(); error = sc_init_emulator(scp, ((term_info_t *)data)->ti_name); /* FIXME: what if scp == sc_console! XXX */ splx(s); return error; case GIO_SCRNMAP: /* get output translation table */ bcopy(&sc->scr_map, data, sizeof(sc->scr_map)); return 0; case PIO_SCRNMAP: /* set output translation table */ bcopy(data, &sc->scr_map, sizeof(sc->scr_map)); for (i = 0; i < sizeof(sc->scr_map); i++) { sc->scr_rmap[sc->scr_map[i]] = i; } return 0; case GIO_KEYMAP: /* get keyboard translation table */ case PIO_KEYMAP: /* set keyboard translation table */ case GIO_DEADKEYMAP: /* get accent key translation table */ case PIO_DEADKEYMAP: /* set accent key translation table */ case GETFKEY: /* get function key string */ case SETFKEY: /* set function key string */ error = kbd_ioctl(sc->kbd, cmd, data); if (error == ENOIOCTL) error = ENODEV; return error; #ifndef SC_NO_FONT_LOADING case PIO_FONT8x8: /* set 8x8 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; bcopy(data, sc->font_8, 8*256); sc->fonts_loaded |= FONT_8; /* * FONT KLUDGE * Always use the font page #0. XXX * Don't load if the current font size is not 8x8.
*/ if (ISTEXTSC(sc->cur_scp) && (sc->cur_scp->font_size < 14)) sc_load_font(sc->cur_scp, 0, 8, 8, sc->font_8, 0, 256); return 0; case GIO_FONT8x8: /* get 8x8 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; if (sc->fonts_loaded & FONT_8) { bcopy(sc->font_8, data, 8*256); return 0; } else return ENXIO; case PIO_FONT8x14: /* set 8x14 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; bcopy(data, sc->font_14, 14*256); sc->fonts_loaded |= FONT_14; /* * FONT KLUDGE * Always use the font page #0. XXX * Don't load if the current font size is not 8x14. */ if (ISTEXTSC(sc->cur_scp) && (sc->cur_scp->font_size >= 14) && (sc->cur_scp->font_size < 16)) sc_load_font(sc->cur_scp, 0, 14, 8, sc->font_14, 0, 256); return 0; case GIO_FONT8x14: /* get 8x14 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; if (sc->fonts_loaded & FONT_14) { bcopy(sc->font_14, data, 14*256); return 0; } else return ENXIO; case PIO_FONT8x16: /* set 8x16 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; bcopy(data, sc->font_16, 16*256); sc->fonts_loaded |= FONT_16; /* * FONT KLUDGE * Always use the font page #0. XXX * Don't load if the current font size is not 8x16. */ if (ISTEXTSC(sc->cur_scp) && (sc->cur_scp->font_size >= 16)) sc_load_font(sc->cur_scp, 0, 16, 8, sc->font_16, 0, 256); return 0; case GIO_FONT8x16: /* get 8x16 dot font */ if (!ISFONTAVAIL(sc->adp->va_flags)) return ENXIO; if (sc->fonts_loaded & FONT_16) { bcopy(sc->font_16, data, 16*256); return 0; } else return ENXIO; #endif /* SC_NO_FONT_LOADING */ default: break; } return (ttyioctl(dev, cmd, data, flag, td)); } static void scstart(struct tty *tp) { struct clist *rbp; int s, len; u_char buf[PCBURST]; scr_stat *scp = sc_get_stat(tp->t_dev); if (scp->status & SLKED || (scp == scp->sc->cur_scp && scp->sc->blink_in_progress)) return; s = spltty(); if (!(tp->t_state & (TS_TIMEOUT | TS_BUSY | TS_TTSTOP))) { tp->t_state |= TS_BUSY; rbp = &tp->t_outq; while (rbp->c_cc) { len = q_to_b(rbp, buf, PCBURST); splx(s); sc_puts(scp, buf, len); s = spltty(); } tp->t_state &= ~TS_BUSY; ttwwakeup(tp); } splx(s); } static void sc_cnprobe(struct consdev *cp) { int unit; int flags; cp->cn_pri = sc_get_cons_priority(&unit, &flags); /* a video card is always required */ if (!scvidprobe(unit, flags, TRUE)) cp->cn_pri = CN_DEAD; /* syscons will become console even when there is no keyboard */ sckbdprobe(unit, flags, TRUE); if (cp->cn_pri == CN_DEAD) return; /* initialize required fields */ sprintf(cp->cn_name, "consolectl"); } static void sc_cninit(struct consdev *cp) { int unit; int flags; sc_get_cons_priority(&unit, &flags); scinit(unit, flags | SC_KERNEL_CONSOLE); sc_console_unit = unit; sc_console = sc_get_stat(sc_get_softc(unit, SC_KERNEL_CONSOLE)->dev[0]); sc_consptr = cp; } static void sc_cnterm(struct consdev *cp) { /* we are not the kernel console any more, release everything */ if (sc_console_unit < 0) return; /* shouldn't happen */ #if 0 /* XXX */ sc_clear_screen(sc_console); sccnupdate(sc_console); #endif scterm(sc_console_unit, SC_KERNEL_CONSOLE); sc_console_unit = -1; sc_console = NULL; } static void sc_cnputc(struct consdev *cd, int c) { u_char buf[1]; scr_stat *scp = sc_console; void *save; #ifndef SC_NO_HISTORY struct tty *tp; #endif /* !SC_NO_HISTORY */ int s; /* assert(sc_console != NULL) */ #ifndef SC_NO_HISTORY if (scp == scp->sc->cur_scp && scp->status & SLKED) { scp->status &= ~SLKED; update_kbd_state(scp, scp->status, SLKED); if (scp->status & BUFFER_SAVED) { if (!sc_hist_restore(scp)) sc_remove_cutmarking(scp); 
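/* Descriptive note: at this point the history snapshot taken when Scroll Lock froze the screen has been restored (or, failing that, the cut marking removed); the statements below re-enable the cursor so the kernel message about to be printed is visible. */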
scp->status &= ~BUFFER_SAVED; scp->status |= CURSOR_ENABLED; sc_draw_cursor_image(scp); } tp = VIRTUAL_TTY(scp->sc, scp->index); if (ISTTYOPEN(tp)) scstart(tp); } #endif /* !SC_NO_HISTORY */ save = scp->ts; if (kernel_console_ts != NULL) scp->ts = kernel_console_ts; buf[0] = c; sc_puts(scp, buf, 1); scp->ts = save; s = spltty(); /* block sckbdevent and scrn_timer */ sccnupdate(scp); splx(s); } static int sc_cngetc(struct consdev *cd) { return sccngetch(SCGETC_NONBLOCK); } static int sccngetch(int flags) { static struct fkeytab fkey; static int fkeycp; scr_stat *scp; u_char *p; int cur_mode; int s = spltty(); /* block sckbdevent and scrn_timer while we poll */ int c; /* assert(sc_console != NULL) */ /* * Stop the screen saver and update the screen if necessary. * What if we have been running in the screen saver code... XXX */ sc_touch_scrn_saver(); scp = sc_console->sc->cur_scp; /* XXX */ sccnupdate(scp); if (fkeycp < fkey.len) { splx(s); return fkey.str[fkeycp++]; } if (scp->sc->kbd == NULL) { splx(s); return -1; } /* * Make sure the keyboard is accessible even when the kbd device * driver is disabled. */ kbd_enable(scp->sc->kbd); /* we shall always use the keyboard in the XLATE mode here */ cur_mode = scp->kbd_mode; scp->kbd_mode = K_XLATE; kbd_ioctl(scp->sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); kbd_poll(scp->sc->kbd, TRUE); c = scgetc(scp->sc, SCGETC_CN | flags); kbd_poll(scp->sc->kbd, FALSE); scp->kbd_mode = cur_mode; kbd_ioctl(scp->sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); kbd_disable(scp->sc->kbd); splx(s); switch (KEYFLAGS(c)) { case 0: /* normal char */ return KEYCHAR(c); case FKEY: /* function key */ p = kbd_get_fkeystr(scp->sc->kbd, KEYCHAR(c), (size_t *)&fkeycp); fkey.len = fkeycp; if ((p != NULL) && (fkey.len > 0)) { bcopy(p, fkey.str, fkey.len); fkeycp = 1; return fkey.str[0]; } return c; /* XXX */ case NOKEY: case ERRKEY: default: return -1; } /* NOT REACHED */ } static void sccnupdate(scr_stat *scp) { /* this is a cut-down version of scrn_timer()... */ if (scp->sc->font_loading_in_progress) return; if (debugger > 0 || panicstr || shutdown_in_progress) { sc_touch_scrn_saver(); } else if (scp != scp->sc->cur_scp) { return; } if (!run_scrn_saver) scp->sc->flags &= ~SC_SCRN_IDLE; #ifdef DEV_SPLASH if ((saver_mode != CONS_LKM_SAVER) || !(scp->sc->flags & SC_SCRN_IDLE)) if (scp->sc->flags & SC_SCRN_BLANKED) stop_scrn_saver(scp->sc, current_saver); #endif if (scp != scp->sc->cur_scp || scp->sc->blink_in_progress || scp->sc->switch_in_progress) return; /* * FIXME: unlike scrn_timer(), we call scrn_update() from here even * when write_in_progress is non-zero. 
XXX */ if (!ISGRAPHSC(scp) && !(scp->sc->flags & SC_SCRN_BLANKED)) scrn_update(scp, TRUE); } static void scrn_timer(void *arg) { #ifndef PC98 static int kbd_interval = 0; #endif struct timeval tv; sc_softc_t *sc; scr_stat *scp; int again; int s; again = (arg != NULL); if (arg != NULL) sc = (sc_softc_t *)arg; else if (sc_console != NULL) sc = sc_console->sc; else return; /* don't do anything when we are performing some I/O operations */ if (sc->font_loading_in_progress) { if (again) timeout(scrn_timer, sc, hz / 10); return; } s = spltty(); #ifndef PC98 if ((sc->kbd == NULL) && (sc->config & SC_AUTODETECT_KBD)) { /* try to allocate a keyboard automatically */ if (++kbd_interval >= 25) { sc->keyboard = sc_allocate_keyboard(sc, -1); if (sc->keyboard >= 0) { sc->kbd = kbd_get_keyboard(sc->keyboard); kbd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&sc->cur_scp->kbd_mode); update_kbd_state(sc->cur_scp, sc->cur_scp->status, LOCK_MASK); } kbd_interval = 0; } } #endif /* PC98 */ /* find the vty to update */ scp = sc->cur_scp; /* should we stop the screen saver? */ getmicrouptime(&tv); if (debugger > 0 || panicstr || shutdown_in_progress) sc_touch_scrn_saver(); if (run_scrn_saver) { if (tv.tv_sec > sc->scrn_time_stamp + scrn_blank_time) sc->flags |= SC_SCRN_IDLE; else sc->flags &= ~SC_SCRN_IDLE; } else { sc->scrn_time_stamp = tv.tv_sec; sc->flags &= ~SC_SCRN_IDLE; if (scrn_blank_time > 0) run_scrn_saver = TRUE; } #ifdef DEV_SPLASH if ((saver_mode != CONS_LKM_SAVER) || !(sc->flags & SC_SCRN_IDLE)) if (sc->flags & SC_SCRN_BLANKED) stop_scrn_saver(sc, current_saver); #endif /* should we just return ? */ if (sc->blink_in_progress || sc->switch_in_progress || sc->write_in_progress) { if (again) timeout(scrn_timer, sc, hz / 10); splx(s); return; } /* Update the screen */ scp = sc->cur_scp; /* cur_scp may have changed... */ if (!ISGRAPHSC(scp) && !(sc->flags & SC_SCRN_BLANKED)) scrn_update(scp, TRUE); #ifdef DEV_SPLASH /* should we activate the screen saver? 
*/ if ((saver_mode == CONS_LKM_SAVER) && (sc->flags & SC_SCRN_IDLE)) if (!ISGRAPHSC(scp) || (sc->flags & SC_SCRN_BLANKED)) (*current_saver)(sc, TRUE); #endif if (again) timeout(scrn_timer, sc, hz / 25); splx(s); } static int and_region(int *s1, int *e1, int s2, int e2) { if (*e1 < s2 || e2 < *s1) return FALSE; *s1 = imax(*s1, s2); *e1 = imin(*e1, e2); return TRUE; } static void scrn_update(scr_stat *scp, int show_cursor) { int start; int end; int s; int e; /* assert(scp == scp->sc->cur_scp) */ SC_VIDEO_LOCK(scp->sc); #ifndef SC_NO_CUTPASTE /* remove the previous mouse pointer image if necessary */ if (scp->status & MOUSE_VISIBLE) { s = scp->mouse_pos; e = scp->mouse_pos + scp->xsize + 1; if ((scp->status & (MOUSE_MOVED | MOUSE_HIDDEN)) || and_region(&s, &e, scp->start, scp->end) || ((scp->status & CURSOR_ENABLED) && (scp->cursor_pos != scp->cursor_oldpos) && (and_region(&s, &e, scp->cursor_pos, scp->cursor_pos) || and_region(&s, &e, scp->cursor_oldpos, scp->cursor_oldpos)))) { sc_remove_mouse_image(scp); if (scp->end >= scp->xsize*scp->ysize) scp->end = scp->xsize*scp->ysize - 1; } } #endif /* !SC_NO_CUTPASTE */ #if 1 /* debug: XXX */ if (scp->end >= scp->xsize*scp->ysize) { printf("scrn_update(): scp->end %d > size_of_screen!!\n", scp->end); scp->end = scp->xsize*scp->ysize - 1; } if (scp->start < 0) { printf("scrn_update(): scp->start %d < 0\n", scp->start); scp->start = 0; } #endif /* update screen image */ if (scp->start <= scp->end) { if (scp->mouse_cut_end >= 0) { /* there is a marked region for cut & paste */ if (scp->mouse_cut_start <= scp->mouse_cut_end) { start = scp->mouse_cut_start; end = scp->mouse_cut_end; } else { start = scp->mouse_cut_end; end = scp->mouse_cut_start - 1; } s = start; e = end; /* does the cut-mark region overlap with the update region? */ if (and_region(&s, &e, scp->start, scp->end)) { (*scp->rndr->draw)(scp, s, e - s + 1, TRUE); s = 0; e = start - 1; if (and_region(&s, &e, scp->start, scp->end)) (*scp->rndr->draw)(scp, s, e - s + 1, FALSE); s = end + 1; e = scp->xsize*scp->ysize - 1; if (and_region(&s, &e, scp->start, scp->end)) (*scp->rndr->draw)(scp, s, e - s + 1, FALSE); } else { (*scp->rndr->draw)(scp, scp->start, scp->end - scp->start + 1, FALSE); } } else { (*scp->rndr->draw)(scp, scp->start, scp->end - scp->start + 1, FALSE); } } /* we are not to show the cursor and the mouse pointer... */ if (!show_cursor) { scp->end = 0; scp->start = scp->xsize*scp->ysize - 1; SC_VIDEO_UNLOCK(scp->sc); return; } /* update cursor image */ if (scp->status & CURSOR_ENABLED) { s = scp->start; e = scp->end; /* did cursor move since last time ? */ if (scp->cursor_pos != scp->cursor_oldpos) { /* do we need to remove old cursor image ? 
*/ if (!and_region(&s, &e, scp->cursor_oldpos, scp->cursor_oldpos)) sc_remove_cursor_image(scp); sc_draw_cursor_image(scp); } else { if (and_region(&s, &e, scp->cursor_pos, scp->cursor_pos)) /* cursor didn't move, but has been overwritten */ sc_draw_cursor_image(scp); else if (scp->curs_attr.flags & CONS_BLINK_CURSOR) /* if it's a blinking cursor, update it */ (*scp->rndr->blink_cursor)(scp, scp->cursor_pos, sc_inside_cutmark(scp, scp->cursor_pos)); } } #ifndef SC_NO_CUTPASTE /* update "pseudo" mouse pointer image */ if (scp->sc->flags & SC_MOUSE_ENABLED) { if (!(scp->status & (MOUSE_VISIBLE | MOUSE_HIDDEN))) { scp->status &= ~MOUSE_MOVED; sc_draw_mouse_image(scp); } } #endif /* SC_NO_CUTPASTE */ scp->end = 0; scp->start = scp->xsize*scp->ysize - 1; SC_VIDEO_UNLOCK(scp->sc); } #ifdef DEV_SPLASH static int scsplash_callback(int event, void *arg) { sc_softc_t *sc; int error; sc = (sc_softc_t *)arg; switch (event) { case SPLASH_INIT: if (add_scrn_saver(scsplash_saver) == 0) { sc->flags &= ~SC_SAVER_FAILED; run_scrn_saver = TRUE; if (cold && !(boothowto & RB_VERBOSE)) { scsplash_stick(TRUE); (*current_saver)(sc, TRUE); } } return 0; case SPLASH_TERM: if (current_saver == scsplash_saver) { scsplash_stick(FALSE); error = remove_scrn_saver(scsplash_saver); if (error) return error; } return 0; default: return EINVAL; } } static void scsplash_saver(sc_softc_t *sc, int show) { static int busy = FALSE; scr_stat *scp; if (busy) return; busy = TRUE; scp = sc->cur_scp; if (show) { if (!(sc->flags & SC_SAVER_FAILED)) { if (!(sc->flags & SC_SCRN_BLANKED)) set_scrn_saver_mode(scp, -1, NULL, 0); switch (splash(sc->adp, TRUE)) { case 0: /* succeeded */ break; case EAGAIN: /* try later */ restore_scrn_saver_mode(scp, FALSE); sc_touch_scrn_saver(); /* XXX */ break; default: sc->flags |= SC_SAVER_FAILED; scsplash_stick(FALSE); restore_scrn_saver_mode(scp, TRUE); printf("scsplash_saver(): failed to put up the image\n"); break; } } } else if (!sticky_splash) { if ((sc->flags & SC_SCRN_BLANKED) && (splash(sc->adp, FALSE) == 0)) restore_scrn_saver_mode(scp, TRUE); } busy = FALSE; } static int add_scrn_saver(void (*this_saver)(sc_softc_t *, int)) { #if 0 int error; if (current_saver != none_saver) { error = remove_scrn_saver(current_saver); if (error) return error; } #endif if (current_saver != none_saver) return EBUSY; run_scrn_saver = FALSE; saver_mode = CONS_LKM_SAVER; current_saver = this_saver; return 0; } static int remove_scrn_saver(void (*this_saver)(sc_softc_t *, int)) { if (current_saver != this_saver) return EINVAL; #if 0 /* * In order to prevent `current_saver' from being called by * the timeout routine `scrn_timer()' while we manipulate * the saver list, we shall set `current_saver' to `none_saver' * before stopping the current saver, rather than blocking by `splXX()'. 
*/ current_saver = none_saver; if (scrn_blanked) stop_scrn_saver(this_saver); #endif /* unblank all blanked screens */ wait_scrn_saver_stop(NULL); if (scrn_blanked) return EBUSY; current_saver = none_saver; return 0; } static int set_scrn_saver_mode(scr_stat *scp, int mode, u_char *pal, int border) { int s; /* assert(scp == scp->sc->cur_scp) */ s = spltty(); if (!ISGRAPHSC(scp)) sc_remove_cursor_image(scp); scp->splash_save_mode = scp->mode; scp->splash_save_status = scp->status & (GRAPHICS_MODE | PIXEL_MODE); scp->status &= ~(GRAPHICS_MODE | PIXEL_MODE); scp->status |= (UNKNOWN_MODE | SAVER_RUNNING); scp->sc->flags |= SC_SCRN_BLANKED; ++scrn_blanked; splx(s); if (mode < 0) return 0; scp->mode = mode; if (set_mode(scp) == 0) { if (scp->sc->adp->va_info.vi_flags & V_INFO_GRAPHICS) scp->status |= GRAPHICS_MODE; #ifndef SC_NO_PALETTE_LOADING if (pal != NULL) load_palette(scp->sc->adp, pal); #endif sc_set_border(scp, border); return 0; } else { s = spltty(); scp->mode = scp->splash_save_mode; scp->status &= ~(UNKNOWN_MODE | SAVER_RUNNING); scp->status |= scp->splash_save_status; splx(s); return 1; } } static int restore_scrn_saver_mode(scr_stat *scp, int changemode) { int mode; int status; int s; /* assert(scp == scp->sc->cur_scp) */ s = spltty(); mode = scp->mode; status = scp->status; scp->mode = scp->splash_save_mode; scp->status &= ~(UNKNOWN_MODE | SAVER_RUNNING); scp->status |= scp->splash_save_status; scp->sc->flags &= ~SC_SCRN_BLANKED; if (!changemode) { if (!ISGRAPHSC(scp)) sc_draw_cursor_image(scp); --scrn_blanked; splx(s); return 0; } if (set_mode(scp) == 0) { #ifndef SC_NO_PALETTE_LOADING load_palette(scp->sc->adp, scp->sc->palette); #endif --scrn_blanked; splx(s); return 0; } else { scp->mode = mode; scp->status = status; splx(s); return 1; } } static void stop_scrn_saver(sc_softc_t *sc, void (*saver)(sc_softc_t *, int)) { (*saver)(sc, FALSE); run_scrn_saver = FALSE; /* the screen saver may have chosen not to stop after all... */ if (sc->flags & SC_SCRN_BLANKED) return; mark_all(sc->cur_scp); if (sc->delayed_next_scr) sc_switch_scr(sc, sc->delayed_next_scr - 1); if (debugger == 0) wakeup(&scrn_blanked); } static int wait_scrn_saver_stop(sc_softc_t *sc) { int error = 0; while (scrn_blanked > 0) { run_scrn_saver = FALSE; if (sc && !(sc->flags & SC_SCRN_BLANKED)) { error = 0; break; } error = tsleep(&scrn_blanked, PZERO | PCATCH, "scrsav", 0); if ((error != 0) && (error != ERESTART)) break; } run_scrn_saver = FALSE; return error; } #endif /* DEV_SPLASH */ void sc_touch_scrn_saver(void) { scsplash_stick(FALSE); run_scrn_saver = FALSE; } int sc_switch_scr(sc_softc_t *sc, u_int next_scr) { scr_stat *cur_scp; struct tty *tp; struct proc *p; int s; DPRINTF(5, ("sc0: sc_switch_scr() %d ", next_scr + 1)); if (sc->cur_scp == NULL) return (0); /* prevent switch if previously requested */ if (sc->flags & SC_SCRN_VTYLOCK) { sc_bell(sc->cur_scp, sc->cur_scp->bell_pitch, sc->cur_scp->bell_duration); return EPERM; } /* delay switch if the screen is blanked or being updated */ if ((sc->flags & SC_SCRN_BLANKED) || sc->write_in_progress || sc->blink_in_progress) { sc->delayed_next_scr = next_scr + 1; sc_touch_scrn_saver(); DPRINTF(5, ("switch delayed\n")); return 0; } sc->delayed_next_scr = 0; s = spltty(); cur_scp = sc->cur_scp; /* we are in the middle of the vty switching process... */ if (sc->switch_in_progress && (cur_scp->smode.mode == VT_PROCESS) && cur_scp->proc) { p = pfind(cur_scp->pid); if (cur_scp->proc != p) { if (p) PROC_UNLOCK(p); /* * The controlling process has died!!. 
Do some clean up. * NOTE:`cur_scp->proc' and `cur_scp->smode.mode' * are not reset here yet; they will be cleared later. */ DPRINTF(5, ("cur_scp controlling process %d died, ", cur_scp->pid)); if (cur_scp->status & SWITCH_WAIT_REL) { /* * Force the previous switch to finish, but return now * with error. */ DPRINTF(5, ("reset WAIT_REL, ")); finish_vt_rel(cur_scp, TRUE, &s); splx(s); DPRINTF(5, ("finishing previous switch\n")); return EINVAL; } else if (cur_scp->status & SWITCH_WAIT_ACQ) { /* let's assume screen switch has been completed. */ DPRINTF(5, ("reset WAIT_ACQ, ")); finish_vt_acq(cur_scp); } else { /* * We are in between screen release and acquisition, and * reached here via scgetc() or scrn_timer() which has * interrupted exchange_scr(). Don't do anything stupid. */ DPRINTF(5, ("waiting nothing, ")); } } else { if (p) PROC_UNLOCK(p); /* * The controlling process is alive, but not responding... * It is either buggy or it may be just taking time. * The following code is a gross kludge to cope with this * problem for which there is no clean solution. XXX */ if (cur_scp->status & SWITCH_WAIT_REL) { switch (sc->switch_in_progress++) { case 1: break; case 2: DPRINTF(5, ("sending relsig again, ")); signal_vt_rel(cur_scp); break; case 3: break; case 4: default: /* * Act as if the controlling program returned * VT_FALSE. */ DPRINTF(5, ("force reset WAIT_REL, ")); finish_vt_rel(cur_scp, FALSE, &s); splx(s); DPRINTF(5, ("act as if VT_FALSE was seen\n")); return EINVAL; } } else if (cur_scp->status & SWITCH_WAIT_ACQ) { switch (sc->switch_in_progress++) { case 1: break; case 2: DPRINTF(5, ("sending acqsig again, ")); signal_vt_acq(cur_scp); break; case 3: break; case 4: default: /* clear the flag and finish the previous switch */ DPRINTF(5, ("force reset WAIT_ACQ, ")); finish_vt_acq(cur_scp); break; } } } } /* * Return error if an invalid argument is given, or vty switch * is still in progress. */ if ((next_scr < sc->first_vty) || (next_scr >= sc->first_vty + sc->vtys) || sc->switch_in_progress) { splx(s); sc_bell(cur_scp, bios_value.bell_pitch, BELL_DURATION); DPRINTF(5, ("error 1\n")); return EINVAL; } /* * Don't allow switching away from the graphics mode vty * if the switch mode is VT_AUTO, unless the next vty is the same * as the current or the current vty has been closed (but showing). */ tp = VIRTUAL_TTY(sc, cur_scp->index); if ((cur_scp->index != next_scr) && ISTTYOPEN(tp) && (cur_scp->smode.mode == VT_AUTO) && ISGRAPHSC(cur_scp)) { splx(s); sc_bell(cur_scp, bios_value.bell_pitch, BELL_DURATION); DPRINTF(5, ("error, graphics mode\n")); return EINVAL; } /* * Is the wanted vty open? Don't allow switching to a closed vty. * If we are in DDB, don't switch to a vty in the VT_PROCESS mode. * Note that we always allow the user to switch to the kernel * console even if it is closed. */ if ((sc_console == NULL) || (next_scr != sc_console->index)) { tp = VIRTUAL_TTY(sc, next_scr); if (!ISTTYOPEN(tp)) { splx(s); sc_bell(cur_scp, bios_value.bell_pitch, BELL_DURATION); DPRINTF(5, ("error 2, requested vty isn't open!\n")); return EINVAL; } if ((debugger > 0) && (SC_STAT(tp->t_dev)->smode.mode == VT_PROCESS)) { splx(s); DPRINTF(5, ("error 3, requested vty is in the VT_PROCESS mode\n")); return EINVAL; } } /* this is the start of vty switching process... 
*/ ++sc->switch_in_progress; sc->old_scp = cur_scp; sc->new_scp = sc_get_stat(SC_DEV(sc, next_scr)); if (sc->new_scp == sc->old_scp) { sc->switch_in_progress = 0; /* * XXX wakeup() locks the scheduler lock which will hang if * the lock is in an in-between state, e.g., when we stop at * a breakpoint at fork_exit. It has always been wrong to call * wakeup() when the debugger is active. In RELENG_4, wakeup() * is supposed to be locked by splhigh(), but the debugger may * be invoked at splhigh(). */ if (debugger == 0) wakeup(&sc->new_scp->smode); splx(s); DPRINTF(5, ("switch done (new == old)\n")); return 0; } /* has controlling process died? */ vt_proc_alive(sc->old_scp); vt_proc_alive(sc->new_scp); /* wait for the controlling process to release the screen, if necessary */ if (signal_vt_rel(sc->old_scp)) { splx(s); return 0; } /* go set up the new vty screen */ splx(s); exchange_scr(sc); s = spltty(); /* wake up processes waiting for this vty */ if (debugger == 0) wakeup(&sc->cur_scp->smode); /* wait for the controlling process to acknowledge, if necessary */ if (signal_vt_acq(sc->cur_scp)) { splx(s); return 0; } sc->switch_in_progress = 0; if (sc->unit == sc_console_unit) cnavailable(sc_consptr, TRUE); splx(s); DPRINTF(5, ("switch done\n")); return 0; } static int do_switch_scr(sc_softc_t *sc, int s) { vt_proc_alive(sc->new_scp); splx(s); exchange_scr(sc); s = spltty(); /* sc->cur_scp == sc->new_scp */ wakeup(&sc->cur_scp->smode); /* wait for the controlling process to acknowledge, if necessary */ if (!signal_vt_acq(sc->cur_scp)) { sc->switch_in_progress = 0; if (sc->unit == sc_console_unit) cnavailable(sc_consptr, TRUE); } return s; } static int vt_proc_alive(scr_stat *scp) { struct proc *p; if (scp->proc) { if ((p = pfind(scp->pid)) != NULL) PROC_UNLOCK(p); if (scp->proc == p) return TRUE; scp->proc = NULL; scp->smode.mode = VT_AUTO; DPRINTF(5, ("vt controlling process %d died\n", scp->pid)); } return FALSE; } static int signal_vt_rel(scr_stat *scp) { if (scp->smode.mode != VT_PROCESS) return FALSE; scp->status |= SWITCH_WAIT_REL; PROC_LOCK(scp->proc); psignal(scp->proc, scp->smode.relsig); PROC_UNLOCK(scp->proc); DPRINTF(5, ("sending relsig to %d\n", scp->pid)); return TRUE; } static int signal_vt_acq(scr_stat *scp) { if (scp->smode.mode != VT_PROCESS) return FALSE; if (scp->sc->unit == sc_console_unit) cnavailable(sc_consptr, FALSE); scp->status |= SWITCH_WAIT_ACQ; PROC_LOCK(scp->proc); psignal(scp->proc, scp->smode.acqsig); PROC_UNLOCK(scp->proc); DPRINTF(5, ("sending acqsig to %d\n", scp->pid)); return TRUE; } static int finish_vt_rel(scr_stat *scp, int release, int *s) { if (scp == scp->sc->old_scp && scp->status & SWITCH_WAIT_REL) { scp->status &= ~SWITCH_WAIT_REL; if (release) *s = do_switch_scr(scp->sc, *s); else scp->sc->switch_in_progress = 0; return 0; } return EINVAL; } static int finish_vt_acq(scr_stat *scp) { if (scp == scp->sc->new_scp && scp->status & SWITCH_WAIT_ACQ) { scp->status &= ~SWITCH_WAIT_ACQ; scp->sc->switch_in_progress = 0; return 0; } return EINVAL; } static void exchange_scr(sc_softc_t *sc) { scr_stat *scp; /* save the current state of video and keyboard */ sc_move_cursor(sc->old_scp, sc->old_scp->xpos, sc->old_scp->ypos); if (!ISGRAPHSC(sc->old_scp)) sc_remove_cursor_image(sc->old_scp); if (sc->old_scp->kbd_mode == K_XLATE) save_kbd_state(sc->old_scp); /* set up the video for the new screen */ scp = sc->cur_scp = sc->new_scp; #ifdef PC98 if (sc->old_scp->mode != scp->mode || ISUNKNOWNSC(sc->old_scp) || ISUNKNOWNSC(sc->new_scp)) #else if (sc->old_scp->mode != 
scp->mode || ISUNKNOWNSC(sc->old_scp)) #endif set_mode(scp); #ifndef __sparc64__ else sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)sc->adp->va_window, FALSE); #endif scp->status |= MOUSE_HIDDEN; sc_move_cursor(scp, scp->xpos, scp->ypos); if (!ISGRAPHSC(scp)) sc_set_cursor_image(scp); #ifndef SC_NO_PALETTE_LOADING if (ISGRAPHSC(sc->old_scp)) load_palette(sc->adp, sc->palette); #endif sc_set_border(scp, scp->border); /* set up the keyboard for the new screen */ if (sc->old_scp->kbd_mode != scp->kbd_mode) kbd_ioctl(sc->kbd, KDSKBMODE, (caddr_t)&scp->kbd_mode); update_kbd_state(scp, scp->status, LOCK_MASK); mark_all(scp); } void sc_puts(scr_stat *scp, u_char *buf, int len) { int need_unlock = 0; #ifdef DEV_SPLASH /* make screensaver happy */ if (!sticky_splash && scp == scp->sc->cur_scp && !sc_saver_keyb_only) run_scrn_saver = FALSE; #endif if (scp->tsw) { if (!kdb_active && !mtx_owned(&scp->scr_lock)) { need_unlock = 1; mtx_lock_spin(&scp->scr_lock); } (*scp->tsw->te_puts)(scp, buf, len); if (need_unlock) mtx_unlock_spin(&scp->scr_lock); } if (scp->sc->delayed_next_scr) sc_switch_scr(scp->sc, scp->sc->delayed_next_scr - 1); } void sc_draw_cursor_image(scr_stat *scp) { /* assert(scp == scp->sc->cur_scp); */ SC_VIDEO_LOCK(scp->sc); (*scp->rndr->draw_cursor)(scp, scp->cursor_pos, scp->curs_attr.flags & CONS_BLINK_CURSOR, TRUE, sc_inside_cutmark(scp, scp->cursor_pos)); scp->cursor_oldpos = scp->cursor_pos; SC_VIDEO_UNLOCK(scp->sc); } void sc_remove_cursor_image(scr_stat *scp) { /* assert(scp == scp->sc->cur_scp); */ SC_VIDEO_LOCK(scp->sc); (*scp->rndr->draw_cursor)(scp, scp->cursor_oldpos, scp->curs_attr.flags & CONS_BLINK_CURSOR, FALSE, sc_inside_cutmark(scp, scp->cursor_oldpos)); SC_VIDEO_UNLOCK(scp->sc); } static void update_cursor_image(scr_stat *scp) { /* assert(scp == scp->sc->cur_scp); */ sc_remove_cursor_image(scp); sc_set_cursor_image(scp); sc_draw_cursor_image(scp); } void sc_set_cursor_image(scr_stat *scp) { scp->curs_attr.flags = scp->curr_curs_attr.flags; if (scp->curs_attr.flags & CONS_HIDDEN_CURSOR) { /* hidden cursor is internally represented as zero-height underline */ scp->curs_attr.flags = CONS_CHAR_CURSOR; scp->curs_attr.base = scp->curs_attr.height = 0; } else if (scp->curs_attr.flags & CONS_CHAR_CURSOR) { scp->curs_attr.base = imin(scp->curr_curs_attr.base, scp->font_size - 1); scp->curs_attr.height = imin(scp->curr_curs_attr.height, scp->font_size - scp->curs_attr.base); } else { /* block cursor */ scp->curs_attr.base = 0; scp->curs_attr.height = scp->font_size; } /* assert(scp == scp->sc->cur_scp); */ SC_VIDEO_LOCK(scp->sc); (*scp->rndr->set_cursor)(scp, scp->curs_attr.base, scp->curs_attr.height, scp->curs_attr.flags & CONS_BLINK_CURSOR); SC_VIDEO_UNLOCK(scp->sc); } static void change_cursor_shape(scr_stat *scp, int flags, int base, int height) { if ((scp == scp->sc->cur_scp) && !ISGRAPHSC(scp)) sc_remove_cursor_image(scp); if (base >= 0) scp->curr_curs_attr.base = base; if (height >= 0) scp->curr_curs_attr.height = height; if (flags & CONS_RESET_CURSOR) scp->curr_curs_attr = scp->dflt_curs_attr; else scp->curr_curs_attr.flags = flags & CONS_CURSOR_ATTRS; if ((scp == scp->sc->cur_scp) && !ISGRAPHSC(scp)) { sc_set_cursor_image(scp); sc_draw_cursor_image(scp); } } void sc_change_cursor_shape(scr_stat *scp, int flags, int base, int height) { sc_softc_t *sc; struct cdev *dev; int s; int i; s = spltty(); if ((flags != -1) && (flags & CONS_LOCAL_CURSOR)) { /* local (per vty) change */ change_cursor_shape(scp, flags, base, height); splx(s); return; } 
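/* Descriptive note: reaching here means flags == -1 or CONS_LOCAL_CURSOR was not set, so the new shape becomes the adapter-wide default and is pushed to every vty below. */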
/* global change */ sc = scp->sc; if (base >= 0) sc->curs_attr.base = base; if (height >= 0) sc->curs_attr.height = height; if (flags != -1) { if (flags & CONS_RESET_CURSOR) sc->curs_attr = sc->dflt_curs_attr; else sc->curs_attr.flags = flags & CONS_CURSOR_ATTRS; } for (i = sc->first_vty; i < sc->first_vty + sc->vtys; ++i) { if ((dev = SC_DEV(sc, i)) == NULL) continue; if ((scp = sc_get_stat(dev)) == NULL) continue; scp->dflt_curs_attr = sc->curs_attr; change_cursor_shape(scp, CONS_RESET_CURSOR, -1, -1); } splx(s); } static void scinit(int unit, int flags) { /* * When syscons is being initialized as the kernel console, malloc() * is not yet functional, because various kernel structures has not been * fully initialized yet. Therefore, we need to declare the following * static buffers for the console. This is less than ideal, * but is necessry evil for the time being. XXX */ #ifdef PC98 static u_short sc_buffer[ROW*COL*2];/* XXX */ #else static u_short sc_buffer[ROW*COL]; /* XXX */ #endif #ifndef SC_NO_FONT_LOADING static u_char font_8[256*8]; static u_char font_14[256*14]; static u_char font_16[256*16]; #endif sc_softc_t *sc; scr_stat *scp; video_adapter_t *adp; int col; int row; int i; /* one time initialization */ if (init_done == COLD) sc_get_bios_values(&bios_value); init_done = WARM; /* * Allocate resources. Even if we are being called for the second * time, we must allocate them again, because they might have * disappeared... */ sc = sc_get_softc(unit, flags & SC_KERNEL_CONSOLE); if ((sc->flags & SC_INIT_DONE) == 0) SC_VIDEO_LOCKINIT(sc); adp = NULL; if (sc->adapter >= 0) { vid_release(sc->adp, (void *)&sc->adapter); adp = sc->adp; sc->adp = NULL; } if (sc->keyboard >= 0) { DPRINTF(5, ("sc%d: releasing kbd%d\n", unit, sc->keyboard)); i = kbd_release(sc->kbd, (void *)&sc->keyboard); DPRINTF(5, ("sc%d: kbd_release returned %d\n", unit, i)); if (sc->kbd != NULL) { DPRINTF(5, ("sc%d: kbd != NULL!, index:%d, unit:%d, flags:0x%x\n", unit, sc->kbd->kb_index, sc->kbd->kb_unit, sc->kbd->kb_flags)); } sc->kbd = NULL; } sc->adapter = vid_allocate("*", unit, (void *)&sc->adapter); sc->adp = vid_get_adapter(sc->adapter); /* assert((sc->adapter >= 0) && (sc->adp != NULL)) */ sc->keyboard = sc_allocate_keyboard(sc, unit); DPRINTF(1, ("sc%d: keyboard %d\n", unit, sc->keyboard)); sc->kbd = kbd_get_keyboard(sc->keyboard); if (sc->kbd != NULL) { DPRINTF(1, ("sc%d: kbd index:%d, unit:%d, flags:0x%x\n", unit, sc->kbd->kb_index, sc->kbd->kb_unit, sc->kbd->kb_flags)); } if (!(sc->flags & SC_INIT_DONE) || (adp != sc->adp)) { sc->initial_mode = sc->adp->va_initial_mode; #ifndef SC_NO_FONT_LOADING if (flags & SC_KERNEL_CONSOLE) { sc->font_8 = font_8; sc->font_14 = font_14; sc->font_16 = font_16; } else if (sc->font_8 == NULL) { /* assert(sc_malloc) */ sc->font_8 = malloc(sizeof(font_8), M_DEVBUF, M_WAITOK); sc->font_14 = malloc(sizeof(font_14), M_DEVBUF, M_WAITOK); sc->font_16 = malloc(sizeof(font_16), M_DEVBUF, M_WAITOK); } #endif /* extract the hardware cursor location and hide the cursor for now */ (*vidsw[sc->adapter]->read_hw_cursor)(sc->adp, &col, &row); (*vidsw[sc->adapter]->set_hw_cursor)(sc->adp, -1, -1); /* set up the first console */ sc->first_vty = unit*MAXCONS; sc->vtys = MAXCONS; /* XXX: should be configurable */ if (flags & SC_KERNEL_CONSOLE) { /* * Set up devs structure but don't use it yet, calling make_dev() * might panic kernel. Wait for sc_attach_unit() to actually * create the devices. 
*/ sc->dev = main_devs; scp = &main_console; init_scp(sc, sc->first_vty, scp); sc_vtb_init(&scp->vtb, VTB_MEMORY, scp->xsize, scp->ysize, (void *)sc_buffer, FALSE); if (sc_init_emulator(scp, SC_DFLT_TERM)) sc_init_emulator(scp, "*"); (*scp->tsw->te_default_attr)(scp, kernel_default.std_color, kernel_default.rev_color); } else { /* assert(sc_malloc) */ sc->dev = malloc(sizeof(struct cdev *)*sc->vtys, M_DEVBUF, M_WAITOK|M_ZERO); sc->dev[0] = make_dev(&sc_cdevsw, unit * MAXCONS, UID_ROOT, GID_WHEEL, 0600, "ttyv%r", unit * MAXCONS); sc_alloc_tty(sc->dev[0]); scp = alloc_scp(sc, sc->first_vty); SC_STAT(sc->dev[0]) = scp; } sc->cur_scp = scp; #ifndef __sparc64__ /* copy screen to temporary buffer */ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)scp->sc->adp->va_window, FALSE); if (ISTEXTSC(scp)) sc_vtb_copy(&scp->scr, 0, &scp->vtb, 0, scp->xsize*scp->ysize); #endif /* move cursors to the initial positions */ if (col >= scp->xsize) col = 0; if (row >= scp->ysize) row = scp->ysize - 1; scp->xpos = col; scp->ypos = row; scp->cursor_pos = scp->cursor_oldpos = row*scp->xsize + col; if (bios_value.cursor_end < scp->font_size) sc->dflt_curs_attr.base = scp->font_size - bios_value.cursor_end - 1; else sc->dflt_curs_attr.base = 0; i = bios_value.cursor_end - bios_value.cursor_start + 1; sc->dflt_curs_attr.height = imin(i, scp->font_size); sc->dflt_curs_attr.flags = 0; sc->curs_attr = sc->dflt_curs_attr; scp->curr_curs_attr = scp->dflt_curs_attr = sc->curs_attr; #ifndef SC_NO_SYSMOUSE sc_mouse_move(scp, scp->xpixel/2, scp->ypixel/2); #endif if (!ISGRAPHSC(scp)) { sc_set_cursor_image(scp); sc_draw_cursor_image(scp); } /* save font and palette */ #ifndef SC_NO_FONT_LOADING sc->fonts_loaded = 0; if (ISFONTAVAIL(sc->adp->va_flags)) { #ifdef SC_DFLT_FONT bcopy(dflt_font_8, sc->font_8, sizeof(dflt_font_8)); bcopy(dflt_font_14, sc->font_14, sizeof(dflt_font_14)); bcopy(dflt_font_16, sc->font_16, sizeof(dflt_font_16)); sc->fonts_loaded = FONT_16 | FONT_14 | FONT_8; if (scp->font_size < 14) { sc_load_font(scp, 0, 8, 8, sc->font_8, 0, 256); } else if (scp->font_size >= 16) { sc_load_font(scp, 0, 16, 8, sc->font_16, 0, 256); } else { sc_load_font(scp, 0, 14, 8, sc->font_14, 0, 256); } #else /* !SC_DFLT_FONT */ if (scp->font_size < 14) { sc_save_font(scp, 0, 8, 8, sc->font_8, 0, 256); sc->fonts_loaded = FONT_8; } else if (scp->font_size >= 16) { sc_save_font(scp, 0, 16, 8, sc->font_16, 0, 256); sc->fonts_loaded = FONT_16; } else { sc_save_font(scp, 0, 14, 8, sc->font_14, 0, 256); sc->fonts_loaded = FONT_14; } #endif /* SC_DFLT_FONT */ /* FONT KLUDGE: always use the font page #0. XXX */ sc_show_font(scp, 0); } #endif /* !SC_NO_FONT_LOADING */ #ifndef SC_NO_PALETTE_LOADING save_palette(sc->adp, sc->palette); #endif #ifdef DEV_SPLASH if (!(sc->flags & SC_SPLASH_SCRN)) { /* we are ready to put up the splash image! 
*/ splash_init(sc->adp, scsplash_callback, sc); sc->flags |= SC_SPLASH_SCRN; } #endif } /* the rest is not necessary, if we have done it once */ if (sc->flags & SC_INIT_DONE) return; /* initialize mapscrn arrays to a one to one map */ for (i = 0; i < sizeof(sc->scr_map); i++) sc->scr_map[i] = sc->scr_rmap[i] = i; #ifdef PC98 sc->scr_map[0x5c] = (u_char)0xfc; /* for backslash */ #endif sc->flags |= SC_INIT_DONE; } static void scterm(int unit, int flags) { sc_softc_t *sc; scr_stat *scp; sc = sc_get_softc(unit, flags & SC_KERNEL_CONSOLE); if (sc == NULL) return; /* shouldn't happen */ #ifdef DEV_SPLASH /* this console is no longer available for the splash screen */ if (sc->flags & SC_SPLASH_SCRN) { splash_term(sc->adp); sc->flags &= ~SC_SPLASH_SCRN; } #endif #if 0 /* XXX */ /* move the hardware cursor to the upper-left corner */ (*vidsw[sc->adapter]->set_hw_cursor)(sc->adp, 0, 0); #endif /* release the keyboard and the video card */ if (sc->keyboard >= 0) kbd_release(sc->kbd, &sc->keyboard); if (sc->adapter >= 0) vid_release(sc->adp, &sc->adapter); /* stop the terminal emulator, if any */ scp = sc_get_stat(sc->dev[0]); if (scp->tsw) (*scp->tsw->te_term)(scp, &scp->ts); if (scp->ts != NULL) free(scp->ts, M_DEVBUF); mtx_destroy(&scp->scr_lock); /* clear the structure */ if (!(flags & SC_KERNEL_CONSOLE)) { /* XXX: We need delete_dev() for this */ free(sc->dev, M_DEVBUF); #if 0 /* XXX: We need a ttyunregister for this */ free(sc->tty, M_DEVBUF); #endif #ifndef SC_NO_FONT_LOADING free(sc->font_8, M_DEVBUF); free(sc->font_14, M_DEVBUF); free(sc->font_16, M_DEVBUF); #endif /* XXX vtb, history */ } bzero(sc, sizeof(*sc)); sc->keyboard = -1; sc->adapter = -1; } static void scshutdown(void *arg, int howto) { /* assert(sc_console != NULL) */ sc_touch_scrn_saver(); if (!cold && sc_console && sc_console->sc->cur_scp->smode.mode == VT_AUTO && sc_console->smode.mode == VT_AUTO) sc_switch_scr(sc_console->sc, sc_console->index); shutdown_in_progress = TRUE; } int sc_clean_up(scr_stat *scp) { #ifdef DEV_SPLASH int error; #endif if (scp->sc->flags & SC_SCRN_BLANKED) { sc_touch_scrn_saver(); #ifdef DEV_SPLASH if ((error = wait_scrn_saver_stop(scp->sc))) return error; #endif } scp->status |= MOUSE_HIDDEN; sc_remove_mouse_image(scp); sc_remove_cutmarking(scp); return 0; } void sc_alloc_scr_buffer(scr_stat *scp, int wait, int discard) { sc_vtb_t new; sc_vtb_t old; old = scp->vtb; sc_vtb_init(&new, VTB_MEMORY, scp->xsize, scp->ysize, NULL, wait); if (!discard && (old.vtb_flags & VTB_VALID)) { /* retain the current cursor position and buffer contants */ scp->cursor_oldpos = scp->cursor_pos; /* * This works only if the old buffer has the same size as or larger * than the new one. 
XXX */ sc_vtb_copy(&old, 0, &new, 0, scp->xsize*scp->ysize); scp->vtb = new; } else { scp->vtb = new; sc_vtb_destroy(&old); } #ifndef SC_NO_SYSMOUSE /* move the mouse cursor at the center of the screen */ sc_mouse_move(scp, scp->xpixel / 2, scp->ypixel / 2); #endif } static scr_stat *alloc_scp(sc_softc_t *sc, int vty) { scr_stat *scp; /* assert(sc_malloc) */ scp = (scr_stat *)malloc(sizeof(scr_stat), M_DEVBUF, M_WAITOK); init_scp(sc, vty, scp); sc_alloc_scr_buffer(scp, TRUE, TRUE); if (sc_init_emulator(scp, SC_DFLT_TERM)) sc_init_emulator(scp, "*"); #ifndef SC_NO_CUTPASTE sc_alloc_cut_buffer(scp, TRUE); #endif #ifndef SC_NO_HISTORY sc_alloc_history_buffer(scp, 0, 0, TRUE); #endif return scp; } static void init_scp(sc_softc_t *sc, int vty, scr_stat *scp) { video_info_t info; bzero(scp, sizeof(*scp)); scp->index = vty; scp->sc = sc; scp->status = 0; scp->mode = sc->initial_mode; (*vidsw[sc->adapter]->get_info)(sc->adp, scp->mode, &info); if (info.vi_flags & V_INFO_GRAPHICS) { scp->status |= GRAPHICS_MODE; scp->xpixel = info.vi_width; scp->ypixel = info.vi_height; scp->xsize = info.vi_width/info.vi_cwidth; scp->ysize = info.vi_height/info.vi_cheight; scp->font_size = 0; scp->font = NULL; } else { scp->xsize = info.vi_width; scp->ysize = info.vi_height; scp->xpixel = scp->xsize*info.vi_cwidth; scp->ypixel = scp->ysize*info.vi_cheight; scp->font_size = info.vi_cheight; scp->font_width = info.vi_cwidth; if (info.vi_cheight < 14) { #ifndef SC_NO_FONT_LOADING scp->font = sc->font_8; #else scp->font = NULL; #endif } else if (info.vi_cheight >= 16) { #ifndef SC_NO_FONT_LOADING scp->font = sc->font_16; #else scp->font = NULL; #endif } else { #ifndef SC_NO_FONT_LOADING scp->font = sc->font_14; #else scp->font = NULL; #endif } } sc_vtb_init(&scp->vtb, VTB_MEMORY, 0, 0, NULL, FALSE); #ifndef __sparc64__ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, 0, 0, NULL, FALSE); #endif scp->xoff = scp->yoff = 0; scp->xpos = scp->ypos = 0; scp->start = scp->xsize * scp->ysize - 1; scp->end = 0; scp->tsw = NULL; scp->ts = NULL; scp->rndr = NULL; scp->border = (SC_NORM_ATTR >> 4) & 0x0f; scp->curr_curs_attr = scp->dflt_curs_attr = sc->curs_attr; scp->mouse_cut_start = scp->xsize*scp->ysize; scp->mouse_cut_end = -1; scp->mouse_signal = 0; scp->mouse_pid = 0; scp->mouse_proc = NULL; scp->kbd_mode = K_XLATE; scp->bell_pitch = bios_value.bell_pitch; scp->bell_duration = BELL_DURATION; scp->status |= (bios_value.shift_state & NLKED); scp->status |= CURSOR_ENABLED | MOUSE_HIDDEN; scp->pid = 0; scp->proc = NULL; scp->smode.mode = VT_AUTO; scp->history = NULL; scp->history_pos = 0; scp->history_size = 0; mtx_init(&scp->scr_lock, "scrlock", NULL, MTX_SPIN); } int sc_init_emulator(scr_stat *scp, char *name) { sc_term_sw_t *sw; sc_rndr_sw_t *rndr; void *p; int error; if (name == NULL) /* if no name is given, use the current emulator */ sw = scp->tsw; else /* ...otherwise find the named emulator */ sw = sc_term_match(name); if (sw == NULL) return EINVAL; rndr = NULL; if (strcmp(sw->te_renderer, "*") != 0) { rndr = sc_render_match(scp, sw->te_renderer, scp->status & (GRAPHICS_MODE | PIXEL_MODE)); } if (rndr == NULL) { rndr = sc_render_match(scp, scp->sc->adp->va_name, scp->status & (GRAPHICS_MODE | PIXEL_MODE)); if (rndr == NULL) return ENODEV; } if (sw == scp->tsw) { error = (*sw->te_init)(scp, &scp->ts, SC_TE_WARM_INIT); scp->rndr = rndr; scp->rndr->init(scp); sc_clear_screen(scp); /* assert(error == 0); */ return error; } if (sc_malloc && (sw->te_size > 0)) p = malloc(sw->te_size, M_DEVBUF, M_NOWAIT); else p = NULL; error = 
(*sw->te_init)(scp, &p, SC_TE_COLD_INIT); if (error) return error; if (scp->tsw) (*scp->tsw->te_term)(scp, &scp->ts); if (scp->ts != NULL) free(scp->ts, M_DEVBUF); scp->tsw = sw; scp->ts = p; scp->rndr = rndr; scp->rndr->init(scp); /* XXX */ (*sw->te_default_attr)(scp, user_default.std_color, user_default.rev_color); sc_clear_screen(scp); return 0; } /* * scgetc(flags) - get character from keyboard. * If flags & SCGETC_CN, then avoid harmful side effects. * If flags & SCGETC_NONBLOCK, then wait until a key is pressed, else * return NOKEY if there is nothing there. */ static u_int scgetc(sc_softc_t *sc, u_int flags) { scr_stat *scp; #ifndef SC_NO_HISTORY struct tty *tp; #endif u_int c; int this_scr; int f; int i; if (sc->kbd == NULL) return NOKEY; next_code: #if 1 /* I don't like this, but... XXX */ if (flags & SCGETC_CN) sccnupdate(sc->cur_scp); #endif scp = sc->cur_scp; /* first see if there is something in the keyboard port */ for (;;) { c = kbd_read_char(sc->kbd, !(flags & SCGETC_NONBLOCK)); if (c == ERRKEY) { if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); } else if (c == NOKEY) return c; else break; } /* make screensaver happy */ if (!(c & RELKEY)) sc_touch_scrn_saver(); if (!(flags & SCGETC_CN)) random_harvest(&c, sizeof(c), 1, 0, RANDOM_KEYBOARD); if (scp->kbd_mode != K_XLATE) return KEYCHAR(c); /* if scroll-lock pressed allow history browsing */ if (!ISGRAPHSC(scp) && scp->history && scp->status & SLKED) { scp->status &= ~CURSOR_ENABLED; sc_remove_cursor_image(scp); #ifndef SC_NO_HISTORY if (!(scp->status & BUFFER_SAVED)) { scp->status |= BUFFER_SAVED; sc_hist_save(scp); } switch (c) { /* FIXME: key codes */ case SPCLKEY | FKEY | F(49): /* home key */ sc_remove_cutmarking(scp); sc_hist_home(scp); goto next_code; case SPCLKEY | FKEY | F(57): /* end key */ sc_remove_cutmarking(scp); sc_hist_end(scp); goto next_code; case SPCLKEY | FKEY | F(50): /* up arrow key */ sc_remove_cutmarking(scp); if (sc_hist_up_line(scp)) if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); goto next_code; case SPCLKEY | FKEY | F(58): /* down arrow key */ sc_remove_cutmarking(scp); if (sc_hist_down_line(scp)) if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); goto next_code; case SPCLKEY | FKEY | F(51): /* page up key */ sc_remove_cutmarking(scp); for (i=0; iysize; i++) if (sc_hist_up_line(scp)) { if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); break; } goto next_code; case SPCLKEY | FKEY | F(59): /* page down key */ sc_remove_cutmarking(scp); for (i=0; iysize; i++) if (sc_hist_down_line(scp)) { if (!(flags & SCGETC_CN)) sc_bell(scp, bios_value.bell_pitch, BELL_DURATION); break; } goto next_code; } #endif /* SC_NO_HISTORY */ } /* * Process and consume special keys here. Return a plain char code * or a char code with the META flag or a function key code. 
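 * A caller usually classifies the returned code roughly as follows
 * (sketch only; handle_spcl() and handle_char() are hypothetical):
 *
 *      c = scgetc(sc, SCGETC_NONBLOCK);
 *      if (c == NOKEY)
 *              return;
 *      if (c & SPCLKEY)
 *              handle_spcl(c);
 *      else
 *              handle_char(KEYCHAR(c));
 *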
*/ if (c & RELKEY) { /* key released */ /* goto next_code */ } else { /* key pressed */ if (c & SPCLKEY) { c &= ~SPCLKEY; switch (KEYCHAR(c)) { /* LOCKING KEYS */ case NLK: case CLK: case ALK: break; case SLK: kbd_ioctl(sc->kbd, KDGKBSTATE, (caddr_t)&f); if (f & SLKED) { scp->status |= SLKED; } else { if (scp->status & SLKED) { scp->status &= ~SLKED; #ifndef SC_NO_HISTORY if (scp->status & BUFFER_SAVED) { if (!sc_hist_restore(scp)) sc_remove_cutmarking(scp); scp->status &= ~BUFFER_SAVED; scp->status |= CURSOR_ENABLED; sc_draw_cursor_image(scp); } tp = VIRTUAL_TTY(sc, scp->index); if (ISTTYOPEN(tp)) scstart(tp); #endif } } break; case PASTE: #ifndef SC_NO_CUTPASTE sc_mouse_paste(scp); #endif break; /* NON-LOCKING KEYS */ case NOP: case LSH: case RSH: case LCTR: case RCTR: case LALT: case RALT: case ASH: case META: break; case BTAB: if (!(sc->flags & SC_SCRN_BLANKED)) return c; break; case SPSC: #ifdef DEV_SPLASH /* force activatation/deactivation of the screen saver */ if (!(sc->flags & SC_SCRN_BLANKED)) { run_scrn_saver = TRUE; sc->scrn_time_stamp -= scrn_blank_time; } if (cold) { /* * While devices are being probed, the screen saver need * to be invoked explictly. XXX */ if (sc->flags & SC_SCRN_BLANKED) { scsplash_stick(FALSE); stop_scrn_saver(sc, current_saver); } else { if (!ISGRAPHSC(scp)) { scsplash_stick(TRUE); (*current_saver)(sc, TRUE); } } } #endif /* DEV_SPLASH */ break; case RBT: #ifndef SC_DISABLE_REBOOT if (enable_reboot) shutdown_nice(0); #endif break; case HALT: #ifndef SC_DISABLE_REBOOT if (enable_reboot) shutdown_nice(RB_HALT); #endif break; case PDWN: #ifndef SC_DISABLE_REBOOT if (enable_reboot) shutdown_nice(RB_HALT|RB_POWEROFF); #endif break; case SUSP: power_pm_suspend(POWER_SLEEP_STATE_SUSPEND); break; case STBY: power_pm_suspend(POWER_SLEEP_STATE_STANDBY); break; case DBG: #ifndef SC_DISABLE_KDBKEY if (enable_kdbkey) - kdb_enter("manual escape to debugger"); + kdb_enter_why(KDB_WHY_BREAK, + "manual escape to debugger"); #endif break; case PNC: if (enable_panic_key) panic("Forced by the panic key"); break; case NEXT: this_scr = scp->index; for (i = (this_scr - sc->first_vty + 1)%sc->vtys; sc->first_vty + i != this_scr; i = (i + 1)%sc->vtys) { struct tty *tp = VIRTUAL_TTY(sc, sc->first_vty + i); if (ISTTYOPEN(tp)) { sc_switch_scr(scp->sc, sc->first_vty + i); break; } } break; case PREV: this_scr = scp->index; for (i = (this_scr - sc->first_vty + sc->vtys - 1)%sc->vtys; sc->first_vty + i != this_scr; i = (i + sc->vtys - 1)%sc->vtys) { struct tty *tp = VIRTUAL_TTY(sc, sc->first_vty + i); if (ISTTYOPEN(tp)) { sc_switch_scr(scp->sc, sc->first_vty + i); break; } } break; default: if (KEYCHAR(c) >= F_SCR && KEYCHAR(c) <= L_SCR) { sc_switch_scr(scp->sc, sc->first_vty + KEYCHAR(c) - F_SCR); break; } /* assert(c & FKEY) */ if (!(sc->flags & SC_SCRN_BLANKED)) return c; break; } /* goto next_code */ } else { /* regular keys (maybe MKEY is set) */ if (!(sc->flags & SC_SCRN_BLANKED)) return c; } } goto next_code; } static int scmmap(struct cdev *dev, vm_offset_t offset, vm_paddr_t *paddr, int nprot) { scr_stat *scp; scp = sc_get_stat(dev); if (scp != scp->sc->cur_scp) return -1; return (*vidsw[scp->sc->adapter]->mmap)(scp->sc->adp, offset, paddr, nprot); } static int save_kbd_state(scr_stat *scp) { int state; int error; error = kbd_ioctl(scp->sc->kbd, KDGKBSTATE, (caddr_t)&state); if (error == ENOIOCTL) error = ENODEV; if (error == 0) { scp->status &= ~LOCK_MASK; scp->status |= state; } return error; } static int update_kbd_state(scr_stat *scp, int new_bits, int mask) { int state; 
int error; if (mask != LOCK_MASK) { error = kbd_ioctl(scp->sc->kbd, KDGKBSTATE, (caddr_t)&state); if (error == ENOIOCTL) error = ENODEV; if (error) return error; state &= ~mask; state |= new_bits & mask; } else { state = new_bits & LOCK_MASK; } error = kbd_ioctl(scp->sc->kbd, KDSKBSTATE, (caddr_t)&state); if (error == ENOIOCTL) error = ENODEV; return error; } static int update_kbd_leds(scr_stat *scp, int which) { int error; which &= LOCK_MASK; error = kbd_ioctl(scp->sc->kbd, KDSETLED, (caddr_t)&which); if (error == ENOIOCTL) error = ENODEV; return error; } int set_mode(scr_stat *scp) { video_info_t info; /* reject unsupported mode */ if ((*vidsw[scp->sc->adapter]->get_info)(scp->sc->adp, scp->mode, &info)) return 1; /* if this vty is not currently showing, do nothing */ if (scp != scp->sc->cur_scp) return 0; /* setup video hardware for the given mode */ (*vidsw[scp->sc->adapter]->set_mode)(scp->sc->adp, scp->mode); scp->rndr->init(scp); #ifndef __sparc64__ sc_vtb_init(&scp->scr, VTB_FRAMEBUFFER, scp->xsize, scp->ysize, (void *)scp->sc->adp->va_window, FALSE); #endif #ifndef SC_NO_FONT_LOADING /* load appropriate font */ if (!(scp->status & GRAPHICS_MODE)) { if (!(scp->status & PIXEL_MODE) && ISFONTAVAIL(scp->sc->adp->va_flags)) { if (scp->font_size < 14) { if (scp->sc->fonts_loaded & FONT_8) sc_load_font(scp, 0, 8, 8, scp->sc->font_8, 0, 256); } else if (scp->font_size >= 16) { if (scp->sc->fonts_loaded & FONT_16) sc_load_font(scp, 0, 16, 8, scp->sc->font_16, 0, 256); } else { if (scp->sc->fonts_loaded & FONT_14) sc_load_font(scp, 0, 14, 8, scp->sc->font_14, 0, 256); } /* * FONT KLUDGE: * This is an interim kludge to display correct font. * Always use the font page #0 on the video plane 2. * Somehow we cannot show the font in other font pages on * some video cards... 
XXX */ sc_show_font(scp, 0); } mark_all(scp); } #endif /* !SC_NO_FONT_LOADING */ sc_set_border(scp, scp->border); sc_set_cursor_image(scp); return 0; } void sc_set_border(scr_stat *scp, int color) { SC_VIDEO_LOCK(scp->sc); (*scp->rndr->draw_border)(scp, color); SC_VIDEO_UNLOCK(scp->sc); } #ifndef SC_NO_FONT_LOADING void sc_load_font(scr_stat *scp, int page, int size, int width, u_char *buf, int base, int count) { sc_softc_t *sc; sc = scp->sc; sc->font_loading_in_progress = TRUE; (*vidsw[sc->adapter]->load_font)(sc->adp, page, size, width, buf, base, count); sc->font_loading_in_progress = FALSE; } void sc_save_font(scr_stat *scp, int page, int size, int width, u_char *buf, int base, int count) { sc_softc_t *sc; sc = scp->sc; sc->font_loading_in_progress = TRUE; (*vidsw[sc->adapter]->save_font)(sc->adp, page, size, width, buf, base, count); sc->font_loading_in_progress = FALSE; } void sc_show_font(scr_stat *scp, int page) { (*vidsw[scp->sc->adapter]->show_font)(scp->sc->adp, page); } #endif /* !SC_NO_FONT_LOADING */ void sc_paste(scr_stat *scp, u_char *p, int count) { struct tty *tp; u_char *rmap; tp = VIRTUAL_TTY(scp->sc, scp->sc->cur_scp->index); if (!ISTTYOPEN(tp)) return; rmap = scp->sc->scr_rmap; for (; count > 0; --count) ttyld_rint(tp, rmap[*p++]); } void sc_bell(scr_stat *scp, int pitch, int duration) { if (cold || shutdown_in_progress || !enable_bell) return; if (scp != scp->sc->cur_scp && (scp->sc->flags & SC_QUIET_BELL)) return; if (scp->sc->flags & SC_VISUAL_BELL) { if (scp->sc->blink_in_progress) return; scp->sc->blink_in_progress = 3; if (scp != scp->sc->cur_scp) scp->sc->blink_in_progress += 2; blink_screen(scp->sc->cur_scp); } else if (duration != 0 && pitch != 0) { if (scp != scp->sc->cur_scp) pitch *= 2; sysbeep(pitch, duration); } } static void blink_screen(void *arg) { scr_stat *scp = arg; struct tty *tp; if (ISGRAPHSC(scp) || (scp->sc->blink_in_progress <= 1)) { scp->sc->blink_in_progress = 0; mark_all(scp); tp = VIRTUAL_TTY(scp->sc, scp->index); if (ISTTYOPEN(tp)) scstart(tp); if (scp->sc->delayed_next_scr) sc_switch_scr(scp->sc, scp->sc->delayed_next_scr - 1); } else { (*scp->rndr->draw)(scp, 0, scp->xsize*scp->ysize, scp->sc->blink_in_progress & 1); scp->sc->blink_in_progress--; timeout(blink_screen, scp, hz / 10); } } /* * Until sc_attach_unit() gets called no dev structures will be available * to store the per-screen current status. This is the case when the * kernel is initially booting and needs access to its console. During * this early phase of booting the console's current status is kept in * one statically defined scr_stat structure, and any pointers to the * dev structures will be NULL. */ static scr_stat * sc_get_stat(struct cdev *devptr) { if (devptr == NULL) return (&main_console); return (SC_STAT(devptr)); } /* * Allocate active keyboard. Try to allocate "kbdmux" keyboard first, and, * if found, add all non-busy keyboards to "kbdmux". Otherwise look for * any keyboard. 
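 * The function returns the allocated keyboard index, or -1 if no
 * keyboard could be allocated.  A caller-side sketch (illustrative
 * only, not necessarily the exact attach path):
 *
 *      sc->keyboard = sc_allocate_keyboard(sc, unit);
 *      if (sc->keyboard >= 0)
 *              sc->kbd = kbd_get_keyboard(sc->keyboard);
 *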
*/ static int sc_allocate_keyboard(sc_softc_t *sc, int unit) { int idx0, idx; keyboard_t *k0, *k; keyboard_info_t ki; idx0 = kbd_allocate("kbdmux", -1, (void *)&sc->keyboard, sckbdevent, sc); if (idx0 != -1) { k0 = kbd_get_keyboard(idx0); for (idx = kbd_find_keyboard2("*", -1, 0); idx != -1; idx = kbd_find_keyboard2("*", -1, idx + 1)) { k = kbd_get_keyboard(idx); if (idx == idx0 || KBD_IS_BUSY(k)) continue; bzero(&ki, sizeof(ki)); strcpy(ki.kb_name, k->kb_name); ki.kb_unit = k->kb_unit; kbd_ioctl(k0, KBADDKBD, (caddr_t) &ki); } } else idx0 = kbd_allocate("*", unit, (void *)&sc->keyboard, sckbdevent, sc); return (idx0); } Index: stable/7/sys/dev/uart/uart_core.c =================================================================== --- stable/7/sys/dev/uart/uart_core.c (revision 177733) +++ stable/7/sys/dev/uart/uart_core.c (revision 177734) @@ -1,586 +1,587 @@ /*- * Copyright (c) 2003 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #ifndef KLD_MODULE #include "opt_comconsole.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "uart_if.h" devclass_t uart_devclass; char uart_driver_name[] = "uart"; SLIST_HEAD(uart_devinfo_list, uart_devinfo) uart_sysdevs = SLIST_HEAD_INITIALIZER(uart_sysdevs); MALLOC_DEFINE(M_UART, "UART", "UART driver"); void uart_add_sysdev(struct uart_devinfo *di) { SLIST_INSERT_HEAD(&uart_sysdevs, di, next); } const char * uart_getname(struct uart_class *uc) { return ((uc != NULL) ? uc->name : NULL); } struct uart_ops * uart_getops(struct uart_class *uc) { return ((uc != NULL) ? uc->uc_ops : NULL); } int uart_getrange(struct uart_class *uc) { return ((uc != NULL) ? uc->uc_range : 0); } /* * Schedule a soft interrupt. We do this on the 0 to !0 transition * of the TTY pending interrupt status. */ static void uart_sched_softih(struct uart_softc *sc, uint32_t ipend) { uint32_t new, old; do { old = sc->sc_ttypend; new = old | ipend; } while (!atomic_cmpset_32(&sc->sc_ttypend, old, new)); if ((old & SER_INT_MASK) == 0) swi_sched(sc->sc_softih, 0); } /* * A break condition has been detected. We treat the break condition as * a special case that should not happen during normal operation. 
When * the break condition is to be passed to higher levels in the form of * a NUL character, we really want the break to be in the right place in * the input stream. The overhead to achieve that is not in relation to * the exceptional nature of the break condition, so we permit ourselves * to be sloppy. */ static __inline int uart_intr_break(void *arg) { struct uart_softc *sc = arg; #if defined(KDB) && defined(BREAK_TO_DEBUGGER) if (sc->sc_sysdev != NULL && sc->sc_sysdev->type == UART_DEV_CONSOLE) { - kdb_enter("Line break on console"); + kdb_enter_why(KDB_WHY_BREAK, "Line break on console"); return (0); } #endif if (sc->sc_opened) uart_sched_softih(sc, SER_INT_BREAK); return (0); } /* * Handle a receiver overrun situation. We lost at least 1 byte in the * input stream and it's our job to contain the situation. We grab as * much of the data we can, but otherwise flush the receiver FIFO to * create some breathing room. The net effect is that we avoid the * overrun condition to happen for the next X characters, where X is * related to the FIFO size at the cost of loosing data right away. * So, instead of having multiple overrun interrupts in close proximity * to each other and possibly pessimizing UART interrupt latency for * other UARTs in a multiport configuration, we create a longer segment * of missing characters by freeing up the FIFO. * Each overrun condition is marked in the input buffer by a token. The * token represents the loss of at least one, but possible more bytes in * the input stream. */ static __inline int uart_intr_overrun(void *arg) { struct uart_softc *sc = arg; if (sc->sc_opened) { UART_RECEIVE(sc); if (uart_rx_put(sc, UART_STAT_OVERRUN)) sc->sc_rxbuf[sc->sc_rxput] = UART_STAT_OVERRUN; uart_sched_softih(sc, SER_INT_RXREADY); } UART_FLUSH(sc, UART_FLUSH_RECEIVER); return (0); } /* * Received data ready. */ static __inline int uart_intr_rxready(void *arg) { struct uart_softc *sc = arg; int rxp; rxp = sc->sc_rxput; UART_RECEIVE(sc); #if defined(KDB) && defined(ALT_BREAK_TO_DEBUGGER) if (sc->sc_sysdev != NULL && sc->sc_sysdev->type == UART_DEV_CONSOLE) { while (rxp != sc->sc_rxput) { if (kdb_alt_break(sc->sc_rxbuf[rxp++], &sc->sc_altbrk)) - kdb_enter("Break sequence on console"); + kdb_enter_why(KDB_WHY_BREAK, + "Break sequence on console"); if (rxp == sc->sc_rxbufsz) rxp = 0; } } #endif if (sc->sc_opened) uart_sched_softih(sc, SER_INT_RXREADY); else sc->sc_rxput = sc->sc_rxget; /* Ignore received data. */ return (1); } /* * Line or modem status change (OOB signalling). * We pass the signals to the software interrupt handler for further * processing. Note that we merge the delta bits, but set the state * bits. This is to avoid loosing state transitions due to having more * than 1 hardware interrupt between software interrupts. */ static __inline int uart_intr_sigchg(void *arg) { struct uart_softc *sc = arg; int new, old, sig; sig = UART_GETSIG(sc); if (sc->sc_pps.ppsparam.mode & PPS_CAPTUREBOTH) { if (sig & UART_SIG_DPPS) { pps_capture(&sc->sc_pps); pps_event(&sc->sc_pps, (sig & UART_SIG_PPS) ? PPS_CAPTUREASSERT : PPS_CAPTURECLEAR); } } /* * Keep track of signal changes, even when the device is not * opened. This allows us to inform upper layers about a * possible loss of DCD and thus the existence of a (possibly) * different connection when we have DCD back, during the time * that the device was closed. 
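 * The accumulated bits are later drained by the soft interrupt handler.
 * A minimal sketch of that consumer side (illustrative only, not the
 * actual tty-layer code):
 *
 *      pend = atomic_readandclear_32(&sc->sc_ttypend);
 *      if (pend & SER_INT_SIGCHG)
 *              sig = pend & SER_INT_SIGMASK;
 *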
*/ do { old = sc->sc_ttypend; new = old & ~SER_MASK_STATE; new |= sig & SER_INT_SIGMASK; } while (!atomic_cmpset_32(&sc->sc_ttypend, old, new)); if (sc->sc_opened) uart_sched_softih(sc, SER_INT_SIGCHG); return (1); } /* * The transmitter can accept more data. */ static __inline int uart_intr_txidle(void *arg) { struct uart_softc *sc = arg; if (sc->sc_txbusy) { sc->sc_txbusy = 0; uart_sched_softih(sc, SER_INT_TXIDLE); } return (0); } static int uart_intr(void *arg) { struct uart_softc *sc = arg; int flag = 0, ipend; while (!sc->sc_leaving && (ipend = UART_IPEND(sc)) != 0) { flag = 1; if (ipend & SER_INT_OVERRUN) uart_intr_overrun(sc); if (ipend & SER_INT_BREAK) uart_intr_break(sc); if (ipend & SER_INT_RXREADY) uart_intr_rxready(sc); if (ipend & SER_INT_SIGCHG) uart_intr_sigchg(sc); if (ipend & SER_INT_TXIDLE) uart_intr_txidle(sc); } return((flag)?FILTER_HANDLED:FILTER_STRAY); } serdev_intr_t * uart_bus_ihand(device_t dev, int ipend) { switch (ipend) { case SER_INT_BREAK: return (uart_intr_break); case SER_INT_OVERRUN: return (uart_intr_overrun); case SER_INT_RXREADY: return (uart_intr_rxready); case SER_INT_SIGCHG: return (uart_intr_sigchg); case SER_INT_TXIDLE: return (uart_intr_txidle); } return (NULL); } int uart_bus_ipend(device_t dev) { struct uart_softc *sc; sc = device_get_softc(dev); return (UART_IPEND(sc)); } int uart_bus_sysdev(device_t dev) { struct uart_softc *sc; sc = device_get_softc(dev); return ((sc->sc_sysdev != NULL) ? 1 : 0); } int uart_bus_probe(device_t dev, int regshft, int rclk, int rid, int chan) { struct uart_softc *sc; struct uart_devinfo *sysdev; int error; sc = device_get_softc(dev); /* * All uart_class references are weak. Check that the needed * class has been compiled-in. Fail if not. */ if (sc->sc_class == NULL) return (ENXIO); /* * Initialize the instance. Note that the instance (=softc) does * not necessarily match the hardware specific softc. We can't do * anything about it now, because we may not attach to the device. * Hardware drivers cannot use any of the class specific fields * while probing. */ kobj_init((kobj_t)sc, (kobj_class_t)sc->sc_class); sc->sc_dev = dev; if (device_get_desc(dev) == NULL) device_set_desc(dev, uart_getname(sc->sc_class)); /* * Allocate the register resource. We assume that all UARTs have * a single register window in either I/O port space or memory * mapped I/O space. Any UART that needs multiple windows will * consequently not be supported by this driver as-is. We try I/O * port space first because that's the common case. */ sc->sc_rrid = rid; sc->sc_rtype = SYS_RES_IOPORT; sc->sc_rres = bus_alloc_resource(dev, sc->sc_rtype, &sc->sc_rrid, 0, ~0, uart_getrange(sc->sc_class), RF_ACTIVE); if (sc->sc_rres == NULL) { sc->sc_rrid = rid; sc->sc_rtype = SYS_RES_MEMORY; sc->sc_rres = bus_alloc_resource(dev, sc->sc_rtype, &sc->sc_rrid, 0, ~0, uart_getrange(sc->sc_class), RF_ACTIVE); if (sc->sc_rres == NULL) return (ENXIO); } /* * Fill in the bus access structure and compare this device with * a possible console device and/or a debug port. We set the flags * in the softc so that the hardware dependent probe can adjust * accordingly. In general, you don't want to permanently disrupt * console I/O. */ sc->sc_bas.bsh = rman_get_bushandle(sc->sc_rres); sc->sc_bas.bst = rman_get_bustag(sc->sc_rres); sc->sc_bas.chan = chan; sc->sc_bas.regshft = regshft; sc->sc_bas.rclk = (rclk == 0) ? 
sc->sc_class->uc_rclk : rclk; SLIST_FOREACH(sysdev, &uart_sysdevs, next) { if (chan == sysdev->bas.chan && uart_cpu_eqres(&sc->sc_bas, &sysdev->bas)) { /* XXX check if ops matches class. */ sc->sc_sysdev = sysdev; sysdev->bas.rclk = sc->sc_bas.rclk; } } error = UART_PROBE(sc); bus_release_resource(dev, sc->sc_rtype, sc->sc_rrid, sc->sc_rres); return ((error) ? error : BUS_PROBE_DEFAULT); } int uart_bus_attach(device_t dev) { struct uart_softc *sc, *sc0; const char *sep; int error; /* * The sc_class field defines the type of UART we're going to work * with and thus the size of the softc. Replace the generic softc * with one that matches the UART now that we're certain we handle * the device. */ sc0 = device_get_softc(dev); if (sc0->sc_class->size > sizeof(*sc)) { sc = malloc(sc0->sc_class->size, M_UART, M_WAITOK|M_ZERO); bcopy(sc0, sc, sizeof(*sc)); device_set_softc(dev, sc); } else sc = sc0; /* * Protect ourselves against interrupts while we're not completely * finished attaching and initializing. We don't expect interrupts * until after UART_ATTACH() though. */ sc->sc_leaving = 1; mtx_init(&sc->sc_hwmtx_s, "uart_hwmtx", NULL, MTX_SPIN); if (sc->sc_hwmtx == NULL) sc->sc_hwmtx = &sc->sc_hwmtx_s; /* * Re-allocate. We expect that the softc contains the information * collected by uart_bus_probe() intact. */ sc->sc_rres = bus_alloc_resource(dev, sc->sc_rtype, &sc->sc_rrid, 0, ~0, uart_getrange(sc->sc_class), RF_ACTIVE); if (sc->sc_rres == NULL) { mtx_destroy(&sc->sc_hwmtx_s); return (ENXIO); } sc->sc_bas.bsh = rman_get_bushandle(sc->sc_rres); sc->sc_bas.bst = rman_get_bustag(sc->sc_rres); sc->sc_irid = 0; sc->sc_ires = bus_alloc_resource_any(dev, SYS_RES_IRQ, &sc->sc_irid, RF_ACTIVE | RF_SHAREABLE); if (sc->sc_ires != NULL) { error = bus_setup_intr(dev, sc->sc_ires, INTR_TYPE_TTY, uart_intr, NULL, sc, &sc->sc_icookie); if (error) error = bus_setup_intr(dev, sc->sc_ires, INTR_TYPE_TTY | INTR_MPSAFE, NULL, (driver_intr_t *)uart_intr, sc, &sc->sc_icookie); else sc->sc_fastintr = 1; if (error) { device_printf(dev, "could not activate interrupt\n"); bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irid, sc->sc_ires); sc->sc_ires = NULL; } } if (sc->sc_ires == NULL) { /* XXX no interrupt resource. Force polled mode. 
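 * With no interrupt the pending status has to be polled from a timer
 * instead.  A sketch of such a poller (hypothetical; the callout and
 * the function name are illustrative only):
 *
 *      static void
 *      uart_poll_example(void *arg)
 *      {
 *              struct uart_softc *sc = arg;
 *
 *              uart_intr(sc);
 *              callout_reset(&poll_callout, hz / 10, uart_poll_example, sc);
 *      }
 *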
*/ sc->sc_polled = 1; } sc->sc_rxbufsz = IBUFSIZ; sc->sc_rxbuf = malloc(sc->sc_rxbufsz * sizeof(*sc->sc_rxbuf), M_UART, M_WAITOK); sc->sc_txbuf = malloc(sc->sc_txfifosz * sizeof(*sc->sc_txbuf), M_UART, M_WAITOK); error = UART_ATTACH(sc); if (error) goto fail; if (sc->sc_hwiflow || sc->sc_hwoflow) { sep = ""; device_print_prettyname(dev); if (sc->sc_hwiflow) { printf("%sRTS iflow", sep); sep = ", "; } if (sc->sc_hwoflow) { printf("%sCTS oflow", sep); sep = ", "; } printf("\n"); } if (bootverbose && (sc->sc_fastintr || sc->sc_polled)) { sep = ""; device_print_prettyname(dev); if (sc->sc_fastintr) { printf("%sfast interrupt", sep); sep = ", "; } if (sc->sc_polled) { printf("%spolled mode", sep); sep = ", "; } printf("\n"); } if (sc->sc_sysdev != NULL) { if (sc->sc_sysdev->baudrate == 0) { if (UART_IOCTL(sc, UART_IOCTL_BAUD, (intptr_t)&sc->sc_sysdev->baudrate) != 0) sc->sc_sysdev->baudrate = -1; } switch (sc->sc_sysdev->type) { case UART_DEV_CONSOLE: device_printf(dev, "console"); break; case UART_DEV_DBGPORT: device_printf(dev, "debug port"); break; case UART_DEV_KEYBOARD: device_printf(dev, "keyboard"); break; default: device_printf(dev, "unknown system device"); break; } printf(" (%d,%c,%d,%d)\n", sc->sc_sysdev->baudrate, "noems"[sc->sc_sysdev->parity], sc->sc_sysdev->databits, sc->sc_sysdev->stopbits); } sc->sc_pps.ppscap = PPS_CAPTUREBOTH; pps_init(&sc->sc_pps); error = (sc->sc_sysdev != NULL && sc->sc_sysdev->attach != NULL) ? (*sc->sc_sysdev->attach)(sc) : uart_tty_attach(sc); if (error) goto fail; if (sc->sc_sysdev != NULL) sc->sc_sysdev->hwmtx = sc->sc_hwmtx; sc->sc_leaving = 0; uart_intr(sc); return (0); fail: free(sc->sc_txbuf, M_UART); free(sc->sc_rxbuf, M_UART); if (sc->sc_ires != NULL) { bus_teardown_intr(dev, sc->sc_ires, sc->sc_icookie); bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irid, sc->sc_ires); } bus_release_resource(dev, sc->sc_rtype, sc->sc_rrid, sc->sc_rres); mtx_destroy(&sc->sc_hwmtx_s); return (error); } int uart_bus_detach(device_t dev) { struct uart_softc *sc; sc = device_get_softc(dev); sc->sc_leaving = 1; if (sc->sc_sysdev != NULL) sc->sc_sysdev->hwmtx = NULL; UART_DETACH(sc); if (sc->sc_sysdev != NULL && sc->sc_sysdev->detach != NULL) (*sc->sc_sysdev->detach)(sc); else uart_tty_detach(sc); free(sc->sc_txbuf, M_UART); free(sc->sc_rxbuf, M_UART); if (sc->sc_ires != NULL) { bus_teardown_intr(dev, sc->sc_ires, sc->sc_icookie); bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irid, sc->sc_ires); } bus_release_resource(dev, sc->sc_rtype, sc->sc_rrid, sc->sc_rres); mtx_destroy(&sc->sc_hwmtx_s); if (sc->sc_class->size > sizeof(*sc)) { device_set_softc(dev, NULL); free(sc, M_UART); } else device_set_softc(dev, NULL); return (0); } Index: stable/7/sys/fs/unionfs/union_subr.c =================================================================== --- stable/7/sys/fs/unionfs/union_subr.c (revision 177733) +++ stable/7/sys/fs/unionfs/union_subr.c (revision 177734) @@ -1,1091 +1,1093 @@ /*- * Copyright (c) 1994 Jan-Simon Pendry * Copyright (c) 1994 * The Regents of the University of California. All rights reserved. * Copyright (c) 2005, 2006 Masanori Ozawa , ONGS Inc. * Copyright (c) 2006 Daichi Goto * * This code is derived from software contributed to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)union_subr.c 8.20 (Berkeley) 5/20/95 * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef MAC #include #endif #include #include MALLOC_DEFINE(M_UNIONFSNODE, "UNIONFS node", "UNIONFS vnode private part"); MALLOC_DEFINE(M_UNIONFSPATH, "UNIONFS path", "UNIONFS path private part"); /* * Initialize */ int unionfs_init(struct vfsconf *vfsp) { UNIONFSDEBUG("unionfs_init\n"); /* printed during system boot */ return (0); } /* * Uninitialize */ int unionfs_uninit(struct vfsconf *vfsp) { return (0); } /* * Make a new or get existing unionfs node. * * uppervp and lowervp should be unlocked. Because if new unionfs vnode is * locked, uppervp or lowervp is locked too. In order to prevent dead lock, * you should not lock plurality simultaneously. */ int unionfs_nodeget(struct mount *mp, struct vnode *uppervp, struct vnode *lowervp, struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct thread *td) { struct unionfs_mount *ump; struct unionfs_node *unp; struct vnode *vp; int error; int lkflags; char *path; ump = MOUNTTOUNIONFSMOUNT(mp); lkflags = (cnp ? cnp->cn_lkflags : 0); path = (cnp ? cnp->cn_nameptr : NULL); if (uppervp == NULLVP && lowervp == NULLVP) panic("unionfs_nodeget: upper and lower is null"); /* If it has no ISLASTCN flag, path check is skipped. */ if (cnp && !(cnp->cn_flags & ISLASTCN)) path = NULL; if ((uppervp == NULLVP || ump->um_uppervp != uppervp) || (lowervp == NULLVP || ump->um_lowervp != lowervp)) { if (dvp == NULLVP) return (EINVAL); } /* * Do the MALLOC before the getnewvnode since doing so afterward * might cause a bogus v_data pointer to get dereferenced elsewhere * if MALLOC should block. 
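 * In other words the ordering matters because an M_WAITOK allocation
 * may sleep: the private data must already exist before the vnode can
 * become visible to other threads.  Schematically:
 *
 *      unp = malloc(..., M_WAITOK);    may sleep; vp does not exist yet
 *      getnewvnode(..., &vp);          vp is now reachable, v_data unset
 *      vp->v_data = unp;               safe: unp was fully set up above
 *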
*/ MALLOC(unp, struct unionfs_node *, sizeof(struct unionfs_node), M_UNIONFSNODE, M_WAITOK | M_ZERO); error = getnewvnode("unionfs", mp, &unionfs_vnodeops, &vp); if (error != 0) { FREE(unp, M_UNIONFSNODE); return (error); } error = insmntque(vp, mp); /* XXX: Too early for mpsafe fs */ if (error != 0) { FREE(unp, M_UNIONFSNODE); return (error); } if (dvp != NULLVP) vref(dvp); if (uppervp != NULLVP) vref(uppervp); if (lowervp != NULLVP) vref(lowervp); unp->un_vnode = vp; unp->un_uppervp = uppervp; unp->un_lowervp = lowervp; unp->un_dvp = dvp; if (uppervp != NULLVP) vp->v_vnlock = uppervp->v_vnlock; else vp->v_vnlock = lowervp->v_vnlock; if (path != NULL) { unp->un_path = (char *) malloc(cnp->cn_namelen +1, M_UNIONFSPATH, M_WAITOK|M_ZERO); bcopy(cnp->cn_nameptr, unp->un_path, cnp->cn_namelen); unp->un_path[cnp->cn_namelen] = '\0'; } vp->v_type = (uppervp != NULLVP ? uppervp->v_type : lowervp->v_type); vp->v_data = unp; if ((uppervp != NULLVP && ump->um_uppervp == uppervp) && (lowervp != NULLVP && ump->um_lowervp == lowervp)) vp->v_vflag |= VV_ROOT; if (lkflags & LK_TYPE_MASK) vn_lock(vp, lkflags | LK_RETRY, td); *vpp = vp; return (0); } /* * Clean up the unionfs node. */ void unionfs_noderem(struct vnode *vp, struct thread *td) { int vfslocked; struct unionfs_node *unp; struct unionfs_node_status *unsp, *unsp_tmp; struct vnode *lvp; struct vnode *uvp; /* * Use the interlock to protect the clearing of v_data to * prevent faults in unionfs_lock(). */ VI_LOCK(vp); unp = VTOUNIONFS(vp); lvp = unp->un_lowervp; uvp = unp->un_uppervp; unp->un_lowervp = unp->un_uppervp = NULLVP; vp->v_vnlock = &(vp->v_lock); vp->v_data = NULL; lockmgr(vp->v_vnlock, LK_EXCLUSIVE | LK_INTERLOCK, VI_MTX(vp), td); if (lvp != NULLVP) VOP_UNLOCK(lvp, 0, td); if (uvp != NULLVP) VOP_UNLOCK(uvp, 0, td); vp->v_object = NULL; if (lvp != NULLVP) { vfslocked = VFS_LOCK_GIANT(lvp->v_mount); vrele(lvp); VFS_UNLOCK_GIANT(vfslocked); } if (uvp != NULLVP) { vfslocked = VFS_LOCK_GIANT(uvp->v_mount); vrele(uvp); VFS_UNLOCK_GIANT(vfslocked); } if (unp->un_dvp != NULLVP) { vfslocked = VFS_LOCK_GIANT(unp->un_dvp->v_mount); vrele(unp->un_dvp); VFS_UNLOCK_GIANT(vfslocked); unp->un_dvp = NULLVP; } if (unp->un_path) { free(unp->un_path, M_UNIONFSPATH); unp->un_path = NULL; } LIST_FOREACH_SAFE(unsp, &(unp->un_unshead), uns_list, unsp_tmp) { LIST_REMOVE(unsp, uns_list); free(unsp, M_TEMP); } FREE(unp, M_UNIONFSNODE); } /* * Get the unionfs node status. * You need exclusive lock this vnode. */ void unionfs_get_node_status(struct unionfs_node *unp, struct thread *td, struct unionfs_node_status **unspp) { struct unionfs_node_status *unsp; KASSERT(NULL != unspp, ("null pointer")); ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status"); LIST_FOREACH(unsp, &(unp->un_unshead), uns_list) { if (unsp->uns_tid == td->td_tid) { *unspp = unsp; return; } } /* create a new unionfs node status */ MALLOC(unsp, struct unionfs_node_status *, sizeof(struct unionfs_node_status), M_TEMP, M_WAITOK | M_ZERO); unsp->uns_tid = td->td_tid; LIST_INSERT_HEAD(&(unp->un_unshead), unsp, uns_list); *unspp = unsp; } /* * Remove the unionfs node status, if you can. * You need exclusive lock this vnode. */ void unionfs_tryrem_node_status(struct unionfs_node *unp, struct thread *td, struct unionfs_node_status *unsp) { KASSERT(NULL != unsp, ("null pointer")); ASSERT_VOP_ELOCKED(UNIONFSTOV(unp), "unionfs_get_node_status"); if (0 < unsp->uns_lower_opencnt || 0 < unsp->uns_upper_opencnt) return; LIST_REMOVE(unsp, uns_list); free(unsp, M_TEMP); } /* * Create upper node attr. 
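 * A worked example of the UNIONFS_MASQUERADE case below, for a lower
 * regular file owned by the mounting user, lower mode 0755 and
 * um_ufile 0644 (numbers are illustrative only):
 *
 *      0755 & 077077           = 0055  owner rwx cleared, the rest kept
 *      0644 & 0700             = 0600  owner rwx taken from um_ufile
 *      resulting va_mode       = 0655
 *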
*/ void unionfs_create_uppervattr_core(struct unionfs_mount *ump, struct vattr *lva, struct vattr *uva, struct thread *td) { VATTR_NULL(uva); uva->va_type = lva->va_type; uva->va_atime = lva->va_atime; uva->va_mtime = lva->va_mtime; uva->va_ctime = lva->va_ctime; switch (ump->um_copymode) { case UNIONFS_TRANSPARENT: uva->va_mode = lva->va_mode; uva->va_uid = lva->va_uid; uva->va_gid = lva->va_gid; break; case UNIONFS_MASQUERADE: if (ump->um_uid == lva->va_uid) { uva->va_mode = lva->va_mode & 077077; uva->va_mode |= (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile) & 0700; uva->va_uid = lva->va_uid; uva->va_gid = lva->va_gid; } else { uva->va_mode = (lva->va_type == VDIR ? ump->um_udir : ump->um_ufile); uva->va_uid = ump->um_uid; uva->va_gid = ump->um_gid; } break; default: /* UNIONFS_TRADITIONAL */ FILEDESC_SLOCK(td->td_proc->p_fd); uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask; FILEDESC_SUNLOCK(td->td_proc->p_fd); uva->va_uid = ump->um_uid; uva->va_gid = ump->um_gid; break; } } /* * Create upper node attr. */ int unionfs_create_uppervattr(struct unionfs_mount *ump, struct vnode *lvp, struct vattr *uva, struct ucred *cred, struct thread *td) { int error; struct vattr lva; if ((error = VOP_GETATTR(lvp, &lva, cred, td))) return (error); unionfs_create_uppervattr_core(ump, &lva, uva, td); return (error); } /* * relookup * * dvp should be locked on entry and will be locked on return. * * If an error is returned, *vpp will be invalid, otherwise it will hold a * locked, referenced vnode. If *vpp == dvp then remember that only one * LK_EXCLUSIVE lock is held. */ static int unionfs_relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct componentname *cn, struct thread *td, char *path, int pathlen, u_long nameiop) { int error; cn->cn_namelen = pathlen; cn->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); bcopy(path, cn->cn_pnbuf, pathlen); cn->cn_pnbuf[pathlen] = '\0'; cn->cn_nameiop = nameiop; cn->cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN); cn->cn_lkflags = LK_EXCLUSIVE; cn->cn_thread = td; cn->cn_cred = cnp->cn_cred; cn->cn_nameptr = cn->cn_pnbuf; cn->cn_consume = cnp->cn_consume; if (nameiop == DELETE) cn->cn_flags |= (cnp->cn_flags & (DOWHITEOUT | SAVESTART)); else if (RENAME == nameiop) cn->cn_flags |= (cnp->cn_flags & SAVESTART); vref(dvp); VOP_UNLOCK(dvp, 0, td); if ((error = relookup(dvp, vpp, cn))) { uma_zfree(namei_zone, cn->cn_pnbuf); cn->cn_flags &= ~HASBUF; vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, td); } else vrele(dvp); return (error); } /* * relookup for CREATE namei operation. * * dvp is unionfs vnode. dvp should be locked. * * If it called 'unionfs_copyfile' function by unionfs_link etc, * VOP_LOOKUP information is broken. * So it need relookup in order to create link etc. */ int unionfs_relookup_for_create(struct vnode *dvp, struct componentname *cnp, struct thread *td) { int error; struct vnode *udvp; struct vnode *vp; struct componentname cn; udvp = UNIONFSVPTOUPPERVP(dvp); vp = NULLVP; error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr, strlen(cnp->cn_nameptr), CREATE); if (error) return (error); if (vp != NULLVP) { if (udvp == vp) vrele(vp); else vput(vp); error = EEXIST; } if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } if (!error) { cn.cn_flags |= (cnp->cn_flags & HASBUF); cnp->cn_flags = cn.cn_flags; } return (error); } /* * relookup for DELETE namei operation. * * dvp is unionfs vnode. dvp should be locked. 
*/ int unionfs_relookup_for_delete(struct vnode *dvp, struct componentname *cnp, struct thread *td) { int error; struct vnode *udvp; struct vnode *vp; struct componentname cn; udvp = UNIONFSVPTOUPPERVP(dvp); vp = NULLVP; error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr, strlen(cnp->cn_nameptr), DELETE); if (error) return (error); if (vp == NULLVP) error = ENOENT; else { if (udvp == vp) vrele(vp); else vput(vp); } if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } if (!error) { cn.cn_flags |= (cnp->cn_flags & HASBUF); cnp->cn_flags = cn.cn_flags; } return (error); } /* * relookup for RENAME namei operation. * * dvp is unionfs vnode. dvp should be locked. */ int unionfs_relookup_for_rename(struct vnode *dvp, struct componentname *cnp, struct thread *td) { int error; struct vnode *udvp; struct vnode *vp; struct componentname cn; udvp = UNIONFSVPTOUPPERVP(dvp); vp = NULLVP; error = unionfs_relookup(udvp, &vp, cnp, &cn, td, cnp->cn_nameptr, strlen(cnp->cn_nameptr), RENAME); if (error) return (error); if (vp != NULLVP) { if (udvp == vp) vrele(vp); else vput(vp); } if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } if (!error) { cn.cn_flags |= (cnp->cn_flags & HASBUF); cnp->cn_flags = cn.cn_flags; } return (error); } /* * Update the unionfs_node. * * uvp is new locked upper vnode. unionfs vnode's lock will be exchanged to the * uvp's lock and lower's lock will be unlocked. */ static void unionfs_node_update(struct unionfs_node *unp, struct vnode *uvp, struct thread *td) { int count, lockcnt; struct vnode *vp; struct vnode *lvp; vp = UNIONFSTOV(unp); lvp = unp->un_lowervp; /* * lock update */ VI_LOCK(vp); unp->un_uppervp = uvp; vp->v_vnlock = uvp->v_vnlock; lockcnt = lvp->v_vnlock->lk_exclusivecount; if (lockcnt <= 0) panic("unionfs: no exclusive lock"); VI_UNLOCK(vp); for (count = 1; count < lockcnt; count++) vn_lock(uvp, LK_EXCLUSIVE | LK_CANRECURSE | LK_RETRY, td); } /* * Create a new shadow dir. * * udvp should be locked on entry and will be locked on return. * * If no error returned, unp will be updated. 
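 * A typical caller-side shape, with error handling trimmed (sketch
 * only):
 *
 *      if (unp->un_uppervp == NULLVP) {
 *              error = unionfs_mkshadowdir(ump, udvp, unp, cnp, td);
 *              if (error != 0)
 *                      return (error);
 *      }
 *      (unp->un_uppervp is valid from here on)
 *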
*/ int unionfs_mkshadowdir(struct unionfs_mount *ump, struct vnode *udvp, struct unionfs_node *unp, struct componentname *cnp, struct thread *td) { int error; struct vnode *lvp; struct vnode *uvp; struct vattr va; struct vattr lva; struct componentname cn; struct mount *mp; struct ucred *cred; struct ucred *credbk; struct uidinfo *rootinfo; if (unp->un_uppervp != NULLVP) return (EEXIST); lvp = unp->un_lowervp; uvp = NULLVP; credbk = cnp->cn_cred; /* Authority change to root */ rootinfo = uifind((uid_t)0); cred = crdup(cnp->cn_cred); chgproccnt(cred->cr_ruidinfo, 1, 0); change_euid(cred, rootinfo); change_ruid(cred, rootinfo); change_svuid(cred, (uid_t)0); uifree(rootinfo); cnp->cn_cred = cred; memset(&cn, 0, sizeof(cn)); if ((error = VOP_GETATTR(lvp, &lva, cnp->cn_cred, td))) goto unionfs_mkshadowdir_abort; if ((error = unionfs_relookup(udvp, &uvp, cnp, &cn, td, cnp->cn_nameptr, cnp->cn_namelen, CREATE))) goto unionfs_mkshadowdir_abort; if (uvp != NULLVP) { if (udvp == uvp) vrele(uvp); else vput(uvp); error = EEXIST; goto unionfs_mkshadowdir_free_out; } if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH))) goto unionfs_mkshadowdir_free_out; if ((error = VOP_LEASE(udvp, td, cn.cn_cred, LEASE_WRITE))) { vn_finished_write(mp); goto unionfs_mkshadowdir_free_out; } unionfs_create_uppervattr_core(ump, &lva, &va, td); error = VOP_MKDIR(udvp, &uvp, &cn, &va); if (!error) { unionfs_node_update(unp, uvp, td); /* * XXX The bug which cannot set uid/gid was corrected. * Ignore errors. */ va.va_type = VNON; VOP_SETATTR(uvp, &va, cn.cn_cred, td); } vn_finished_write(mp); unionfs_mkshadowdir_free_out: if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } unionfs_mkshadowdir_abort: cnp->cn_cred = credbk; chgproccnt(cred->cr_ruidinfo, -1, 0); crfree(cred); return (error); } /* * Create a new whiteout. * * dvp should be locked on entry and will be locked on return. */ int unionfs_mkwhiteout(struct vnode *dvp, struct componentname *cnp, struct thread *td, char *path) { int error; struct vnode *wvp; struct componentname cn; struct mount *mp; if (path == NULL) path = cnp->cn_nameptr; wvp = NULLVP; if ((error = unionfs_relookup(dvp, &wvp, cnp, &cn, td, path, strlen(path), CREATE))) return (error); if (wvp != NULLVP) { if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } if (dvp == wvp) vrele(wvp); else vput(wvp); return (EEXIST); } if ((error = vn_start_write(dvp, &mp, V_WAIT | PCATCH))) goto unionfs_mkwhiteout_free_out; if (!(error = VOP_LEASE(dvp, td, td->td_ucred, LEASE_WRITE))) error = VOP_WHITEOUT(dvp, &cn, CREATE); vn_finished_write(mp); unionfs_mkwhiteout_free_out: if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } return (error); } /* * Create a new vnode for create a new shadow file. * * If an error is returned, *vpp will be invalid, otherwise it will hold a * locked, referenced and opened vnode. * * unp is never updated. 
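 * Because the vnode comes back opened for writing, the caller is also
 * responsible for closing it and dropping the write count when done.
 * A sketch of that contract (illustrative only):
 *
 *      error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td);
 *      if (error == 0) {
 *              ... write to uvp ...
 *              VOP_CLOSE(uvp, FWRITE, cred, td);
 *              uvp->v_writecount--;
 *      }
 *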
*/ static int unionfs_vn_create_on_upper(struct vnode **vpp, struct vnode *udvp, struct unionfs_node *unp, struct vattr *uvap, struct thread *td) { struct unionfs_mount *ump; struct vnode *vp; struct vnode *lvp; struct ucred *cred; struct vattr lva; int fmode; int error; struct componentname cn; ump = MOUNTTOUNIONFSMOUNT(UNIONFSTOV(unp)->v_mount); vp = NULLVP; lvp = unp->un_lowervp; cred = td->td_ucred; fmode = FFLAGS(O_WRONLY | O_CREAT | O_TRUNC | O_EXCL); error = 0; if ((error = VOP_GETATTR(lvp, &lva, cred, td)) != 0) return (error); unionfs_create_uppervattr_core(ump, &lva, uvap, td); if (unp->un_path == NULL) panic("unionfs: un_path is null"); cn.cn_namelen = strlen(unp->un_path); cn.cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); bcopy(unp->un_path, cn.cn_pnbuf, cn.cn_namelen + 1); cn.cn_nameiop = CREATE; cn.cn_flags = (LOCKPARENT | LOCKLEAF | HASBUF | SAVENAME | ISLASTCN); cn.cn_lkflags = LK_EXCLUSIVE; cn.cn_thread = td; cn.cn_cred = cred; cn.cn_nameptr = cn.cn_pnbuf; cn.cn_consume = 0; vref(udvp); if ((error = relookup(udvp, &vp, &cn)) != 0) goto unionfs_vn_create_on_upper_free_out2; vrele(udvp); if (vp != NULLVP) { if (vp == udvp) vrele(vp); else vput(vp); error = EEXIST; goto unionfs_vn_create_on_upper_free_out1; } if ((error = VOP_LEASE(udvp, td, cred, LEASE_WRITE)) != 0) goto unionfs_vn_create_on_upper_free_out1; if ((error = VOP_CREATE(udvp, &vp, &cn, uvap)) != 0) goto unionfs_vn_create_on_upper_free_out1; if ((error = VOP_OPEN(vp, fmode, cred, td, NULL)) != 0) { vput(vp); goto unionfs_vn_create_on_upper_free_out1; } vp->v_writecount++; *vpp = vp; unionfs_vn_create_on_upper_free_out1: VOP_UNLOCK(udvp, 0, td); unionfs_vn_create_on_upper_free_out2: if (cn.cn_flags & HASBUF) { uma_zfree(namei_zone, cn.cn_pnbuf); cn.cn_flags &= ~HASBUF; } return (error); } /* * Copy from lvp to uvp. * * lvp and uvp should be locked and opened on entry and will be locked and * opened on return. */ static int unionfs_copyfile_core(struct vnode *lvp, struct vnode *uvp, struct ucred *cred, struct thread *td) { int error; off_t offset; int count; int bufoffset; char *buf; struct uio uio; struct iovec iov; error = 0; memset(&uio, 0, sizeof(uio)); uio.uio_td = td; uio.uio_segflg = UIO_SYSSPACE; uio.uio_offset = 0; if ((error = VOP_LEASE(lvp, td, cred, LEASE_READ)) != 0) return (error); if ((error = VOP_LEASE(uvp, td, cred, LEASE_WRITE)) != 0) return (error); buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); while (error == 0) { offset = uio.uio_offset; uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf; iov.iov_len = MAXBSIZE; uio.uio_resid = iov.iov_len; uio.uio_rw = UIO_READ; if ((error = VOP_READ(lvp, &uio, 0, cred)) != 0) break; if ((count = MAXBSIZE - uio.uio_resid) == 0) break; bufoffset = 0; while (bufoffset < count) { uio.uio_iov = &iov; uio.uio_iovcnt = 1; iov.iov_base = buf + bufoffset; iov.iov_len = count - bufoffset; uio.uio_offset = offset + bufoffset; uio.uio_resid = iov.iov_len; uio.uio_rw = UIO_WRITE; if ((error = VOP_WRITE(uvp, &uio, 0, cred)) != 0) break; bufoffset += (count - bufoffset) - uio.uio_resid; } uio.uio_offset = offset + bufoffset; } free(buf, M_TEMP); return (error); } /* * Copy file from lower to upper. * * If you need copy of the contents, set 1 to docopy. Otherwise, set 0 to * docopy. * * If no error returned, unp will be updated. 
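 * A caller-side sketch of the docopy convention (want_contents is a
 * hypothetical flag; passing 0 makes sense when the old contents are
 * about to be thrown away anyway, e.g. an O_TRUNC style open):
 *
 *      error = unionfs_copyfile(unp, want_contents ? 1 : 0, cred, td);
 *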
*/ int unionfs_copyfile(struct unionfs_node *unp, int docopy, struct ucred *cred, struct thread *td) { int error; struct mount *mp; struct vnode *udvp; struct vnode *lvp; struct vnode *uvp; struct vattr uva; lvp = unp->un_lowervp; uvp = NULLVP; if ((UNIONFSTOV(unp)->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); if (unp->un_dvp == NULLVP) return (EINVAL); if (unp->un_uppervp != NULLVP) return (EEXIST); udvp = VTOUNIONFS(unp->un_dvp)->un_uppervp; if (udvp == NULLVP) return (EROFS); if ((udvp->v_mount->mnt_flag & MNT_RDONLY)) return (EROFS); error = VOP_ACCESS(lvp, VREAD, cred, td); if (error != 0) return (error); if ((error = vn_start_write(udvp, &mp, V_WAIT | PCATCH)) != 0) return (error); error = unionfs_vn_create_on_upper(&uvp, udvp, unp, &uva, td); if (error != 0) { vn_finished_write(mp); return (error); } if (docopy != 0) { error = VOP_OPEN(lvp, FREAD, cred, td, NULL); if (error == 0) { error = unionfs_copyfile_core(lvp, uvp, cred, td); VOP_CLOSE(lvp, FREAD, cred, td); } } VOP_CLOSE(uvp, FWRITE, cred, td); uvp->v_writecount--; vn_finished_write(mp); if (error == 0) { /* Reset the attributes. Ignore errors. */ uva.va_type = VNON; VOP_SETATTR(uvp, &uva, cred, td); } unionfs_node_update(unp, uvp, td); return (error); } /* * It checks whether vp can rmdir. (check empty) * * vp is unionfs vnode. * vp should be locked. */ int unionfs_check_rmdir(struct vnode *vp, struct ucred *cred, struct thread *td) { int error; int eofflag; int lookuperr; struct vnode *uvp; struct vnode *lvp; struct vnode *tvp; struct vattr va; struct componentname cn; /* * The size of buf needs to be larger than DIRBLKSIZ. */ char buf[256 * 6]; struct dirent *dp; struct dirent *edp; struct uio uio; struct iovec iov; ASSERT_VOP_ELOCKED(vp, "unionfs_check_rmdir"); eofflag = 0; uvp = UNIONFSVPTOUPPERVP(vp); lvp = UNIONFSVPTOLOWERVP(vp); /* check opaque */ if ((error = VOP_GETATTR(uvp, &va, cred, td)) != 0) return (error); if (va.va_flags & OPAQUE) return (0); /* open vnode */ #ifdef MAC if ((error = mac_check_vnode_open(cred, vp, VEXEC|VREAD)) != 0) return (error); #endif if ((error = VOP_ACCESS(vp, VEXEC|VREAD, cred, td)) != 0) return (error); if ((error = VOP_OPEN(vp, FREAD, cred, td, NULL)) != 0) return (error); uio.uio_rw = UIO_READ; uio.uio_segflg = UIO_SYSSPACE; uio.uio_td = td; uio.uio_offset = 0; #ifdef MAC error = mac_check_vnode_readdir(td->td_ucred, lvp); #endif while (!error && !eofflag) { iov.iov_base = buf; iov.iov_len = sizeof(buf); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_resid = iov.iov_len; error = VOP_READDIR(lvp, &uio, cred, &eofflag, NULL, NULL); if (error) break; edp = (struct dirent*)&buf[sizeof(buf) - uio.uio_resid]; for (dp = (struct dirent*)buf; !error && dp < edp; dp = (struct dirent*)((caddr_t)dp + dp->d_reclen)) { if (dp->d_type == DT_WHT || (dp->d_namlen == 1 && dp->d_name[0] == '.') || (dp->d_namlen == 2 && !bcmp(dp->d_name, "..", 2))) continue; cn.cn_namelen = dp->d_namlen; cn.cn_pnbuf = NULL; cn.cn_nameptr = dp->d_name; cn.cn_nameiop = LOOKUP; cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN); cn.cn_lkflags = LK_EXCLUSIVE; cn.cn_thread = td; cn.cn_cred = cred; cn.cn_consume = 0; /* * check entry in lower. * Sometimes, readdir function returns * wrong entry. */ lookuperr = VOP_LOOKUP(lvp, &tvp, &cn); if (!lookuperr) vput(tvp); else continue; /* skip entry */ /* * check entry * If it has no exist/whiteout entry in upper, * directory is not empty. 
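 * The upper lookup below therefore decides the fate of each lower
 * entry roughly as follows:
 *
 *      upper lookup result             counts toward ENOTEMPTY?
 *      entry found                     no, covered by the upper layer
 *      ENOENT with ISWHITEOUT set      no, masked by a whiteout
 *      ENOENT, no whiteout             yes, the directory is not empty
 *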
*/ cn.cn_flags = (LOCKPARENT | LOCKLEAF | SAVENAME | RDONLY | ISLASTCN); lookuperr = VOP_LOOKUP(uvp, &tvp, &cn); if (!lookuperr) vput(tvp); /* ignore exist or whiteout entry */ if (!lookuperr || (lookuperr == ENOENT && (cn.cn_flags & ISWHITEOUT))) continue; error = ENOTEMPTY; } } /* close vnode */ VOP_CLOSE(vp, FREAD, cred, td); return (error); } #ifdef DIAGNOSTIC struct vnode * unionfs_checkuppervp(struct vnode *vp, char *fil, int lno) { struct unionfs_node *unp; unp = VTOUNIONFS(vp); #ifdef notyet if (vp->v_op != unionfs_vnodeop_p) { printf("unionfs_checkuppervp: on non-unionfs-node.\n"); #ifdef KDB - kdb_enter("unionfs_checkuppervp: on non-unionfs-node.\n"); + kdb_enter_why(KDB_WHY_UNIONFS, + "unionfs_checkuppervp: on non-unionfs-node.\n"); #endif panic("unionfs_checkuppervp"); }; #endif return (unp->un_uppervp); } struct vnode * unionfs_checklowervp(struct vnode *vp, char *fil, int lno) { struct unionfs_node *unp; unp = VTOUNIONFS(vp); #ifdef notyet if (vp->v_op != unionfs_vnodeop_p) { printf("unionfs_checklowervp: on non-unionfs-node.\n"); #ifdef KDB - kdb_enter("unionfs_checklowervp: on non-unionfs-node.\n"); + kdb_enter_why(KDB_WHY_UNIONFS, + "unionfs_checklowervp: on non-unionfs-node.\n"); #endif panic("unionfs_checklowervp"); }; #endif return (unp->un_lowervp); } #endif Index: stable/7/sys/i386/i386/machdep.c =================================================================== --- stable/7/sys/i386/i386/machdep.c (revision 177733) +++ stable/7/sys/i386/i386/machdep.c (revision 177734) @@ -1,3158 +1,3159 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #include #endif #ifdef DEV_ISA #include #endif #ifdef XBOX #include int arch_i386_is_xbox = 0; uint32_t arch_i386_xbox_memsize = 0; #endif /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern void init386(int first); extern void dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel; u_int basemem; int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. 
*/ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; #ifndef SMP static struct pcpu __pcpu; #endif struct mtx icu_lock; struct mem_range_softc mem_range_softc; static void cpu_startup(dummy) void *dummy; { char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook", 7) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), ptoa((uintmax_t)Maxmem) / 1048576); realmem = Maxmem; /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. 
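phys_avail[] and dump_avail[] above hold (start, end) physical-address pairs, two array slots per chunk, with a 0, 0 pair terminating the list; the verbose-boot loop in cpu_startup() walks them two entries at a time. A self-contained sketch of the same walk over an invented table (the addresses below are made up for illustration):

#include <stdint.h>
#include <stdio.h>

typedef uint64_t vm_paddr_t;    /* stand-in for the kernel type */
#define PAGE_SIZE 4096

int
main(void)
{
        /* Hypothetical (start, end) pairs; a 0, 0 pair terminates the list. */
        vm_paddr_t phys_avail[] = {
                0x0000000000001000ULL, 0x000000000009f000ULL,
                0x0000000000100000ULL, 0x000000003fff0000ULL,
                0, 0
        };
        int indx;

        for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) {
                vm_paddr_t size = phys_avail[indx + 1] - phys_avail[indx];

                printf("0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n",
                    (uintmax_t)phys_avail[indx],
                    (uintmax_t)phys_avail[indx + 1] - 1,
                    (uintmax_t)size, (uintmax_t)size / PAGE_SIZE);
        }
        return (0);
}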
*/ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate space for the signal handler context. 
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 
1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. 
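sendsig() above places the sigframe either on the alternate signal stack or just below the interrupted user %esp, then rounds the pointer down to a 16-byte boundary with the & ~0xF mask. A small sketch of that address arithmetic; the frame size and stack pointer below are invented values:

#include <stdint.h>
#include <stdio.h>

#define SIGFRAME_SIZE 408U      /* invented, stands in for sizeof(struct sigframe) */

int
main(void)
{
        uint32_t tf_esp = 0xbfbfe7f3;   /* hypothetical user stack pointer */
        uint32_t sp, sfp;

        /* No alternate stack: carve the frame just below the user %esp. */
        sp = tf_esp - SIGFRAME_SIZE;

        /* Round down to a 16-byte boundary, as sendsig() does with ~0xF. */
        sfp = sp & ~(uint32_t)0xF;

        printf("esp=%#x  frame start=%#x  aligned frame=%#x\n", tf_esp, sp, sfp);
        return (0);
}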
* * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; struct proc *p = td->td_proc; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. 
*/ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; PROC_LOCK(p); #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif SIGSETOLD(td->td_sigmask, scp->sc_mask); SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct proc *p = td->td_proc; struct trapframe *regs; const struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("freebsd4_sigreturn: cs = 0x%x\n", cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; int cs, eflags, error, ret; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
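Each sigreturn() variant vets the user-supplied context with the EFL_SECURE() and CS_SECURE() macros defined near the top of this file: eflags may differ from the live value only in the PSL_USERCHANGE bits, and the new %cs must carry user privilege (ISPL(cs) == SEL_UPL). A compilable sketch of those checks; the PSL_USERCHANGE mask and the sample selector values below are placeholders, not the real psl.h/segments.h definitions:

#include <stdio.h>

/* Placeholder values, for illustration only. */
#define PSL_USERCHANGE  0x00000cd5      /* invented mask of user-changeable bits */
#define SEL_UPL         3               /* user privilege level */
#define ISPL(cs)        ((cs) & 3)      /* RPL field of a selector */

#define CS_SECURE(cs)           (ISPL(cs) == SEL_UPL)
#define EFL_SECURE(ef, oef)     ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)

int
main(void)
{
        unsigned int cur_eflags = 0x00000202;   /* hypothetical live eflags */

        /* Only bits inside the user-changeable mask differ: accepted. */
        printf("eflags ok:  %d\n", EFL_SECURE(cur_eflags | 0x1, cur_eflags));
        /* A privileged bit (outside the mask) differs: rejected. */
        printf("eflags bad: %d\n", EFL_SECURE(cur_eflags | 0x3000, cur_eflags));
        /* User selector (RPL 3) passes, kernel selector (RPL 0) fails. */
        printf("cs ok=%d, cs bad=%d\n", CS_SECURE(0x1f), CS_SECURE(0x08));
        return (0);
}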
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { register_t reg; uint64_t tsc1, tsc2; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); if (!tsc_present) return (EOPNOTSUPP); /* If we're booting, trust the rate calibrated moments ago. */ if (cold) { *rate = tsc_freq; return (0); } #ifdef SMP /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); #ifdef SMP thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); #endif /* * Calculate the difference in readings, convert to Mhz, and * subtract 0.5% of the total. Empirical testing has shown that * overhead in DELAY() works out to approximately this value. */ tsc2 -= tsc1; *rate = tsc2 * 1000 - tsc2 * 5; return (0); } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Hook to idle the CPU when possible. In the SMP case we default to * off because a halted cpu will not currently pick up a new thread in the * run queue until the next timer tick. If turned on this will result in * approximately a 4.2% loss in real time performance in buildworld tests * (but improves user and sys times oddly enough), and saves approximately * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). * * XXX we need to have a cpu mask of idle cpus and generate an IPI or * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. * Then we can have our cake and eat it too. * * XXX I'm turning it on for SMP as well by default for now. It seems to * help lock contention somewhat, and this is critical for HTT. -Peter */ static int cpu_idle_hlt = 1; TUNABLE_INT("machdep.cpu_idle_hlt", &cpu_idle_hlt); SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, &cpu_idle_hlt, 0, "Idle loop HLT enable"); static void cpu_idle_default(void) { /* * we must absolutely guarentee that hlt is the * absolute next instruction after sti or we * introduce a timing window. */ __asm __volatile("sti; hlt"); } /* * Note that we have to be careful here to avoid a race between checking * sched_runnable() and actually halting. If we don't do this, we may waste * the time between calling hlt and the next interrupt even though there * is a runnable process. */ void cpu_idle(void) { #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif if (cpu_idle_hlt) { disable_intr(); if (sched_runnable()) enable_intr(); else (*cpu_idle_hook)(); } } /* Other subsystems (e.g., ACPI) can hook this later. 
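cpu_est_clockrate() above counts TSC ticks across a 1000 microsecond DELAY(), multiplies by 1000 to get ticks per second, and subtracts tsc2 * 5 (0.5% of tsc2 * 1000) as its estimate of the DELAY() overhead. A standalone sketch of just that arithmetic, using an invented tick count:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        /* Hypothetical delta measured across a 1000 microsecond delay. */
        uint64_t tsc2 = 2000042;        /* ticks in ~1 ms on a ~2 GHz part */
        uint64_t rate;

        /*
         * ticks/ms * 1000 = ticks/s; subtracting tsc2 * 5 removes 0.5%
         * (5 out of 1000) to account for DELAY() overhead.
         */
        rate = tsc2 * 1000 - tsc2 * 5;

        printf("estimated clock: %ju Hz (%.2f MHz)\n",
            (uintmax_t)rate, rate / 1e6);
        return (0);
}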
*/ void (*cpu_idle_hook)(void) = cpu_idle_default; /* * Clear registers on exec */ void exec_setregs(td, entry, stack, ps_strings) struct thread *td; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == PCPU_GET(curpcb)) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ td->td_pcb->pcb_flags &= ~FP_SOFTFP; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate. * * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. 
*/ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ struct mtx dt_lock; /* lock for GDT and LDT */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. * * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUFS_SEL 2 %fs Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUGS_SEL 3 %gs Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 4 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 5 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUCODE_SEL 6 Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUDATA_SEL 7 Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment 
descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct i386tss)-1,/* length */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 10 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GNDIS_SEL 18 NDIS Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* 
segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. 
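setidt() above fills an interrupt gate by splitting the 32-bit handler address across the descriptor's 16-bit low and high offset fields alongside the selector, type, and DPL. A small sketch of that split, with plain integers standing in for the gate_descriptor bitfields and a made-up handler address:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
        uint32_t func = 0xc01234ab;     /* hypothetical handler entry point */
        uint16_t gd_looffset, gd_hioffset;

        /* Same split setidt() performs into the gate's two offset fields. */
        gd_looffset = (uint16_t)(func & 0xffff);
        gd_hioffset = (uint16_t)(func >> 16);

        printf("func=%#x  lo=%#x  hi=%#x  rebuilt=%#x\n",
            func, (unsigned)gd_looffset, (unsigned)gd_hioffset,
            ((uint32_t)gd_hioffset << 16) | gd_looffset);
        return (0);
}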
*/ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { int i, physmap_idx; physmap_idx = *physmap_idxp; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); if (smap->length == 0) return (1); #ifndef PAE if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); return (1); } #endif for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-monotonic memory region, ignoring second region\n"); return (1); } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; return (1); } /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int i, off, physmap_idx, pa_indx, da_indx; int hasbrokenint12, has_smap; u_long physmem_tunable; u_int extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; struct bios_smap *smap, *smapbase, *smapend; u_int32_t smapsize; quad_t dcons_addr, dcons_size; caddr_t kmdp; has_smap = 0; #ifdef XBOX if (arch_i386_is_xbox) { /* * We queried the memory size before, so chop off 4MB for * the framebuffer and inform the OS of this. */ physmap[0] = 0; physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE; physmap_idx = 0; goto physmap_done; } #endif hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); bzero(&vmf, sizeof(vmf)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Some newer BIOSes has broken INT 12H implementation which cause * kernel panic immediately. In this case, we need to scan SMAP * with INT 15:E820 first, then determine base memory size. 
*/ if (hasbrokenint12) { goto int15e820; } /* * Perform "base memory" related probes & setup */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * Map pages between basemem and ISA_HOLE_START, if any, r/w into * the vm86 page table so that vm86 can scribble on them using * the vm86 map too. XXX: why 2 ways for this and only 1 way for * page 0, at least as initialized here? */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; int15e820: /* * Fetch the memory map with INT 15:E820. First, check to see * if the loader supplied it and use that if so. Otherwise, * use vm86 to invoke the BIOS call directly. */ physmap_idx = 0; smapbase = NULL; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); if (kmdp != NULL) smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase != NULL) { /* subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes smap. */ smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); has_smap = 1; for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } else { /* * map page 1 R/W into the kernel page table so we can use it * as a buffer. The kernel will unmap this page later. */ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; has_smap = 1; if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } while (vmf.vmf_ebx != 0); } /* * Perform "base memory" related probes & setup based on SMAP */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } /* * XXX this function is horribly organized and has to the same * things that it does above here. */ if (basemem == 0) basemem = 640; if (basemem > 640) { printf( "Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * Let vm86 scribble on pages between basemem and * ISA_HOLE_START, as above. 
*/ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } if (physmap[1] != 0) goto physmap_done; /* * If we failed above, try memory map with INT 15:E801 */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else /* * Prefer the RTC value for extended memory. */ extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend * the amount of memory in the system. */ if (has_smap && Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. 
*/ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } void init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x; struct pcpu *pc; thread0.td_kstack = proc0kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. 
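getmemsize() above probes every candidate page through CMAP1 by writing four patterns (alternating bits both ways, all ones, all zeros), reading each back, and restoring the original word; any mismatch marks the page bad and keeps it out of phys_avail[]. A userland sketch of the same probe applied to an ordinary variable rather than a freshly mapped page:

#include <stdio.h>

/* Probe one word with the same four patterns the page test uses. */
static int
word_is_bad(volatile int *ptr)
{
        static const int patterns[] = { (int)0xaaaaaaaa, 0x55555555, -1, 0 };
        int saved = *ptr;
        int bad = 0;
        unsigned i;

        for (i = 0; i < sizeof(patterns) / sizeof(patterns[0]); i++) {
                *ptr = patterns[i];
                if (*ptr != patterns[i])
                        bad = 1;
        }
        *ptr = saved;           /* restore the original contents */
        return (bad);
}

int
main(void)
{
        int word = 1234;

        printf("page_bad = %s\n", word_is_bad(&word) ? "TRUE" : "FALSE");
        printf("word restored to %d\n", word);
        return (0);
}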
*/ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); #ifdef SMP pc = &SMP_prvspace[0].pcpu; #else pc = &__pcpu; #endif gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); #ifdef XBOX /* * The following code queries the PCI ID of 0:0:0. For the XBOX, * This should be 0x10de / 0x02a5. * * This is exactly what Linux does. 
*/ outl(0xcf8, 0x80000000); if (inl(0xcfc) == 0x02a510de) { arch_i386_is_xbox = 1; pic16l_setled(XBOX_LED_GREEN); /* * We are an XBOX, but we may have either 64MB or 128MB of * memory. The PCI host bridge should be programmed for this, * so we just query it. */ outl(0xcf8, 0x80000084); arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; } #endif /* XBOX */ /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). */ i8254_init(); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA elcr_probe(); atpic_startup(); #endif #ifdef DDB ksym_start = bootinfo.bi_symtab; ksym_end = bootinfo.bi_esymtab; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) - kdb_enter("Boot flags requested debugger"); + kdb_enter_why(KDB_WHY_BOOTFLAGS, + "Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #ifdef PAE dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ /* XXX yes! 
*/ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; #ifdef PAE thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_flags = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(td->td_md.md_saved_flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL) static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); /* Put the problematic entry (#6) at the end of the lower page. */ new_idt = (struct gate_descriptor*) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? 
tf->tf_esp : (int)(tf + 1) - 8; } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_eflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; bzero(sv_87, sizeof(*sv_87)); /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, &td->td_pcb->pcb_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); return (0); } /* * Get machine context. 
*/ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if ((ret = set_fpcontext(td, mcp)) == 0) { tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; ret = 0; } return (ret); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; #else union savefpu *addr; /* * XXX mc_fpstate might be misaligned, since its declaration is not * unportabilized using __attribute__((aligned(16))) like the * declaration of struct savemm, and anyway, alignment doesn't work * for auto variables since we don't use gcc's pessimal stack * alignment. Work around this by abusing the spare fields after * mcp->mc_fpstate. * * XXX unpessimize most cases by only aligning when fxsave might be * called, although this requires knowing too much about * npxgetregs()'s internals. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } mcp->mc_ownedfp = npxgetregs(td, addr); if (addr != (union savefpu *)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); } mcp->mc_fpformat = npxformat(); #endif } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { union savefpu *addr; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* XXX align as above. 
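[Sketch, not part of the diff] The XXX comment in get_fpcontext() above describes the workaround for a possibly misaligned mc_fpstate: step the pointer forward four bytes at a time until it is 16-byte aligned, using the spare fields that follow the buffer as slack. A stand-alone restatement of that step, with an invented name:

#include <stdint.h>

/*
 * Advance a pointer in 4-byte units to the next 16-byte boundary, as
 * fxsave/fxrstor require.  Assumes, as the kernel code does with
 * mc_spare2, that at least 12 bytes of scratch space follow the buffer.
 */
static void *
align16(void *p)
{
	uintptr_t a;

	for (a = (uintptr_t)p; (a & 0xF) != 0; a += 4)
		continue;
	return ((void *)a);
}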
*/ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); } #ifdef DEV_NPX #ifdef CPU_ENABLE_SSE if (cpu_fxsr) addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; #endif /* * XXX we violate the dubious requirement that npxsetregs() * be called with interrupts disabled. */ npxsetregs(td, addr); #endif /* * Don't bother putting things back where they were in the * misaligned case, since we know that the caller won't use * them again. */ } else return (EINVAL); return (0); } static void fpstate_drop(struct thread *td) { register_t s; s = intr_disable(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; intr_restore(s); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? 
*/ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifndef DEV_APIC #include /* * Provide stub functions so that the MADT APIC enumerator in the acpi * kernel module will link against a kernel without 'device apic'. * * XXX - This is a gross hack. */ void apic_register_enumerator(struct apic_enumerator *enumerator) { } void * ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase) { return (NULL); } int ioapic_disable_pin(void *cookie, u_int pin) { return (ENXIO); } int ioapic_get_vector(void *cookie, u_int pin) { return (-1); } void ioapic_register(void *cookie) { } int ioapic_remap_vector(void *cookie, u_int pin, int vector) { return (ENXIO); } int ioapic_set_extint(void *cookie, u_int pin) { return (ENXIO); } int ioapic_set_nmi(void *cookie, u_int pin) { return (ENXIO); } int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol) { return (ENXIO); } int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger) { return (ENXIO); } void lapic_create(u_int apic_id, int boot_cpu) { } void lapic_init(vm_paddr_t addr) { } int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode) { return (ENXIO); } int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) { return (ENXIO); } int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) { return (ENXIO); } #endif #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called from the debugger. * * The actual code is stolen from , and de-inlined. 
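[Sketch, not part of the diff] set_dbregs() above rejects undefined DR7 access/length encodings and any enabled breakpoint whose address lies outside the process's address space. For context, here is a hedged userland view of the interface being validated: a debugger arming a 4-byte write watchpoint through ptrace(2). The DBREG_DR7_SET and DBREG_DR7_MASK macro names are assumed to exist in the i386 <machine/reg.h> alongside the DBREG_DR7_ENABLED/ACCESS/LEN macros used above; verify before relying on them.

#include <sys/types.h>
#include <sys/ptrace.h>
#include <machine/reg.h>

/* Hypothetical helper: watch 4-byte writes to addr in the traced process. */
static int
set_write_watchpoint(pid_t pid, unsigned int addr)
{
	struct dbreg d;

	if (ptrace(PT_GETDBREGS, pid, (caddr_t)&d, 0) == -1)
		return (-1);
	d.dr[0] = addr;				/* watched address in %dr0 */
	d.dr[7] &= ~DBREG_DR7_MASK(0);
	d.dr[7] |= DBREG_DR7_SET(0, DBREG_DR7_LEN_4,	/* 4-byte write watch */
	    DBREG_DR7_WRONLY, DBREG_DR7_LOCAL_ENABLE);
	return (ptrace(PT_SETDBREGS, pid, (caddr_t)&d, 0));
}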
*/ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This make a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. */ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* KDB */ Index: stable/7/sys/i386/i386/mp_watchdog.c =================================================================== --- stable/7/sys/i386/i386/mp_watchdog.c (revision 177733) +++ stable/7/sys/i386/i386/mp_watchdog.c (revision 177734) @@ -1,211 +1,212 @@ /*- * Copyright (c) 2004 Robert N. M. Watson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_mp_watchdog.h" #include "opt_sched.h" #ifdef SCHED_ULE #error MP_WATCHDOG cannot currently be used with SCHED_ULE #endif #include #include #include #include #include #include #include #include #include #include #include #include #include /* * mp_watchdog hijacks the idle thread on a specified CPU, prevents new work * from being scheduled there, and uses it as a "watchdog" to detect kernel * failure on other CPUs. This is made reasonable by inclusion of logical * processors in Xeon hardware. The watchdog is configured by setting the * debug.watchdog sysctl/tunable to the CPU of interest. A callout will then * begin executing reseting a timer that is gradually lowered by the watching * thread. If the timer reaches 0, the watchdog fires by ether dropping * directly to the debugger, or by sending an NMI IPI to the boot processor. * This is a somewhat less efficient substitute for dedicated watchdog * hardware, but can be quite an effective tool for debugging hangs. * * XXXRW: This should really use the watchdog(9)/watchdog(4) framework, but * doesn't yet. 
*/ static int watchdog_cpu = -1; static int watchdog_dontfire = 1; static int watchdog_timer = -1; static int watchdog_nmi = 1; TUNABLE_INT("debug.watchdog", &watchdog_cpu); SYSCTL_INT(_debug, OID_AUTO, watchdog_nmi, CTLFLAG_RW, &watchdog_nmi, 0, "IPI the boot processor with an NMI to enter the debugger"); static struct callout watchdog_callout; static void watchdog_change(int wdcpu); /* * Number of seconds before the watchdog will fire if the callout fails to * reset the timer. */ #define WATCHDOG_THRESHOLD 10 static void watchdog_init(void *arg) { callout_init(&watchdog_callout, CALLOUT_MPSAFE); if (watchdog_cpu != -1) watchdog_change(watchdog_cpu); } /* * This callout resets a timer until the watchdog kicks in. It acquires some * critical locks to make sure things haven't gotten wedged with hose locks * held. */ static void watchdog_function(void *arg) { /* * Since the timer ran, we must not be wedged. Acquire some critical * locks to make sure. Then reset the timer. */ mtx_lock(&Giant); watchdog_timer = WATCHDOG_THRESHOLD; mtx_unlock(&Giant); callout_reset(&watchdog_callout, 1 * hz, watchdog_function, NULL); } SYSINIT(watchdog_init, SI_SUB_DRIVERS, SI_ORDER_ANY, watchdog_init, NULL); static void watchdog_change(int wdcpu) { if (wdcpu == -1 || wdcpu == 0xffffffff) { /* * Disable the watchdog. */ watchdog_cpu = -1; watchdog_dontfire = 1; callout_stop(&watchdog_callout); printf("watchdog stopped\n"); } else { watchdog_timer = WATCHDOG_THRESHOLD; watchdog_dontfire = 0; watchdog_cpu = wdcpu; callout_reset(&watchdog_callout, 1 * hz, watchdog_function, NULL); } } /* * This sysctl sets which CPU is the watchdog CPU. Set to -1 or 0xffffffff * to disable the watchdog. */ static int sysctl_watchdog(SYSCTL_HANDLER_ARGS) { int error, temp; temp = watchdog_cpu; error = sysctl_handle_int(oidp, &temp, 0, req); if (error) return (error); if (req->newptr != NULL) watchdog_change(temp); return (0); } SYSCTL_PROC(_debug, OID_AUTO, watchdog, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_watchdog, "I", ""); /* * Drop into the debugger by sending an IPI NMI to the boot processor. */ static void watchdog_ipi_nmi(void) { /* * Deliver NMI to the boot processor. Why not? */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_NMI, boot_cpu_id); lapic_ipi_wait(-1); } /* * ap_watchdog() is called by the SMP idle loop code. It works on the same * premise that the disabling of logical processors does: that if the cpu is * idle, then it can ignore the world from then on, as nothing will be * scheduled on it. Leaving aside multi-runqueue schedulers (SCHED_ULE) and * explicit process migration (sched_bind()), this is not an unreasonable * assumption. */ void ap_watchdog(u_int cpuid) { char old_pcomm[MAXCOMLEN + 1]; struct proc *p; if (watchdog_cpu != cpuid) return; printf("watchdog started on cpu %d\n", cpuid); p = curproc; bcopy(p->p_comm, old_pcomm, MAXCOMLEN + 1); snprintf(p->p_comm, MAXCOMLEN + 1, "mp_watchdog cpu %d", cpuid); while (1) { DELAY(1000000); /* One second. 
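[Sketch, not part of the diff] The sysctl handler above is the only control surface for this watchdog: writing a CPU id to debug.watchdog arms it (the same name also works as a loader tunable via TUNABLE_INT), and -1 or 0xffffffff disarms it. A small userland equivalent of running "sysctl debug.watchdog=1"; the function name is invented.

#include <sys/types.h>
#include <sys/sysctl.h>

/* Arm the MP watchdog on the given CPU id; pass -1 to disarm it again. */
static int
arm_mp_watchdog(int cpu)
{
	return (sysctlbyname("debug.watchdog", NULL, NULL, &cpu, sizeof(cpu)));
}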
*/ if (watchdog_cpu != cpuid) break; atomic_subtract_int(&watchdog_timer, 1); if (watchdog_timer < 4) printf("Watchdog timer: %d\n", watchdog_timer); if (watchdog_timer == 0 && watchdog_dontfire == 0) { printf("Watchdog firing!\n"); watchdog_dontfire = 1; if (watchdog_nmi) watchdog_ipi_nmi(); else - kdb_enter("mp_watchdog"); + kdb_enter_why(KDB_WHY_WATCHDOG, + "mp_watchdog"); } } bcopy(old_pcomm, p->p_comm, MAXCOMLEN + 1); printf("watchdog stopped on cpu %d\n", cpuid); } Index: stable/7/sys/ia64/ia64/machdep.c =================================================================== --- stable/7/sys/ia64/ia64/machdep.c (revision 177733) +++ stable/7/sys/ia64/ia64/machdep.c (revision 177734) @@ -1,1538 +1,1539 @@ /*- * Copyright (c) 2003,2004 Marcel Moolenaar * Copyright (c) 2000,2001 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_msgbuf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include /* XXX fc.i kluge (quick fix) */ extern int ia64_icache_sync_kluge; u_int64_t processor_frequency; u_int64_t bus_frequency; u_int64_t itc_frequency; int cold = 1; u_int64_t pa_bootinfo; struct bootinfo bootinfo; struct pcpu pcpu0; extern char kstack[]; vm_offset_t proc0kstack; extern u_int64_t kernel_text[], _end[]; extern u_int64_t ia64_gateway_page[]; extern u_int64_t break_sigtramp[]; extern u_int64_t epc_sigtramp[]; struct fpswa_iface *fpswa_iface; u_int64_t ia64_pal_base; u_int64_t ia64_port_base; char machine[] = MACHINE; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, ""); static char cpu_model[64]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "The CPU model name"); static char cpu_family[64]; SYSCTL_STRING(_hw, OID_AUTO, family, CTLFLAG_RD, cpu_family, 0, "The CPU family name"); #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) struct msgbuf *msgbufp=0; long Maxmem = 0; long realmem = 0; #define PHYSMAP_SIZE (2 * VM_PHYSSEG_MAX) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) void mi_startup(void); /* XXX should be in a MI header */ struct kva_md_info kmi; #define Mhz 1000000L #define Ghz (1000L*Mhz) void setPQL2(int *const size, int *const ways); void setPQL2(int *const size, int *const ways) { return; } static void identifycpu(void) { char vendor[17]; char *family_name, *model_name; u_int64_t features, tmp; int number, revision, model, family, archrev; /* * Assumes little-endian. */ *(u_int64_t *) &vendor[0] = ia64_get_cpuid(0); *(u_int64_t *) &vendor[8] = ia64_get_cpuid(1); vendor[16] = '\0'; tmp = ia64_get_cpuid(3); number = (tmp >> 0) & 0xff; revision = (tmp >> 8) & 0xff; model = (tmp >> 16) & 0xff; family = (tmp >> 24) & 0xff; archrev = (tmp >> 32) & 0xff; family_name = model_name = "unknown"; switch (family) { case 0x07: family_name = "Itanium"; model_name = "Merced"; break; case 0x1f: family_name = "Itanium 2"; switch (model) { case 0x00: model_name = "McKinley"; break; case 0x01: /* * Deerfield is a low-voltage variant based on the * Madison core. We need circumstantial evidence * (i.e. the clock frequency) to identify those. * Allow for roughly 1% error margin. 
*/ tmp = processor_frequency >> 7; if ((processor_frequency - tmp) < 1*Ghz && (processor_frequency + tmp) >= 1*Ghz) model_name = "Deerfield"; else model_name = "Madison"; break; case 0x02: model_name = "Madison II"; break; } break; case 0x20: /* XXX fc.i kluge (quick fix) */ ia64_icache_sync_kluge = 1; family_name = "Itanium 2"; switch (model) { case 0x00: model_name = "Montecito"; break; } break; } snprintf(cpu_family, sizeof(cpu_family), "%s", family_name); snprintf(cpu_model, sizeof(cpu_model), "%s", model_name); features = ia64_get_cpuid(4); printf("CPU: %s (", model_name); if (processor_frequency) { printf("%ld.%02ld-Mhz ", (processor_frequency + 4999) / Mhz, ((processor_frequency + 4999) / (Mhz/100)) % 100); } printf("%s)\n", family_name); printf(" Origin = \"%s\" Revision = %d\n", vendor, revision); printf(" Features = 0x%b\n", (u_int32_t) features, "\020" "\001LB" /* long branch (brl) instruction. */ "\002SD" /* Spontaneous deferral. */ "\003AO" /* 16-byte atomic operations (ld, st, cmpxchg). */ ); } static void cpu_startup(dummy) void *dummy; { /* * Good {morning,afternoon,evening,night}. */ identifycpu(); /* startrtclock(); */ #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ld (%ld MB)\n", ia64_ptob(Maxmem), ia64_ptob(Maxmem) / 1048576); realmem = Maxmem; /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { long size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08lx - 0x%08lx, %ld bytes (%ld pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size1, size1 >> PAGE_SHIFT); } } vm_ksubmap_init(&kmi); printf("avail memory = %ld (%ld MB)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1048576); if (fpswa_iface == NULL) printf("Warning: no FPSWA package supplied\n"); else printf("FPSWA Revision = 0x%lx, Entry = %p\n", (long)fpswa_iface->if_rev, (void *)fpswa_iface->if_fpswa); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); /* * Traverse the MADT to discover IOSAPIC and Local SAPIC * information. */ ia64_probe_sapics(); ia64_mca_init(); } void cpu_boot(int howto) { efi_reset_system(); } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); *rate = processor_frequency; return (0); } void cpu_halt() { efi_reset_system(); } static void cpu_idle_default(void) { struct ia64_pal_result res; res = ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0); } void cpu_idle() { (*cpu_idle_hook)(); } /* Other subsystems (e.g., ACPI) can hook this later. */ void (*cpu_idle_hook)(void) = cpu_idle_default; void cpu_reset() { cpu_boot(0); } void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx) { struct pcb *oldpcb, *newpcb; oldpcb = old->td_pcb; #ifdef COMPAT_IA32 ia32_savectx(oldpcb); #endif if (PCPU_GET(fpcurthread) == old) old->td_frame->tf_special.psr |= IA64_PSR_DFH; if (!savectx(oldpcb)) { newpcb = new->td_pcb; oldpcb->pcb_current_pmap = pmap_switch(newpcb->pcb_current_pmap); PCPU_SET(curthread, new); #ifdef COMPAT_IA32 ia32_restorectx(newpcb); #endif if (PCPU_GET(fpcurthread) == new) new->td_frame->tf_special.psr &= ~IA64_PSR_DFH; restorectx(newpcb); /* We should not get here. 
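[Sketch, not part of the diff] The Deerfield check earlier in this chunk implements the "roughly 1% error margin" with a shift: tmp = freq >> 7 is about 0.78% of the frequency, and the test accepts any measured frequency whose +/- tmp window contains 1 GHz (roughly 992 to 1008 MHz for a nominal 1 GHz part). A worked restatement with an invented name:

#include <stdint.h>

#define	GHZ	1000000000ULL

/* Mirror of the identifycpu() test: is freq within ~1/128 (~0.78%) of 1 GHz? */
static int
is_deerfield_speed(uint64_t freq)
{
	uint64_t tmp = freq >> 7;	/* the "roughly 1%" margin */

	return (freq - tmp < 1 * GHZ && freq + tmp >= 1 * GHZ);
}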
*/ panic("cpu_switch: restorectx() returned"); /* NOTREACHED */ } } void cpu_throw(struct thread *old __unused, struct thread *new) { struct pcb *newpcb; newpcb = new->td_pcb; (void)pmap_switch(newpcb->pcb_current_pmap); PCPU_SET(curthread, new); #ifdef COMPAT_IA32 ia32_restorectx(newpcb); #endif restorectx(newpcb); /* We should not get here. */ panic("cpu_throw: restorectx() returned"); /* NOTREACHED */ } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = cpuid; } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_intr = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(td->td_md.md_saved_intr); } void map_vhpt(uintptr_t vhpt) { pt_entry_t pte; uint64_t psr; pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY | PTE_PL_KERN | PTE_AR_RW; pte |= vhpt & PTE_PPN_MASK; __asm __volatile("ptr.d %0,%1" :: "r"(vhpt), "r"(IA64_ID_PAGE_SHIFT<<2)); __asm __volatile("mov %0=psr" : "=r"(psr)); __asm __volatile("rsm psr.ic|psr.i"); ia64_srlz_i(); ia64_set_ifa(vhpt); ia64_set_itir(IA64_ID_PAGE_SHIFT << 2); ia64_srlz_d(); __asm __volatile("itr.d dtr[%0]=%1" :: "r"(2), "r"(pte)); __asm __volatile("mov psr.l=%0" :: "r" (psr)); ia64_srlz_i(); } void map_pal_code(void) { pt_entry_t pte; uint64_t psr; if (ia64_pal_base == 0) return; pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY | PTE_PL_KERN | PTE_AR_RWX; pte |= ia64_pal_base & PTE_PPN_MASK; __asm __volatile("ptr.d %0,%1; ptr.i %0,%1" :: "r"(IA64_PHYS_TO_RR7(ia64_pal_base)), "r"(IA64_ID_PAGE_SHIFT<<2)); __asm __volatile("mov %0=psr" : "=r"(psr)); __asm __volatile("rsm psr.ic|psr.i"); ia64_srlz_i(); ia64_set_ifa(IA64_PHYS_TO_RR7(ia64_pal_base)); ia64_set_itir(IA64_ID_PAGE_SHIFT << 2); ia64_srlz_d(); __asm __volatile("itr.d dtr[%0]=%1" :: "r"(1), "r"(pte)); ia64_srlz_d(); __asm __volatile("itr.i itr[%0]=%1" :: "r"(1), "r"(pte)); __asm __volatile("mov psr.l=%0" :: "r" (psr)); ia64_srlz_i(); } void map_gateway_page(void) { pt_entry_t pte; uint64_t psr; pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY | PTE_PL_KERN | PTE_AR_X_RX; pte |= (uint64_t)ia64_gateway_page & PTE_PPN_MASK; __asm __volatile("ptr.d %0,%1; ptr.i %0,%1" :: "r"(VM_MAX_ADDRESS), "r"(PAGE_SHIFT << 2)); __asm __volatile("mov %0=psr" : "=r"(psr)); __asm __volatile("rsm psr.ic|psr.i"); ia64_srlz_i(); ia64_set_ifa(VM_MAX_ADDRESS); ia64_set_itir(PAGE_SHIFT << 2); ia64_srlz_d(); __asm __volatile("itr.d dtr[%0]=%1" :: "r"(3), "r"(pte)); ia64_srlz_d(); __asm __volatile("itr.i itr[%0]=%1" :: "r"(3), "r"(pte)); __asm __volatile("mov psr.l=%0" :: "r" (psr)); ia64_srlz_i(); /* Expose the mapping to userland in ar.k5 */ ia64_set_k5(VM_MAX_ADDRESS); } static void calculate_frequencies(void) { struct ia64_sal_result sal; struct ia64_pal_result pal; sal = ia64_sal_entry(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0); pal = ia64_call_pal_static(PAL_FREQ_RATIOS, 0, 0, 0); if (sal.sal_status == 0 && pal.pal_status == 0) { if (bootverbose) { printf("Platform clock frequency %ld Hz\n", sal.sal_result[0]); printf("Processor ratio %ld/%ld, Bus ratio %ld/%ld, " "ITC ratio %ld/%ld\n", pal.pal_result[0] >> 32, pal.pal_result[0] & ((1L << 32) - 1), pal.pal_result[1] >> 32, pal.pal_result[1] & ((1L << 32) - 1), pal.pal_result[2] >> 32, pal.pal_result[2] & ((1L << 32) - 1)); } processor_frequency = sal.sal_result[0] * (pal.pal_result[0] >> 
32) / (pal.pal_result[0] & ((1L << 32) - 1)); bus_frequency = sal.sal_result[0] * (pal.pal_result[1] >> 32) / (pal.pal_result[1] & ((1L << 32) - 1)); itc_frequency = sal.sal_result[0] * (pal.pal_result[2] >> 32) / (pal.pal_result[2] & ((1L << 32) - 1)); } } void ia64_init(void) { int phys_avail_cnt; vm_offset_t kernstart, kernend; vm_offset_t kernstartpfn, kernendpfn, pfn0, pfn1; char *p; struct efi_md *md; int metadata_missing; /* NO OUTPUT ALLOWED UNTIL FURTHER NOTICE */ /* * TODO: Disable interrupts, floating point etc. * Maybe flush cache and tlb */ ia64_set_fpsr(IA64_FPSR_DEFAULT); /* * TODO: Get critical system information (if possible, from the * information provided by the boot program). */ /* * pa_bootinfo is the physical address of the bootinfo block as * passed to us by the loader and set in locore.s. */ bootinfo = *(struct bootinfo *)(IA64_PHYS_TO_RR7(pa_bootinfo)); if (bootinfo.bi_magic != BOOTINFO_MAGIC || bootinfo.bi_version != 1) { bzero(&bootinfo, sizeof(bootinfo)); bootinfo.bi_kernend = (vm_offset_t) round_page(_end); } /* * Look for the I/O ports first - we need them for console * probing. */ for (md = efi_md_first(); md != NULL; md = efi_md_next(md)) { switch (md->md_type) { case EFI_MD_TYPE_IOPORT: ia64_port_base = IA64_PHYS_TO_RR6(md->md_phys); break; case EFI_MD_TYPE_PALCODE: ia64_pal_base = md->md_phys; break; } } metadata_missing = 0; if (bootinfo.bi_modulep) preload_metadata = (caddr_t)bootinfo.bi_modulep; else metadata_missing = 1; if (envmode == 0 && bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp; else kern_envp = static_env; /* * Look at arguments passed to us and compute boothowto. */ boothowto = bootinfo.bi_boothowto; /* * Catch case of boot_verbose set in environment. */ if ((p = getenv("boot_verbose")) != NULL) { if (strcmp(p, "yes") == 0 || strcmp(p, "YES") == 0) { boothowto |= RB_VERBOSE; } freeenv(p); } if (boothowto & RB_VERBOSE) bootverbose = 1; /* * Setup the PCPU data for the bootstrap processor. It is needed * by printf(). Also, since printf() has critical sections, we * need to initialize at least pc_curthread. */ pcpup = &pcpu0; ia64_set_k4((u_int64_t)pcpup); pcpu_init(pcpup, 0, sizeof(pcpu0)); PCPU_SET(curthread, &thread0); /* * Initialize the console before we print anything out. */ cninit(); /* OUTPUT NOW ALLOWED */ if (ia64_pal_base != 0) { ia64_pal_base &= ~IA64_ID_PAGE_MASK; /* * We use a TR to map the first 256M of memory - this might * cover the palcode too. */ if (ia64_pal_base == 0) printf("PAL code mapped by the kernel's TR\n"); } else printf("PAL code not found\n"); /* * Wire things up so we can call the firmware. */ map_pal_code(); efi_boot_minimal(bootinfo.bi_systab); ia64_sal_init(); calculate_frequencies(); /* * Find the beginning and end of the kernel. */ kernstart = trunc_page(kernel_text); #ifdef DDB ksym_start = bootinfo.bi_symtab; ksym_end = bootinfo.bi_esymtab; kernend = (vm_offset_t)round_page(ksym_end); #else kernend = (vm_offset_t)round_page(_end); #endif /* But if the bootstrap tells us otherwise, believe it! */ if (bootinfo.bi_kernend) kernend = round_page(bootinfo.bi_kernend); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); /* Get FPSWA interface */ fpswa_iface = (bootinfo.bi_fpswa == 0) ? 
NULL : (struct fpswa_iface *)IA64_PHYS_TO_RR7(bootinfo.bi_fpswa); /* Init basic tunables, including hz */ init_param1(); p = getenv("kernelname"); if (p) { strncpy(kernelname, p, sizeof(kernelname) - 1); freeenv(p); } kernstartpfn = atop(IA64_RR_MASK(kernstart)); kernendpfn = atop(IA64_RR_MASK(kernend)); /* * Size the memory regions and load phys_avail[] with the results. */ /* * Find out how much memory is available, by looking at * the memory descriptors. */ #ifdef DEBUG_MD printf("Memory descriptor count: %d\n", mdcount); #endif phys_avail_cnt = 0; for (md = efi_md_first(); md != NULL; md = efi_md_next(md)) { #ifdef DEBUG_MD printf("MD %p: type %d pa 0x%lx cnt 0x%lx\n", md, md->md_type, md->md_phys, md->md_pages); #endif pfn0 = ia64_btop(round_page(md->md_phys)); pfn1 = ia64_btop(trunc_page(md->md_phys + md->md_pages * 4096)); if (pfn1 <= pfn0) continue; if (md->md_type != EFI_MD_TYPE_FREE) continue; /* * We have a memory descriptor that describes conventional * memory that is for general use. We must determine if the * loader has put the kernel in this region. */ physmem += (pfn1 - pfn0); if (pfn0 <= kernendpfn && kernstartpfn <= pfn1) { /* * Must compute the location of the kernel * within the segment. */ #ifdef DEBUG_MD printf("Descriptor %p contains kernel\n", mp); #endif if (pfn0 < kernstartpfn) { /* * There is a chunk before the kernel. */ #ifdef DEBUG_MD printf("Loading chunk before kernel: " "0x%lx / 0x%lx\n", pfn0, kernstartpfn); #endif phys_avail[phys_avail_cnt] = ia64_ptob(pfn0); phys_avail[phys_avail_cnt+1] = ia64_ptob(kernstartpfn); phys_avail_cnt += 2; } if (kernendpfn < pfn1) { /* * There is a chunk after the kernel. */ #ifdef DEBUG_MD printf("Loading chunk after kernel: " "0x%lx / 0x%lx\n", kernendpfn, pfn1); #endif phys_avail[phys_avail_cnt] = ia64_ptob(kernendpfn); phys_avail[phys_avail_cnt+1] = ia64_ptob(pfn1); phys_avail_cnt += 2; } } else { /* * Just load this cluster as one chunk. */ #ifdef DEBUG_MD printf("Loading descriptor %d: 0x%lx / 0x%lx\n", i, pfn0, pfn1); #endif phys_avail[phys_avail_cnt] = ia64_ptob(pfn0); phys_avail[phys_avail_cnt+1] = ia64_ptob(pfn1); phys_avail_cnt += 2; } } phys_avail[phys_avail_cnt] = 0; Maxmem = physmem; init_param2(physmem); /* * Initialize error message buffer (at end of core). */ msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE); msgbufinit(msgbufp, MSGBUF_SIZE); proc_linkup0(&proc0, &thread0); /* * Init mapping for kernel stack for proc 0 */ proc0kstack = (vm_offset_t)kstack; thread0.td_kstack = proc0kstack; thread0.td_kstack_pages = KSTACK_PAGES; mutex_init(); /* * Initialize the rest of proc 0's PCB. * * Set the kernel sp, reserving space for an (empty) trapframe, * and make proc0's trapframe pointer point to it for sanity. * Initialise proc0's backing store to start after u area. */ cpu_thread_setup(&thread0); thread0.td_frame->tf_flags = FRAME_SYSCALL; thread0.td_pcb->pcb_special.sp = (u_int64_t)thread0.td_frame - 16; thread0.td_pcb->pcb_special.bspstore = thread0.td_kstack; /* * Initialize the virtual memory system. */ pmap_bootstrap(); /* * Initialize debuggers, and break into them if appropriate. */ kdb_init(); #ifdef KDB if (boothowto & RB_KDB) - kdb_enter("Boot flags requested debugger\n"); + kdb_enter_why(KDB_WHY_BOOTFLAGS, + "Boot flags requested debugger\n"); #endif ia64_set_tpr(0); ia64_srlz_d(); /* * Save our current context so that we have a known (maybe even * sane) context as the initial context for new threads that are * forked from us. 
If any of those threads (including thread0) * does something wrong, we may be lucky and return here where * we're ready for them with a nice panic. */ if (!savectx(thread0.td_pcb)) mi_startup(); /* We should not get here. */ panic("ia64_init: Whooaa there!"); /* NOTREACHED */ } __volatile void * ia64_ioport_address(u_int port) { uint64_t addr; addr = (port > 0xffff) ? IA64_PHYS_TO_RR6((uint64_t)port) : ia64_port_base | ((port & 0xfffc) << 10) | (port & 0xFFF); return ((__volatile void *)addr); } uint64_t ia64_get_hcdp(void) { return (bootinfo.bi_hcdp); } void bzero(void *buf, size_t len) { caddr_t p = buf; while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) { *p++ = 0; len--; } while (len >= sizeof(u_long) * 8) { *(u_long*) p = 0; *((u_long*) p + 1) = 0; *((u_long*) p + 2) = 0; *((u_long*) p + 3) = 0; len -= sizeof(u_long) * 8; *((u_long*) p + 4) = 0; *((u_long*) p + 5) = 0; *((u_long*) p + 6) = 0; *((u_long*) p + 7) = 0; p += sizeof(u_long) * 8; } while (len >= sizeof(u_long)) { *(u_long*) p = 0; len -= sizeof(u_long); p += sizeof(u_long); } while (len) { *p++ = 0; len--; } } void DELAY(int n) { u_int64_t start, end, now; start = ia64_get_itc(); end = start + (itc_frequency * n) / 1000000; /* printf("DELAY from 0x%lx to 0x%lx\n", start, end); */ do { now = ia64_get_itc(); } while (now < end || (now > start && end < start)); } /* * Send an interrupt (signal) to a process. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct proc *p; struct thread *td; struct trapframe *tf; struct sigacts *psp; struct sigframe sf, *sfp; u_int64_t sbs, sp; int oonstack; int sig; u_long code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; sp = tf->tf_special.sp; oonstack = sigonstack(sp); sbs = 0; /* save user context */ bzero(&sf, sizeof(struct sigframe)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; /* * Allocate and validate space for the signal handler * context. Note that if the stack is in P0 space, the * call to grow() is a nop, and the useracc() check * will fail if the process has not already allocated * the space with a `brk'. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sbs = (u_int64_t)td->td_sigstk.ss_sp; sbs = (sbs + 15) & ~15; sfp = (struct sigframe *)(sbs + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe *)sp; sfp = (struct sigframe *)((u_int64_t)(sfp - 1) & ~15); /* Fill in the siginfo structure for POSIX handlers. */ if (SIGISMEMBER(psp->ps_siginfo, sig)) { sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* * XXX this shouldn't be here after code in trap.c * is fixed */ sf.sf_si.si_addr = (void*)tf->tf_special.ifa; code = (u_int64_t)&sfp->sf_si; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); get_mcontext(td, &sf.sf_uc.uc_mcontext, 0); /* Copy the frame out to userland. */ if (copyout(&sf, sfp, sizeof(sf)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. 
*/ PROC_LOCK(p); sigexit(td, SIGILL); return; } if ((tf->tf_flags & FRAME_SYSCALL) == 0) { tf->tf_special.psr &= ~IA64_PSR_RI; tf->tf_special.iip = ia64_get_k5() + ((uint64_t)break_sigtramp - (uint64_t)ia64_gateway_page); } else tf->tf_special.iip = ia64_get_k5() + ((uint64_t)epc_sigtramp - (uint64_t)ia64_gateway_page); /* * Setup the trapframe to return to the signal trampoline. We pass * information to the trampoline in the following registers: * * gp new backing store or NULL * r8 signal number * r9 signal code or siginfo pointer * r10 signal handler (function descriptor) */ tf->tf_special.sp = (u_int64_t)sfp - 16; tf->tf_special.gp = sbs; tf->tf_special.bspstore = sf.sf_uc.uc_mcontext.mc_special.bspstore; tf->tf_special.ndirty = 0; tf->tf_special.rnat = sf.sf_uc.uc_mcontext.mc_special.rnat; tf->tf_scratch.gr8 = sig; tf->tf_scratch.gr9 = code; tf->tf_scratch.gr10 = (u_int64_t)catcher; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ int sigreturn(struct thread *td, struct sigreturn_args /* { ucontext_t *sigcntxp; } */ *uap) { ucontext_t uc; struct trapframe *tf; struct proc *p; struct pcb *pcb; tf = td->td_frame; p = td->td_proc; pcb = td->td_pcb; /* * Fetch the entire context structure at once for speed. * We don't use a normal argument to simplify RSE handling. */ if (copyin(uap->sigcntxp, (caddr_t)&uc, sizeof(uc))) return (EFAULT); set_mcontext(td, &uc.uc_mcontext); PROC_LOCK(p); #if defined(COMPAT_43) if (sigonstack(tf->tf_special.sp)) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = uc.uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_special = tf->tf_special; pcb->pcb_special.__spare = ~0UL; /* XXX see unwind.c */ save_callee_saved(&pcb->pcb_preserved); save_callee_saved_fp(&pcb->pcb_preserved_fp); } int ia64_flush_dirty(struct thread *td, struct _special *r) { struct iovec iov; struct uio uio; uint64_t bspst, kstk, rnat; int error; if (r->ndirty == 0) return (0); kstk = td->td_kstack + (r->bspstore & 0x1ffUL); if (td == curthread) { __asm __volatile("mov ar.rsc=0;;"); __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst)); /* Make sure we have all the user registers written out. */ if (bspst - kstk < r->ndirty) { __asm __volatile("flushrs;;"); __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst)); } __asm __volatile("mov %0=ar.rnat;;" : "=r"(rnat)); __asm __volatile("mov ar.rsc=3"); error = copyout((void*)kstk, (void*)r->bspstore, r->ndirty); kstk += r->ndirty; r->rnat = (bspst > kstk && (bspst & 0x1ffL) < (kstk & 0x1ffL)) ? 
*(uint64_t*)(kstk | 0x1f8L) : rnat; } else { PHOLD(td->td_proc); iov.iov_base = (void*)(uintptr_t)kstk; iov.iov_len = r->ndirty; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = r->bspstore; uio.uio_resid = r->ndirty; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_td = td; error = proc_rwmem(td->td_proc, &uio); /* * XXX proc_rwmem() doesn't currently return ENOSPC, * so I think it can bogusly return 0. Neither do * we allow short writes. */ if (uio.uio_resid != 0 && error == 0) error = ENOSPC; PRELE(td->td_proc); } r->bspstore += r->ndirty; r->ndirty = 0; return (error); } int get_mcontext(struct thread *td, mcontext_t *mc, int flags) { struct trapframe *tf; int error; tf = td->td_frame; bzero(mc, sizeof(*mc)); mc->mc_special = tf->tf_special; error = ia64_flush_dirty(td, &mc->mc_special); if (tf->tf_flags & FRAME_SYSCALL) { mc->mc_flags |= _MC_FLAGS_SYSCALL_CONTEXT; mc->mc_scratch = tf->tf_scratch; if (flags & GET_MC_CLEAR_RET) { mc->mc_scratch.gr8 = 0; mc->mc_scratch.gr9 = 0; mc->mc_scratch.gr10 = 0; mc->mc_scratch.gr11 = 0; } } else { mc->mc_flags |= _MC_FLAGS_ASYNC_CONTEXT; mc->mc_scratch = tf->tf_scratch; mc->mc_scratch_fp = tf->tf_scratch_fp; /* * XXX If the thread never used the high FP registers, we * probably shouldn't waste time saving them. */ ia64_highfp_save(td); mc->mc_flags |= _MC_FLAGS_HIGHFP_VALID; mc->mc_high_fp = td->td_pcb->pcb_high_fp; } save_callee_saved(&mc->mc_preserved); save_callee_saved_fp(&mc->mc_preserved_fp); return (error); } int set_mcontext(struct thread *td, const mcontext_t *mc) { struct _special s; struct trapframe *tf; uint64_t psrmask; tf = td->td_frame; KASSERT((tf->tf_special.ndirty & ~PAGE_MASK) == 0, ("Whoa there! We have more than 8KB of dirty registers!")); s = mc->mc_special; /* * Only copy the user mask and the restart instruction bit from * the new context. */ psrmask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_RI; s.psr = (tf->tf_special.psr & ~psrmask) | (s.psr & psrmask); /* We don't have any dirty registers of the new context. */ s.ndirty = 0; if (mc->mc_flags & _MC_FLAGS_ASYNC_CONTEXT) { /* * We can get an async context passed to us while we * entered the kernel through a syscall: sigreturn(2) * and kse_switchin(2) both take contexts that could * previously be the result of a trap or interrupt. * Hence, we cannot assert that the trapframe is not * a syscall frame, but we can assert that it's at * least an expected syscall. */ if (tf->tf_flags & FRAME_SYSCALL) { KASSERT(tf->tf_scratch.gr15 == SYS_sigreturn || tf->tf_scratch.gr15 == SYS_kse_switchin, ("foo")); tf->tf_flags &= ~FRAME_SYSCALL; } tf->tf_scratch = mc->mc_scratch; tf->tf_scratch_fp = mc->mc_scratch_fp; if (mc->mc_flags & _MC_FLAGS_HIGHFP_VALID) td->td_pcb->pcb_high_fp = mc->mc_high_fp; } else { KASSERT((tf->tf_flags & FRAME_SYSCALL) != 0, ("foo")); if ((mc->mc_flags & _MC_FLAGS_SYSCALL_CONTEXT) == 0) { s.cfm = tf->tf_special.cfm; s.iip = tf->tf_special.iip; tf->tf_scratch.gr15 = 0; /* Clear syscall nr. */ } else tf->tf_scratch = mc->mc_scratch; } tf->tf_special = s; restore_callee_saved(&mc->mc_preserved); restore_callee_saved_fp(&mc->mc_preserved_fp); if (mc->mc_flags & _MC_FLAGS_KSE_SET_MBOX) suword((caddr_t)mc->mc_special.ifa, mc->mc_special.isr); return (0); } /* * Clear registers on exec. 
*/ void exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tf; uint64_t *ksttop, *kst; tf = td->td_frame; ksttop = (uint64_t*)(td->td_kstack + tf->tf_special.ndirty + (tf->tf_special.bspstore & 0x1ffUL)); /* * We can ignore up to 8KB of dirty registers by masking off the * lower 13 bits in exception_restore() or epc_syscall(). This * should be enough for a couple of years, but if there are more * than 8KB of dirty registers, we lose track of the bottom of * the kernel stack. The solution is to copy the active part of * the kernel stack down 1 page (or 2, but not more than that) * so that we always have less than 8KB of dirty registers. */ KASSERT((tf->tf_special.ndirty & ~PAGE_MASK) == 0, ("Whoa there! We have more than 8KB of dirty registers!")); bzero(&tf->tf_special, sizeof(tf->tf_special)); if ((tf->tf_flags & FRAME_SYSCALL) == 0) { /* break syscalls. */ bzero(&tf->tf_scratch, sizeof(tf->tf_scratch)); bzero(&tf->tf_scratch_fp, sizeof(tf->tf_scratch_fp)); tf->tf_special.cfm = (1UL<<63) | (3UL<<7) | 3UL; tf->tf_special.bspstore = IA64_BACKINGSTORE; /* * Copy the arguments onto the kernel register stack so that * they get loaded by the loadrs instruction. Skip over the * NaT collection points. */ kst = ksttop - 1; if (((uintptr_t)kst & 0x1ff) == 0x1f8) *kst-- = 0; *kst-- = 0; if (((uintptr_t)kst & 0x1ff) == 0x1f8) *kst-- = 0; *kst-- = ps_strings; if (((uintptr_t)kst & 0x1ff) == 0x1f8) *kst-- = 0; *kst = stack; tf->tf_special.ndirty = (ksttop - kst) << 3; } else { /* epc syscalls (default). */ tf->tf_special.cfm = (3UL<<62) | (3UL<<7) | 3UL; tf->tf_special.bspstore = IA64_BACKINGSTORE + 24; /* * Write values for out0, out1 and out2 to the user's backing * store and arrange for them to be restored into the user's * initial register frame. * Assumes that (bspstore & 0x1f8) < 0x1e0. */ suword((caddr_t)tf->tf_special.bspstore - 24, stack); suword((caddr_t)tf->tf_special.bspstore - 16, ps_strings); suword((caddr_t)tf->tf_special.bspstore - 8, 0); } tf->tf_special.iip = entry; tf->tf_special.sp = (stack & ~15) - 16; tf->tf_special.rsc = 0xf; tf->tf_special.fpsr = IA64_FPSR_DEFAULT; tf->tf_special.psr = IA64_PSR_IC | IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_DFH | IA64_PSR_BN | IA64_PSR_CPL_USER; } int ptrace_set_pc(struct thread *td, unsigned long addr) { uint64_t slot; switch (addr & 0xFUL) { case 0: slot = IA64_PSR_RI_0; break; case 1: /* XXX we need to deal with MLX bundles here */ slot = IA64_PSR_RI_1; break; case 2: slot = IA64_PSR_RI_2; break; default: return (EINVAL); } td->td_frame->tf_special.iip = addr & ~0x0FULL; td->td_frame->tf_special.psr = (td->td_frame->tf_special.psr & ~IA64_PSR_RI) | slot; return (0); } int ptrace_single_step(struct thread *td) { struct trapframe *tf; /* * There's no way to set single stepping when we're leaving the * kernel through the EPC syscall path. The way we solve this is * by enabling the lower-privilege trap so that we re-enter the * kernel as soon as the privilege level changes. See trap.c for * how we proceed from there. */ tf = td->td_frame; if (tf->tf_flags & FRAME_SYSCALL) tf->tf_special.psr |= IA64_PSR_LP; else tf->tf_special.psr |= IA64_PSR_SS; return (0); } int ptrace_clear_single_step(struct thread *td) { struct trapframe *tf; /* * Clear any and all status bits we may use to implement single * stepping. 
*/ tf = td->td_frame; tf->tf_special.psr &= ~IA64_PSR_SS; tf->tf_special.psr &= ~IA64_PSR_LP; tf->tf_special.psr &= ~IA64_PSR_TB; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; regs->r_special = tf->tf_special; regs->r_scratch = tf->tf_scratch; save_callee_saved(®s->r_preserved); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; int error; tf = td->td_frame; error = ia64_flush_dirty(td, &tf->tf_special); if (!error) { tf->tf_special = regs->r_special; tf->tf_special.bspstore += tf->tf_special.ndirty; tf->tf_special.ndirty = 0; tf->tf_scratch = regs->r_scratch; restore_callee_saved(®s->r_preserved); } return (error); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *frame = td->td_frame; struct pcb *pcb = td->td_pcb; /* Save the high FP registers. */ ia64_highfp_save(td); fpregs->fpr_scratch = frame->tf_scratch_fp; save_callee_saved_fp(&fpregs->fpr_preserved); fpregs->fpr_high = pcb->pcb_high_fp; return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *frame = td->td_frame; struct pcb *pcb = td->td_pcb; /* Throw away the high FP registers (should be redundant). */ ia64_highfp_drop(td); frame->tf_scratch_fp = fpregs->fpr_scratch; restore_callee_saved_fp(&fpregs->fpr_preserved); pcb->pcb_high_fp = fpregs->fpr_high; return (0); } /* * High FP register functions. */ int ia64_highfp_drop(struct thread *td) { struct pcb *pcb; struct pcpu *cpu; struct thread *thr; mtx_lock_spin(&td->td_md.md_highfp_mtx); pcb = td->td_pcb; cpu = pcb->pcb_fpcpu; if (cpu == NULL) { mtx_unlock_spin(&td->td_md.md_highfp_mtx); return (0); } pcb->pcb_fpcpu = NULL; thr = cpu->pc_fpcurthread; cpu->pc_fpcurthread = NULL; mtx_unlock_spin(&td->td_md.md_highfp_mtx); /* Post-mortem sanity checking. */ KASSERT(thr == td, ("Inconsistent high FP state")); return (1); } int ia64_highfp_save(struct thread *td) { struct pcb *pcb; struct pcpu *cpu; struct thread *thr; /* Don't save if the high FP registers weren't modified. */ if ((td->td_frame->tf_special.psr & IA64_PSR_MFH) == 0) return (ia64_highfp_drop(td)); mtx_lock_spin(&td->td_md.md_highfp_mtx); pcb = td->td_pcb; cpu = pcb->pcb_fpcpu; if (cpu == NULL) { mtx_unlock_spin(&td->td_md.md_highfp_mtx); return (0); } #ifdef SMP if (td == curthread) sched_pin(); if (cpu != pcpup) { mtx_unlock_spin(&td->td_md.md_highfp_mtx); ipi_send(cpu, IPI_HIGH_FP); if (td == curthread) sched_unpin(); while (pcb->pcb_fpcpu == cpu) DELAY(100); return (1); } else { save_high_fp(&pcb->pcb_high_fp); if (td == curthread) sched_unpin(); } #else save_high_fp(&pcb->pcb_high_fp); #endif pcb->pcb_fpcpu = NULL; thr = cpu->pc_fpcurthread; cpu->pc_fpcurthread = NULL; mtx_unlock_spin(&td->td_md.md_highfp_mtx); /* Post-mortem sanity cxhecking. */ KASSERT(thr == td, ("Inconsistent high FP state")); return (1); } int sysbeep(int pitch, int period) { return (ENODEV); } Index: stable/7/sys/kern/kern_clock.c =================================================================== --- stable/7/sys/kern/kern_clock.c (revision 177733) +++ stable/7/sys/kern/kern_clock.c (revision 177734) @@ -1,582 +1,582 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. 
* All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_kdb.h" #include "opt_device_polling.h" #include "opt_hwpmc_hooks.h" #include "opt_ntp.h" #include "opt_watchdog.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef GPROF #include #endif #ifdef HWPMC_HOOKS #include #endif #ifdef DEVICE_POLLING extern void hardclock_device_poll(void); #endif /* DEVICE_POLLING */ static void initclocks(void *dummy); SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) /* Some of these don't belong here, but it's easiest to concentrate them. */ long cp_time[CPUSTATES]; /* Spin-lock protecting profiling statistics. */ static struct mtx time_lock; static int sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS) { int error; #ifdef SCTL_MASK32 int i; unsigned int cp_time32[CPUSTATES]; if (req->flags & SCTL_MASK32) { if (!req->oldptr) return SYSCTL_OUT(req, 0, sizeof(cp_time32)); for (i = 0; i < CPUSTATES; i++) cp_time32[i] = (unsigned int)cp_time[i]; error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32)); } else #endif { if (!req->oldptr) return SYSCTL_OUT(req, 0, sizeof(cp_time)); error = SYSCTL_OUT(req, cp_time, sizeof(cp_time)); } return error; } SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD, 0,0, sysctl_kern_cp_time, "LU", "CPU time statistics"); #ifdef SW_WATCHDOG #include static int watchdog_ticks; static int watchdog_enabled; static void watchdog_fire(void); static void watchdog_config(void *, u_int, int *); #endif /* SW_WATCHDOG */ /* * Clock handling routines. * * This code is written to operate with two timers that run independently of * each other. 
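The sysctl_kern_cp_time() handler above is what exports the cp_time[] counters as the kern.cp_time MIB entry, and statclock() further down is what increments them. A small userland consumer, as a sketch only, assuming the usual sysctlbyname(3) interface and the CP_* indices from <sys/resource.h>:

#include <sys/types.h>
#include <sys/resource.h>       /* CPUSTATES, CP_USER, CP_NICE, ... */
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        long cp_time[CPUSTATES];
        size_t len = sizeof(cp_time);

        if (sysctlbyname("kern.cp_time", &cp_time, &len, NULL, 0) == -1) {
                perror("kern.cp_time");
                return (1);
        }
        printf("user %ld nice %ld sys %ld intr %ld idle %ld\n",
            cp_time[CP_USER], cp_time[CP_NICE], cp_time[CP_SYS],
            cp_time[CP_INTR], cp_time[CP_IDLE]);
        return (0);
}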
* * The main timer, running hz times per second, is used to trigger interval * timers, timeouts and rescheduling as needed. * * The second timer handles kernel and user profiling, * and does resource use estimation. If the second timer is programmable, * it is randomized to avoid aliasing between the two clocks. For example, * the randomization prevents an adversary from always giving up the cpu * just before its quantum expires. Otherwise, it would never accumulate * cpu ticks. The mean frequency of the second timer is stathz. * * If no second timer exists, stathz will be zero; in this case we drive * profiling and statistics off the main clock. This WILL NOT be accurate; * do not do it unless absolutely necessary. * * The statistics clock may (or may not) be run at a higher rate while * profiling. This profile clock runs at profhz. We require that profhz * be an integral multiple of stathz. * * If the statistics clock is running fast, it must be divided by the ratio * profhz/stathz for statistics. (For profiling, every tick counts.) * * Time-of-day is maintained using a "timecounter", which may or may * not be related to the hardware generating the above mentioned * interrupts. */ int stathz; int profhz; int profprocs; int ticks; int psratio; /* * Initialize clock frequencies and start both clocks running. */ /* ARGSUSED*/ static void initclocks(dummy) void *dummy; { register int i; /* * Set divisors to 1 (normal case) and let the machine-specific * code do its bit. */ mtx_init(&time_lock, "time lock", NULL, MTX_SPIN); cpu_initclocks(); /* * Compute profhz/stathz, and fix profhz if needed. */ i = stathz ? stathz : hz; if (profhz == 0) profhz = i; psratio = profhz / i; #ifdef SW_WATCHDOG EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0); #endif } /* * Each time the real-time timer fires, this function is called on all CPUs. * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only * the other CPUs in the system need to call this function. */ void hardclock_cpu(int usermode) { struct pstats *pstats; struct thread *td = curthread; struct proc *p = td->td_proc; int flags; /* * Run current process's virtual and profile time, as needed. */ pstats = p->p_stats; flags = 0; if (usermode && timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) { PROC_SLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) flags |= TDF_ALRMPEND | TDF_ASTPENDING; PROC_SUNLOCK(p); } if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) { PROC_SLOCK(p); if (itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) flags |= TDF_PROFPEND | TDF_ASTPENDING; PROC_SUNLOCK(p); } thread_lock(td); sched_tick(); td->td_flags |= flags; thread_unlock(td); #ifdef HWPMC_HOOKS if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); #endif } /* * The real-time timer, interrupting hz times per second. */ void hardclock(int usermode, uintfptr_t pc) { int need_softclock = 0; hardclock_cpu(usermode); tc_ticktock(); /* * If no separate statistics clock is available, run it from here. * * XXX: this only works for UP */ if (stathz == 0) { profclock(usermode, pc); statclock(usermode); } #ifdef DEVICE_POLLING hardclock_device_poll(); /* this is very short and quick */ #endif /* DEVICE_POLLING */ /* * Process callouts at a very low cpu priority, so we don't keep the * relatively high clock interrupt priority any longer than necessary. 
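For illustration, the callouts that this path hands off to softclock are armed with the callout(9) consumer API. A minimal sketch of a periodic one-second callout, with hypothetical names (example_co, example_tick) and assuming the stable/7 callout_init(c, mpsafe) signature:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>         /* hz */
#include <sys/callout.h>

static struct callout example_co;

static void
example_tick(void *arg)
{

        printf("example: tick\n");
        /* Re-arm for one second from now; hz ticks == 1 second. */
        callout_reset(&example_co, hz, example_tick, NULL);
}

static void
example_start(void)
{

        callout_init(&example_co, CALLOUT_MPSAFE);
        callout_reset(&example_co, hz, example_tick, NULL);
}

static void
example_stop(void)
{

        callout_drain(&example_co);
}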
*/ mtx_lock_spin_flags(&callout_lock, MTX_QUIET); ticks++; if (!TAILQ_EMPTY(&callwheel[ticks & callwheelmask])) { need_softclock = 1; } else if (softticks + 1 == ticks) ++softticks; mtx_unlock_spin_flags(&callout_lock, MTX_QUIET); /* * swi_sched acquires the thread lock, so we don't want to call it * with callout_lock held; incorrect locking order. */ if (need_softclock) swi_sched(softclock_ih, 0); #ifdef SW_WATCHDOG if (watchdog_enabled > 0 && --watchdog_ticks <= 0) watchdog_fire(); #endif /* SW_WATCHDOG */ } /* * Compute number of ticks in the specified amount of time. */ int tvtohz(tv) struct timeval *tv; { register unsigned long ticks; register long sec, usec; /* * If the number of usecs in the whole seconds part of the time * difference fits in a long, then the total number of usecs will * fit in an unsigned long. Compute the total and convert it to * ticks, rounding up and adding 1 to allow for the current tick * to expire. Rounding also depends on unsigned long arithmetic * to avoid overflow. * * Otherwise, if the number of ticks in the whole seconds part of * the time difference fits in a long, then convert the parts to * ticks separately and add, using similar rounding methods and * overflow avoidance. This method would work in the previous * case but it is slightly slower and assumes that hz is integral. * * Otherwise, round the time difference down to the maximum * representable value. * * If ints have 32 bits, then the maximum value for any timeout in * 10ms ticks is 248 days. */ sec = tv->tv_sec; usec = tv->tv_usec; if (usec < 0) { sec--; usec += 1000000; } if (sec < 0) { #ifdef DIAGNOSTIC if (usec > 0) { sec++; usec -= 1000000; } printf("tvotohz: negative time difference %ld sec %ld usec\n", sec, usec); #endif ticks = 1; } else if (sec <= LONG_MAX / 1000000) ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) / tick + 1; else if (sec <= LONG_MAX / hz) ticks = sec * hz + ((unsigned long)usec + (tick - 1)) / tick + 1; else ticks = LONG_MAX; if (ticks > INT_MAX) ticks = INT_MAX; return ((int)ticks); } /* * Start profiling on a process. * * Kernel profiling passes proc0 which never exits and hence * keeps the profile clock running constantly. */ void startprofclock(p) register struct proc *p; { PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_flag & P_STOPPROF) return; if ((p->p_flag & P_PROFIL) == 0) { p->p_flag |= P_PROFIL; mtx_lock_spin(&time_lock); if (++profprocs == 1) cpu_startprofclock(); mtx_unlock_spin(&time_lock); } } /* * Stop profiling on a process. */ void stopprofclock(p) register struct proc *p; { PROC_LOCK_ASSERT(p, MA_OWNED); if (p->p_flag & P_PROFIL) { if (p->p_profthreads != 0) { p->p_flag |= P_STOPPROF; while (p->p_profthreads != 0) msleep(&p->p_profthreads, &p->p_mtx, PPAUSE, "stopprof", 0); p->p_flag &= ~P_STOPPROF; } if ((p->p_flag & P_PROFIL) == 0) return; p->p_flag &= ~P_PROFIL; mtx_lock_spin(&time_lock); if (--profprocs == 0) cpu_stopprofclock(); mtx_unlock_spin(&time_lock); } } /* * Statistics clock. Updates rusage information and calls the scheduler * to adjust priorities of the active thread. * * This should be called by all active processors. */ void statclock(int usermode) { struct rusage *ru; struct vmspace *vm; struct thread *td; struct proc *p; long rss; td = curthread; p = td->td_proc; thread_lock_flags(td, MTX_QUIET); if (usermode) { /* * Charge the time as appropriate. 
*/ #ifdef KSE if (p->p_flag & P_SA) thread_statclock(1); #endif td->td_uticks++; if (p->p_nice > NZERO) atomic_add_long(&cp_time[CP_NICE], 1); else atomic_add_long(&cp_time[CP_USER], 1); } else { /* * Came from kernel mode, so we were: * - handling an interrupt, * - doing syscall or trap work on behalf of the current * user process, or * - spinning in the idle loop. * Whichever it is, charge the time as appropriate. * Note that we charge interrupts to the current process, * regardless of whether they are ``for'' that process, * so that we know how much of its real time was spent * in ``non-process'' (i.e., interrupt) work. */ if ((td->td_pflags & TDP_ITHREAD) || td->td_intr_nesting_level >= 2) { td->td_iticks++; atomic_add_long(&cp_time[CP_INTR], 1); } else { #ifdef KSE if (p->p_flag & P_SA) thread_statclock(0); #endif td->td_pticks++; td->td_sticks++; if (!TD_IS_IDLETHREAD(td)) atomic_add_long(&cp_time[CP_SYS], 1); else atomic_add_long(&cp_time[CP_IDLE], 1); } } /* Update resource usage integrals and maximums. */ MPASS(p->p_vmspace != NULL); vm = p->p_vmspace; ru = &td->td_ru; ru->ru_ixrss += pgtok(vm->vm_tsize); ru->ru_idrss += pgtok(vm->vm_dsize); ru->ru_isrss += pgtok(vm->vm_ssize); rss = pgtok(vmspace_resident_count(vm)); if (ru->ru_maxrss < rss) ru->ru_maxrss = rss; CTR4(KTR_SCHED, "statclock: %p(%s) prio %d stathz %d", td, td->td_proc->p_comm, td->td_priority, (stathz)?stathz:hz); sched_clock(td); thread_unlock(td); } void profclock(int usermode, uintfptr_t pc) { struct thread *td; #ifdef GPROF struct gmonparam *g; uintfptr_t i; #endif td = curthread; if (usermode) { /* * Came from user mode; CPU was in user state. * If this process is being profiled, record the tick. * if there is no related user location yet, don't * bother trying to count it. */ if (td->td_proc->p_flag & P_PROFIL) addupc_intr(td, pc, 1); } #ifdef GPROF else { /* * Kernel statistics are just like addupc_intr, only easier. */ g = &_gmonparam; if (g->state == GMON_PROF_ON && pc >= g->lowpc) { i = PC_TO_I(g, pc); if (i < g->textsize) { KCOUNT(g, i)++; } } } #endif } /* * Return information about system clocks. */ static int sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS) { struct clockinfo clkinfo; /* * Construct clockinfo structure. */ bzero(&clkinfo, sizeof(clkinfo)); clkinfo.hz = hz; clkinfo.tick = tick; clkinfo.profhz = profhz; clkinfo.stathz = stathz ? stathz : hz; return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); } SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD, 0, 0, sysctl_kern_clockrate, "S,clockinfo", "Rate and period of various kernel clocks"); #ifdef SW_WATCHDOG static void watchdog_config(void *unused __unused, u_int cmd, int *error) { u_int u; u = cmd & WD_INTERVAL; if (u >= WD_TO_1SEC) { watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz; watchdog_enabled = 1; *error = 0; } else { watchdog_enabled = 0; } } /* * Handle a watchdog timeout by dumping interrupt information and * then either dropping to DDB or panicking. 
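The clock rates assembled by sysctl_kern_clockrate() above are visible from userland as kern.clockrate. A minimal reader, as a sketch, assuming sysctlbyname(3) and the struct clockinfo definition from <sys/time.h>:

#include <sys/types.h>
#include <sys/time.h>           /* struct clockinfo */
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
        struct clockinfo ci;
        size_t len = sizeof(ci);

        if (sysctlbyname("kern.clockrate", &ci, &len, NULL, 0) == -1) {
                perror("kern.clockrate");
                return (1);
        }
        printf("hz %d, tick %d us, profhz %d, stathz %d\n",
            ci.hz, ci.tick, ci.profhz, ci.stathz);
        return (0);
}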
*/ static void watchdog_fire(void) { int nintr; u_int64_t inttotal; u_long *curintr; char *curname; curintr = intrcnt; curname = intrnames; inttotal = 0; nintr = eintrcnt - intrcnt; printf("interrupt total\n"); while (--nintr >= 0) { if (*curintr) printf("%-12s %20lu\n", curname, *curintr); curname += strlen(curname) + 1; inttotal += *curintr++; } printf("Total %20ju\n", (uintmax_t)inttotal); #if defined(KDB) && !defined(KDB_UNATTENDED) kdb_backtrace(); - kdb_enter("watchdog timeout"); + kdb_enter_why(KDB_WHY_WATCHDOG, "watchdog timeout"); #else panic("watchdog timeout"); #endif } #endif /* SW_WATCHDOG */ Index: stable/7/sys/kern/kern_shutdown.c =================================================================== --- stable/7/sys/kern/kern_shutdown.c (revision 177733) +++ stable/7/sys/kern/kern_shutdown.c (revision 177734) @@ -1,655 +1,655 @@ /*- * Copyright (c) 1986, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kern_shutdown.c 8.3 (Berkeley) 1/21/94 */ #include __FBSDID("$FreeBSD$"); #include "opt_kdb.h" #include "opt_mac.h" #include "opt_panic.h" #include "opt_show_busybufs.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* smp_active */ #include #include #include #include #include #include #include #include #include #include #include #include #ifndef PANIC_REBOOT_WAIT_TIME #define PANIC_REBOOT_WAIT_TIME 15 /* default to 15 seconds */ #endif /* * Note that stdarg.h and the ANSI style va_start macro is used for both * ANSI and traditional C compilers. 
*/ #include #ifdef KDB #ifdef KDB_UNATTENDED int debugger_on_panic = 0; #else int debugger_on_panic = 1; #endif SYSCTL_INT(_debug, OID_AUTO, debugger_on_panic, CTLFLAG_RW, &debugger_on_panic, 0, "Run debugger on kernel panic"); #ifdef KDB_TRACE int trace_on_panic = 1; #else int trace_on_panic = 0; #endif SYSCTL_INT(_debug, OID_AUTO, trace_on_panic, CTLFLAG_RW, &trace_on_panic, 0, "Print stack trace on kernel panic"); #endif /* KDB */ int sync_on_panic = 0; SYSCTL_INT(_kern, OID_AUTO, sync_on_panic, CTLFLAG_RW, &sync_on_panic, 0, "Do a sync before rebooting from a panic"); SYSCTL_NODE(_kern, OID_AUTO, shutdown, CTLFLAG_RW, 0, "Shutdown environment"); /* * Variable panicstr contains argument to first call to panic; used as flag * to indicate that the kernel has already called panic. */ const char *panicstr; int dumping; /* system is dumping */ int rebooting; /* system is rebooting */ static struct dumperinfo dumper; /* our selected dumper */ /* Context information for dump-debuggers. */ static struct pcb dumppcb; /* Registers. */ static lwpid_t dumptid; /* Thread ID. */ static void boot(int) __dead2; static void poweroff_wait(void *, int); static void shutdown_halt(void *junk, int howto); static void shutdown_panic(void *junk, int howto); static void shutdown_reset(void *junk, int howto); /* register various local shutdown events */ static void shutdown_conf(void *unused) { EVENTHANDLER_REGISTER(shutdown_final, poweroff_wait, NULL, SHUTDOWN_PRI_FIRST); EVENTHANDLER_REGISTER(shutdown_final, shutdown_halt, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_panic, NULL, SHUTDOWN_PRI_LAST + 100); EVENTHANDLER_REGISTER(shutdown_final, shutdown_reset, NULL, SHUTDOWN_PRI_LAST + 200); } SYSINIT(shutdown_conf, SI_SUB_INTRINSIC, SI_ORDER_ANY, shutdown_conf, NULL) /* * The system call that results in a reboot. */ /* ARGSUSED */ int reboot(struct thread *td, struct reboot_args *uap) { int error; error = 0; #ifdef MAC error = mac_check_system_reboot(td->td_ucred, uap->opt); #endif if (error == 0) error = priv_check(td, PRIV_REBOOT); if (error == 0) { mtx_lock(&Giant); boot(uap->opt); mtx_unlock(&Giant); } return (error); } /* * Called by events that want to shut down.. e.g on a PC */ static int shutdown_howto = 0; void shutdown_nice(int howto) { shutdown_howto = howto; /* Send a signal to init(8) and have it shutdown the world */ if (initproc != NULL) { PROC_LOCK(initproc); psignal(initproc, SIGINT); PROC_UNLOCK(initproc); } else { /* No init(8) running, so simply reboot */ boot(RB_NOSYNC); } return; } static int waittime = -1; static void print_uptime(void) { int f; struct timespec ts; getnanouptime(&ts); printf("Uptime: "); f = 0; if (ts.tv_sec >= 86400) { printf("%ldd", (long)ts.tv_sec / 86400); ts.tv_sec %= 86400; f = 1; } if (f || ts.tv_sec >= 3600) { printf("%ldh", (long)ts.tv_sec / 3600); ts.tv_sec %= 3600; f = 1; } if (f || ts.tv_sec >= 60) { printf("%ldm", (long)ts.tv_sec / 60); ts.tv_sec %= 60; f = 1; } printf("%lds\n", (long)ts.tv_sec); } static void doadump(void) { /* * Sometimes people have to call this from the kernel debugger. * (if 'panic' can not dump) * Give them a clue as to why they can't dump. */ if (dumper.dumper == NULL) { printf("Cannot dump. 
No dump device defined.\n"); return; } savectx(&dumppcb); dumptid = curthread->td_tid; dumping++; dumpsys(&dumper); } static int isbufbusy(struct buf *bp) { if (((bp->b_flags & (B_INVAL | B_PERSISTENT)) == 0 && BUF_REFCNT(bp) > 0) || ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI)) return (1); return (0); } /* * Shutdown the system cleanly to prepare for reboot, halt, or power off. */ static void boot(int howto) { static int first_buf_printf = 1; #if defined(SMP) /* * Bind us to CPU 0 so that all shutdown code runs there. Some * systems don't shutdown properly (i.e., ACPI power off) if we * run on another processor. */ thread_lock(curthread); sched_bind(curthread, 0); thread_unlock(curthread); KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0")); #endif /* We're in the process of rebooting. */ rebooting = 1; /* collect extra flags that shutdown_nice might have set */ howto |= shutdown_howto; /* We are out of the debugger now. */ kdb_active = 0; /* * Do any callouts that should be done BEFORE syncing the filesystems. */ EVENTHANDLER_INVOKE(shutdown_pre_sync, howto); /* * Now sync filesystems */ if (!cold && (howto & RB_NOSYNC) == 0 && waittime < 0) { register struct buf *bp; int iter, nbusy, pbusy; #ifndef PREEMPTION int subiter; #endif waittime = 0; sync(curthread, NULL); /* * With soft updates, some buffers that are * written will be remarked as dirty until other * buffers are written. */ for (iter = pbusy = 0; iter < 20; iter++) { nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) if (isbufbusy(bp)) nbusy++; if (nbusy == 0) { if (first_buf_printf) printf("All buffers synced."); break; } if (first_buf_printf) { printf("Syncing disks, buffers remaining... "); first_buf_printf = 0; } printf("%d ", nbusy); if (nbusy < pbusy) iter = 0; pbusy = nbusy; sync(curthread, NULL); #ifdef PREEMPTION /* * Drop Giant and spin for a while to allow * interrupt threads to run. */ DROP_GIANT(); DELAY(50000 * iter); PICKUP_GIANT(); #else /* * Drop Giant and context switch several times to * allow interrupt threads to run. */ DROP_GIANT(); for (subiter = 0; subiter < 50 * iter; subiter++) { thread_lock(curthread); mi_switch(SW_VOL, NULL); thread_unlock(curthread); DELAY(1000); } PICKUP_GIANT(); #endif } printf("\n"); /* * Count only busy local buffers to prevent forcing * a fsck if we're just a client of a wedged NFS server */ nbusy = 0; for (bp = &buf[nbuf]; --bp >= buf; ) { if (isbufbusy(bp)) { #if 0 /* XXX: This is bogus. We should probably have a BO_REMOTE flag instead */ if (bp->b_dev == NULL) { TAILQ_REMOVE(&mountlist, bp->b_vp->v_mount, mnt_list); continue; } #endif nbusy++; #if defined(SHOW_BUSYBUFS) || defined(DIAGNOSTIC) printf( "%d: bufobj:%p, flags:%0x, blkno:%ld, lblkno:%ld\n", nbusy, bp->b_bufobj, bp->b_flags, (long)bp->b_blkno, (long)bp->b_lblkno); #endif } } if (nbusy) { /* * Failed to sync all blocks. Indicate this and don't * unmount filesystems (thus forcing an fsck on reboot). */ printf("Giving up on %d buffers\n", nbusy); DELAY(5000000); /* 5 seconds */ } else { if (!first_buf_printf) printf("Final sync complete\n"); /* * Unmount filesystems */ if (panicstr == 0) vfs_unmountall(); } swapoff_all(); DELAY(100000); /* wait for console output to finish */ } print_uptime(); /* * Ok, now do things that assume all filesystem activity has * been completed. */ EVENTHANDLER_INVOKE(shutdown_post_sync, howto); /* XXX This doesn't disable interrupts any more. Reconsider? 
*/ splhigh(); if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) doadump(); /* Now that we're going to really halt the system... */ EVENTHANDLER_INVOKE(shutdown_final, howto); for(;;) ; /* safety against shutdown_reset not working */ /* NOTREACHED */ } /* * If the shutdown was a clean halt, behave accordingly. */ static void shutdown_halt(void *junk, int howto) { if (howto & RB_HALT) { printf("\n"); printf("The operating system has halted.\n"); printf("Please press any key to reboot.\n\n"); switch (cngetc()) { case -1: /* No console, just die */ cpu_halt(); /* NOTREACHED */ default: howto &= ~RB_HALT; break; } } } /* * Check to see if the system paniced, pause and then reboot * according to the specified delay. */ static void shutdown_panic(void *junk, int howto) { int loop; if (howto & RB_DUMP) { if (PANIC_REBOOT_WAIT_TIME != 0) { if (PANIC_REBOOT_WAIT_TIME != -1) { printf("Automatic reboot in %d seconds - " "press a key on the console to abort\n", PANIC_REBOOT_WAIT_TIME); for (loop = PANIC_REBOOT_WAIT_TIME * 10; loop > 0; --loop) { DELAY(1000 * 100); /* 1/10th second */ /* Did user type a key? */ if (cncheckc() != -1) break; } if (!loop) return; } } else { /* zero time specified - reboot NOW */ return; } printf("--> Press a key on the console to reboot,\n"); printf("--> or switch off the system now.\n"); cngetc(); } } /* * Everything done, now reset */ static void shutdown_reset(void *junk, int howto) { printf("Rebooting...\n"); DELAY(1000000); /* wait 1 sec for printf's to complete and be read */ /* cpu_boot(howto); */ /* doesn't do anything at the moment */ cpu_reset(); /* NOTREACHED */ /* assuming reset worked */ } #ifdef SMP static u_int panic_cpu = NOCPU; #endif /* * Panic is called on unresolvable fatal errors. It prints "panic: mesg", * and then reboots. If we are called twice, then we avoid trying to sync * the disks as this often leads to recursive panics. */ void panic(const char *fmt, ...) { struct thread *td = curthread; int bootopt, newpanic; va_list ap; static char buf[256]; #ifdef SMP /* * We don't want multiple CPU's to panic at the same time, so we * use panic_cpu as a simple spinlock. We have to keep checking * panic_cpu if we are spinning in case the panic on the first * CPU is canceled. */ if (panic_cpu != PCPU_GET(cpuid)) while (atomic_cmpset_int(&panic_cpu, NOCPU, PCPU_GET(cpuid)) == 0) while (panic_cpu != NOCPU) ; /* nothing */ #endif bootopt = RB_AUTOBOOT | RB_DUMP; newpanic = 0; if (panicstr) bootopt |= RB_NOSYNC; else { panicstr = fmt; newpanic = 1; } va_start(ap, fmt); if (newpanic) { (void)vsnprintf(buf, sizeof(buf), fmt, ap); panicstr = buf; printf("panic: %s\n", buf); } else { printf("panic: "); vprintf(fmt, ap); printf("\n"); } va_end(ap); #ifdef SMP printf("cpuid = %d\n", PCPU_GET(cpuid)); #endif #ifdef KDB if (newpanic && trace_on_panic) kdb_backtrace(); if (debugger_on_panic) - kdb_enter("panic"); + kdb_enter_why(KDB_WHY_PANIC, "panic"); #ifdef RESTARTABLE_PANICS /* See if the user aborted the panic, in which case we continue. */ if (panicstr == NULL) { #ifdef SMP atomic_store_rel_int(&panic_cpu, NOCPU); #endif return; } #endif #endif /*thread_lock(td); */ td->td_flags |= TDF_INPANIC; /* thread_unlock(td); */ if (!sync_on_panic) bootopt |= RB_NOSYNC; boot(bootopt); } /* * Support for poweroff delay. 
*/ #ifndef POWEROFF_DELAY # define POWEROFF_DELAY 5000 #endif static int poweroff_delay = POWEROFF_DELAY; SYSCTL_INT(_kern_shutdown, OID_AUTO, poweroff_delay, CTLFLAG_RW, &poweroff_delay, 0, ""); static void poweroff_wait(void *junk, int howto) { if (!(howto & RB_POWEROFF) || poweroff_delay <= 0) return; DELAY(poweroff_delay * 1000); } /* * Some system processes (e.g. syncer) need to be stopped at appropriate * points in their main loops prior to a system shutdown, so that they * won't interfere with the shutdown process (e.g. by holding a disk buf * to cause sync to fail). For each of these system processes, register * shutdown_kproc() as a handler for one of shutdown events. */ static int kproc_shutdown_wait = 60; SYSCTL_INT(_kern_shutdown, OID_AUTO, kproc_shutdown_wait, CTLFLAG_RW, &kproc_shutdown_wait, 0, ""); void kproc_shutdown(void *arg, int howto) { struct proc *p; char procname[MAXCOMLEN + 1]; int error; if (panicstr) return; p = (struct proc *)arg; strlcpy(procname, p->p_comm, sizeof(procname)); printf("Waiting (max %d seconds) for system process `%s' to stop...", kproc_shutdown_wait, procname); error = kthread_suspend(p, kproc_shutdown_wait * hz); if (error == EWOULDBLOCK) printf("timed out\n"); else printf("done\n"); } /* Registration of dumpers */ int set_dumper(struct dumperinfo *di) { if (di == NULL) { bzero(&dumper, sizeof dumper); return (0); } if (dumper.dumper != NULL) return (EBUSY); dumper = *di; return (0); } /* Call dumper with bounds checking. */ int dump_write(struct dumperinfo *di, void *virtual, vm_offset_t physical, off_t offset, size_t length) { if (length != 0 && (offset < di->mediaoffset || offset - di->mediaoffset + length > di->mediasize)) { printf("Attempt to write outside dump device boundaries.\n"); return (ENXIO); } return (di->dumper(di->priv, virtual, physical, offset, length)); } #if defined(__powerpc__) void dumpsys(struct dumperinfo *di __unused) { printf("Kernel dumps not implemented on this architecture\n"); } #endif Index: stable/7/sys/kern/subr_kdb.c =================================================================== --- stable/7/sys/kern/subr_kdb.c (revision 177733) +++ stable/7/sys/kern/subr_kdb.c (revision 177734) @@ -1,514 +1,536 @@ /*- * Copyright (c) 2004 The FreeBSD Project * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_kdb.h" #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif int kdb_active = 0; void *kdb_jmpbufp = NULL; struct kdb_dbbe *kdb_dbbe = NULL; struct pcb kdb_pcb; struct pcb *kdb_thrctx = NULL; struct thread *kdb_thread = NULL; struct trapframe *kdb_frame = NULL; KDB_BACKEND(null, NULL, NULL, NULL); SET_DECLARE(kdb_dbbe_set, struct kdb_dbbe); static int kdb_sysctl_available(SYSCTL_HANDLER_ARGS); static int kdb_sysctl_current(SYSCTL_HANDLER_ARGS); static int kdb_sysctl_enter(SYSCTL_HANDLER_ARGS); static int kdb_sysctl_panic(SYSCTL_HANDLER_ARGS); static int kdb_sysctl_trap(SYSCTL_HANDLER_ARGS); static int kdb_sysctl_trap_code(SYSCTL_HANDLER_ARGS); SYSCTL_NODE(_debug, OID_AUTO, kdb, CTLFLAG_RW, NULL, "KDB nodes"); SYSCTL_PROC(_debug_kdb, OID_AUTO, available, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, kdb_sysctl_available, "A", "list of available KDB backends"); SYSCTL_PROC(_debug_kdb, OID_AUTO, current, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, kdb_sysctl_current, "A", "currently selected KDB backend"); SYSCTL_PROC(_debug_kdb, OID_AUTO, enter, CTLTYPE_INT | CTLFLAG_RW, 0, 0, kdb_sysctl_enter, "I", "set to enter the debugger"); SYSCTL_PROC(_debug_kdb, OID_AUTO, panic, CTLTYPE_INT | CTLFLAG_RW, 0, 0, kdb_sysctl_panic, "I", "set to panic the kernel"); SYSCTL_PROC(_debug_kdb, OID_AUTO, trap, CTLTYPE_INT | CTLFLAG_RW, 0, 0, kdb_sysctl_trap, "I", "set to cause a page fault via data access"); SYSCTL_PROC(_debug_kdb, OID_AUTO, trap_code, CTLTYPE_INT | CTLFLAG_RW, 0, 0, kdb_sysctl_trap_code, "I", "set to cause a page fault via code access"); /* * Flag indicating whether or not to IPI the other CPUs to stop them on * entering the debugger. Sometimes, this will result in a deadlock as * stop_cpus() waits for the other cpus to stop, so we allow it to be * disabled. */ #ifdef SMP static int kdb_stop_cpus = 1; SYSCTL_INT(_debug_kdb, OID_AUTO, stop_cpus, CTLTYPE_INT | CTLFLAG_RW, &kdb_stop_cpus, 0, "stop other CPUs when entering the debugger"); TUNABLE_INT("debug.kdb.stop_cpus", &kdb_stop_cpus); #endif +/* + * Flag to indicate to debuggers why the debugger was entered. 
+ */ +const char * volatile kdb_why = KDB_WHY_UNSET; + static int kdb_sysctl_available(SYSCTL_HANDLER_ARGS) { struct kdb_dbbe *be, **iter; char *avail, *p; ssize_t len, sz; int error; sz = 0; SET_FOREACH(iter, kdb_dbbe_set) { be = *iter; if (be->dbbe_active == 0) sz += strlen(be->dbbe_name) + 1; } sz++; avail = malloc(sz, M_TEMP, M_WAITOK); p = avail; *p = '\0'; SET_FOREACH(iter, kdb_dbbe_set) { be = *iter; if (be->dbbe_active == 0) { len = snprintf(p, sz, "%s ", be->dbbe_name); p += len; sz -= len; } } KASSERT(sz >= 0, ("%s", __func__)); error = sysctl_handle_string(oidp, avail, 0, req); free(avail, M_TEMP); return (error); } static int kdb_sysctl_current(SYSCTL_HANDLER_ARGS) { char buf[16]; int error; if (kdb_dbbe != NULL) { strncpy(buf, kdb_dbbe->dbbe_name, sizeof(buf)); buf[sizeof(buf) - 1] = '\0'; } else *buf = '\0'; error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); if (kdb_active) return (EBUSY); return (kdb_dbbe_select(buf)); } static int kdb_sysctl_enter(SYSCTL_HANDLER_ARGS) { int error, i; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error == 0) { i = 0; error = sysctl_handle_int(oidp, &i, 0, req); } if (error != 0 || req->newptr == NULL) return (error); if (kdb_active) return (EBUSY); - kdb_enter("sysctl debug.kdb.enter"); + kdb_enter_why(KDB_WHY_SYSCTL, "sysctl debug.kdb.enter"); return (0); } static int kdb_sysctl_panic(SYSCTL_HANDLER_ARGS) { int error, i; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error == 0) { i = 0; error = sysctl_handle_int(oidp, &i, 0, req); } if (error != 0 || req->newptr == NULL) return (error); panic("kdb_sysctl_panic"); return (0); } static int kdb_sysctl_trap(SYSCTL_HANDLER_ARGS) { int error, i; int *addr = (int *)0x10; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error == 0) { i = 0; error = sysctl_handle_int(oidp, &i, 0, req); } if (error != 0 || req->newptr == NULL) return (error); return (*addr); } static int kdb_sysctl_trap_code(SYSCTL_HANDLER_ARGS) { int error, i; void (*fp)(u_int, u_int, u_int) = (void *)0xdeadc0de; error = sysctl_wire_old_buffer(req, sizeof(int)); if (error == 0) { i = 0; error = sysctl_handle_int(oidp, &i, 0, req); } if (error != 0 || req->newptr == NULL) return (error); (*fp)(0x11111111, 0x22222222, 0x33333333); return (0); } /* * Solaris implements a new BREAK which is initiated by a character sequence * CR ~ ^b which is similar to a familiar pattern used on Sun servers by the * Remote Console. * * Note that this function may be called from almost anywhere, with interrupts * disabled and with unknown locks held, so it must not access data other than * its arguments. Its up to the caller to ensure that the state variable is * consistent. */ #define KEY_CR 13 /* CR '\r' */ #define KEY_TILDE 126 /* ~ */ #define KEY_CRTLB 2 /* ^B */ int kdb_alt_break(int key, int *state) { int brk; brk = 0; switch (key) { case KEY_CR: *state = KEY_TILDE; break; case KEY_TILDE: *state = (*state == KEY_TILDE) ? KEY_CRTLB : 0; break; case KEY_CRTLB: if (*state == KEY_CRTLB) brk = 1; /* FALLTHROUGH */ default: *state = 0; break; } return (brk); } /* * Print a backtrace of the calling thread. The backtrace is generated by * the selected debugger, provided it supports backtraces. If no debugger * is selected or the current debugger does not support backtraces, this * function silently returns. 
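As a rough sketch of how kdb_alt_break() above is meant to be used, a console driver feeds each received character through it and enters the debugger when the CR ~ ^B sequence completes; the driver name and receive hook here are hypothetical, and the per-port state variable is whatever the driver keeps across characters:

#include <sys/param.h>
#include <sys/kdb.h>

static int mycons_alt_break_state;     /* sequence state, one per port */

static void
mycons_rx_char(int c)
{

        if (kdb_alt_break(c, &mycons_alt_break_state))
                kdb_enter("Break to debugger"); /* legacy wrapper, see below */
        /* ... normal input processing ... */
}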
*/ void kdb_backtrace() { if (kdb_dbbe != NULL && kdb_dbbe->dbbe_trace != NULL) { printf("KDB: stack backtrace:\n"); kdb_dbbe->dbbe_trace(); } } /* * Set/change the current backend. */ int kdb_dbbe_select(const char *name) { struct kdb_dbbe *be, **iter; SET_FOREACH(iter, kdb_dbbe_set) { be = *iter; if (be->dbbe_active == 0 && strcmp(be->dbbe_name, name) == 0) { kdb_dbbe = be; return (0); } } return (EINVAL); } /* * Enter the currently selected debugger. If a message has been provided, * it is printed first. If the debugger does not support the enter method, * it is entered by using breakpoint(), which enters the debugger through - * kdb_trap(). + * kdb_trap(). The 'why' argument will contain a more mechanically usable + * string than 'msg', and is relied upon by DDB scripting to identify the + * reason for entering the debugger so that the right script can be run. */ - void -kdb_enter(const char *msg) +kdb_enter_why(const char *why, const char *msg) { if (kdb_dbbe != NULL && kdb_active == 0) { if (msg != NULL) printf("KDB: enter: %s\n", msg); + kdb_why = why; breakpoint(); + kdb_why = KDB_WHY_UNSET; } +} + +/* + * This compatibility function exists so that kernel modules with existing + * dependencies will still link and function. In FreeBSD 8.0, kdb_enter() is + * kdb_enter_why(). In 7.x, this limits how KDB can use the entry call, and + * means that there won't be specific scripts for event that use the legacy + * interface. + */ +void +kdb_enter(const char *msg) +{ + + kdb_enter_why(KDB_WHY_UNSET, msg); } /* * Initialize the kernel debugger interface. */ void kdb_init() { struct kdb_dbbe *be, **iter; int cur_pri, pri; kdb_active = 0; kdb_dbbe = NULL; cur_pri = -1; SET_FOREACH(iter, kdb_dbbe_set) { be = *iter; pri = (be->dbbe_init != NULL) ? be->dbbe_init() : -1; be->dbbe_active = (pri >= 0) ? 0 : -1; if (pri > cur_pri) { cur_pri = pri; kdb_dbbe = be; } } if (kdb_dbbe != NULL) { printf("KDB: debugger backends:"); SET_FOREACH(iter, kdb_dbbe_set) { be = *iter; if (be->dbbe_active == 0) printf(" %s", be->dbbe_name); } printf("\n"); printf("KDB: current backend: %s\n", kdb_dbbe->dbbe_name); } } /* * Handle contexts. */ void * kdb_jmpbuf(jmp_buf new) { void *old; old = kdb_jmpbufp; kdb_jmpbufp = new; return (old); } void kdb_reenter(void) { if (!kdb_active || kdb_jmpbufp == NULL) return; longjmp(kdb_jmpbufp, 1); /* NOTREACHED */ } /* * Thread related support functions. 
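An illustrative caller of the new two-argument interface, assuming the kdb_enter_why() prototype is exported through <sys/kdb.h> as part of this change; the subsystem name, reason string and function are hypothetical (the stock reasons are the KDB_WHY_* constants used elsewhere in this commit), and DDB scripting can then key off the reason to select an appropriate script:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>

#define MYDRV_KDB_WHY   "mydrv"         /* hypothetical reason name */

static void
mydrv_fatal(const char *what)
{

        printf("mydrv: fatal error: %s\n", what);
        kdb_enter_why(MYDRV_KDB_WHY, "mydrv fatal error");
}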
*/ struct pcb * kdb_thr_ctx(struct thread *thr) { #if defined(SMP) && defined(KDB_STOPPEDPCB) struct pcpu *pc; #endif if (thr == curthread) return (&kdb_pcb); #if defined(SMP) && defined(KDB_STOPPEDPCB) SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { if (pc->pc_curthread == thr && (stopped_cpus & pc->pc_cpumask)) return (KDB_STOPPEDPCB(pc)); } #endif return (thr->td_pcb); } struct thread * kdb_thr_first(void) { struct proc *p; struct thread *thr; p = LIST_FIRST(&allproc); while (p != NULL) { if (p->p_flag & P_INMEM) { thr = FIRST_THREAD_IN_PROC(p); if (thr != NULL) return (thr); } p = LIST_NEXT(p, p_list); } return (NULL); } struct thread * kdb_thr_from_pid(pid_t pid) { struct proc *p; p = LIST_FIRST(&allproc); while (p != NULL) { if (p->p_flag & P_INMEM && p->p_pid == pid) return (FIRST_THREAD_IN_PROC(p)); p = LIST_NEXT(p, p_list); } return (NULL); } struct thread * kdb_thr_lookup(lwpid_t tid) { struct thread *thr; thr = kdb_thr_first(); while (thr != NULL && thr->td_tid != tid) thr = kdb_thr_next(thr); return (thr); } struct thread * kdb_thr_next(struct thread *thr) { struct proc *p; p = thr->td_proc; thr = TAILQ_NEXT(thr, td_plist); do { if (thr != NULL) return (thr); p = LIST_NEXT(p, p_list); if (p != NULL && (p->p_flag & P_INMEM)) thr = FIRST_THREAD_IN_PROC(p); } while (p != NULL); return (NULL); } int kdb_thr_select(struct thread *thr) { if (thr == NULL) return (EINVAL); kdb_thread = thr; kdb_thrctx = kdb_thr_ctx(thr); return (0); } /* * Enter the debugger due to a trap. */ int kdb_trap(int type, int code, struct trapframe *tf) { register_t intr; #ifdef SMP int did_stop_cpus; #endif int handled; if (kdb_dbbe == NULL || kdb_dbbe->dbbe_trap == NULL) return (0); /* We reenter the debugger through kdb_reenter(). */ if (kdb_active) return (0); intr = intr_disable(); #ifdef SMP if ((did_stop_cpus = kdb_stop_cpus) != 0) stop_cpus(PCPU_GET(other_cpus)); #endif kdb_active++; kdb_frame = tf; /* Let MD code do its thing first... */ kdb_cpu_trap(type, code); makectx(tf, &kdb_pcb); kdb_thr_select(curthread); handled = kdb_dbbe->dbbe_trap(type, code); kdb_active--; #ifdef SMP if (did_stop_cpus) restart_cpus(stopped_cpus); #endif intr_restore(intr); return (handled); } Index: stable/7/sys/kern/subr_witness.c =================================================================== --- stable/7/sys/kern/subr_witness.c (revision 177733) +++ stable/7/sys/kern/subr_witness.c (revision 177734) @@ -1,2031 +1,2031 @@ /*- * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Berkeley Software Design Inc's name may not be used to endorse or * promote products derived from this software without specific prior * written permission. * * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from BSDI $Id: mutex_witness.c,v 1.1.2.20 2000/04/27 03:10:27 cp Exp $ * and BSDI $Id: synch_machdep.c,v 2.3.2.39 2000/04/27 03:10:25 cp Exp $ */ /* * Implementation of the `witness' lock verifier. Originally implemented for * mutexes in BSD/OS. Extended to handle generic lock objects and lock * classes in FreeBSD. */ /* * Main Entry: witness * Pronunciation: 'wit-n&s * Function: noun * Etymology: Middle English witnesse, from Old English witnes knowledge, * testimony, witness, from 2wit * Date: before 12th century * 1 : attestation of a fact or event : TESTIMONY * 2 : one that gives evidence; specifically : one who testifies in * a cause or before a judicial tribunal * 3 : one asked to be present at a transaction so as to be able to * testify to its having taken place * 4 : one who has personal knowledge of something * 5 a : something serving as evidence or proof : SIGN * b : public affirmation by word or example of usually * religious faith or conviction * 6 capitalized : a member of the Jehovah's Witnesses */ /* * Special rules concerning Giant and lock orders: * * 1) Giant must be acquired before any other mutexes. Stated another way, * no other mutex may be held when Giant is acquired. * * 2) Giant must be released when blocking on a sleepable lock. * * This rule is less obvious, but is a result of Giant providing the same * semantics as spl(). Basically, when a thread sleeps, it must release * Giant. When a thread blocks on a sleepable lock, it sleeps. Hence rule * 2). * * 3) Giant may be acquired before or after sleepable locks. * * This rule is also not quite as obvious. Giant may be acquired after * a sleepable lock because it is a non-sleepable lock and non-sleepable * locks may always be acquired while holding a sleepable lock. The second * case, Giant before a sleepable lock, follows from rule 2) above. Suppose * you have two threads T1 and T2 and a sleepable lock X. Suppose that T1 * acquires X and blocks on Giant. Then suppose that T2 acquires Giant and * blocks on X. When T2 blocks on X, T2 will release Giant allowing T1 to * execute. Thus, acquiring Giant both before and after a sleepable lock * will not result in a lock order reversal. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_hwpmc_hooks.h" #include "opt_witness.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Note that these traces do not work with KTR_ALQ. */ #if 0 #define KTR_WITNESS KTR_SUBSYS #else #define KTR_WITNESS 0 #endif /* Easier to stay with the old names. */ #define lo_list lo_witness_data.lod_list #define lo_witness lo_witness_data.lod_witness /* Define this to check for blessed mutexes */ #undef BLESSING #define WITNESS_COUNT 1024 #define WITNESS_CHILDCOUNT (WITNESS_COUNT * 4) /* * XXX: This is somewhat bogus, as we assume here that at most 1024 threads * will hold LOCK_NCHILDREN * 2 locks. 
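A toy illustration of the ordering rules described above, using two hypothetical driver mutexes and the standard mutex(9) API: WITNESS records the relation "a before b" the first time both are held together, and later reports a reversal if they are taken in the opposite order (or if Giant is acquired while another mutex is already held, violating rule 1).

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>

static struct mtx example_a, example_b;

static void
example_init(void)
{

        mtx_init(&example_a, "example a", NULL, MTX_DEF);
        mtx_init(&example_b, "example b", NULL, MTX_DEF);
}

static void
example_ok(void)
{

        mtx_lock(&example_a);           /* establishes a -> b */
        mtx_lock(&example_b);
        mtx_unlock(&example_b);
        mtx_unlock(&example_a);
}

static void
example_reversal(void)
{

        mtx_lock(&example_b);           /* b -> a: witness reports a LOR */
        mtx_lock(&example_a);
        mtx_unlock(&example_a);
        mtx_unlock(&example_b);
}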
We handle failure ok, and we should * probably be safe for the most part, but it's still a SWAG. */ #define LOCK_CHILDCOUNT (MAXCPU + 1024) * 2 #define WITNESS_NCHILDREN 6 struct witness_child_list_entry; struct witness { const char *w_name; struct lock_class *w_class; STAILQ_ENTRY(witness) w_list; /* List of all witnesses. */ STAILQ_ENTRY(witness) w_typelist; /* Witnesses of a type. */ struct witness_child_list_entry *w_children; /* Great evilness... */ const char *w_file; int w_line; u_int w_level; u_int w_refcount; u_char w_Giant_squawked:1; u_char w_other_squawked:1; u_char w_same_squawked:1; u_char w_displayed:1; }; struct witness_child_list_entry { struct witness_child_list_entry *wcl_next; struct witness *wcl_children[WITNESS_NCHILDREN]; u_int wcl_count; }; STAILQ_HEAD(witness_list, witness); #ifdef BLESSING struct witness_blessed { const char *b_lock1; const char *b_lock2; }; #endif struct witness_order_list_entry { const char *w_name; struct lock_class *w_class; }; #ifdef BLESSING static int blessed(struct witness *, struct witness *); #endif static int depart(struct witness *w); static struct witness *enroll(const char *description, struct lock_class *lock_class); static int insertchild(struct witness *parent, struct witness *child); static int isitmychild(struct witness *parent, struct witness *child); static int isitmydescendant(struct witness *parent, struct witness *child); static int itismychild(struct witness *parent, struct witness *child); static void removechild(struct witness *parent, struct witness *child); static int sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS); static const char *fixup_filename(const char *file); static struct witness *witness_get(void); static void witness_free(struct witness *m); static struct witness_child_list_entry *witness_child_get(void); static void witness_child_free(struct witness_child_list_entry *wcl); static struct lock_list_entry *witness_lock_list_get(void); static void witness_lock_list_free(struct lock_list_entry *lle); static struct lock_instance *find_instance(struct lock_list_entry *lock_list, struct lock_object *lock); static void witness_list_lock(struct lock_instance *instance); #ifdef DDB static void witness_leveldescendents(struct witness *parent, int level); static void witness_levelall(void); static void witness_displaydescendants(void(*)(const char *fmt, ...), struct witness *, int indent); static void witness_display_list(void(*prnt)(const char *fmt, ...), struct witness_list *list); static void witness_display(void(*)(const char *fmt, ...)); static void witness_list(struct thread *td); #endif SYSCTL_NODE(_debug, OID_AUTO, witness, CTLFLAG_RW, 0, "Witness Locking"); /* * If set to 0, witness is disabled. If set to a non-zero value, witness * performs full lock order checking for all locks. At runtime, this * value may be set to 0 to turn off witness. witness is not allowed be * turned on once it is turned off, however. */ static int witness_watch = 1; TUNABLE_INT("debug.witness.watch", &witness_watch); SYSCTL_PROC(_debug_witness, OID_AUTO, watch, CTLFLAG_RW | CTLTYPE_INT, NULL, 0, sysctl_debug_witness_watch, "I", "witness is watching lock operations"); #ifdef KDB /* * When KDB is enabled and witness_kdb is set to 1, it will cause the system * to drop into kdebug() when: * - a lock hierarchy violation occurs * - locks are held when going to sleep. 
*/ #ifdef WITNESS_KDB int witness_kdb = 1; #else int witness_kdb = 0; #endif TUNABLE_INT("debug.witness.kdb", &witness_kdb); SYSCTL_INT(_debug_witness, OID_AUTO, kdb, CTLFLAG_RW, &witness_kdb, 0, ""); /* * When KDB is enabled and witness_trace is set to 1, it will cause the system * to print a stack trace: * - a lock hierarchy violation occurs * - locks are held when going to sleep. */ int witness_trace = 1; TUNABLE_INT("debug.witness.trace", &witness_trace); SYSCTL_INT(_debug_witness, OID_AUTO, trace, CTLFLAG_RW, &witness_trace, 0, ""); #endif /* KDB */ #ifdef WITNESS_SKIPSPIN int witness_skipspin = 1; #else int witness_skipspin = 0; #endif TUNABLE_INT("debug.witness.skipspin", &witness_skipspin); SYSCTL_INT(_debug_witness, OID_AUTO, skipspin, CTLFLAG_RDTUN, &witness_skipspin, 0, ""); static struct mtx w_mtx; static struct witness_list w_free = STAILQ_HEAD_INITIALIZER(w_free); static struct witness_list w_all = STAILQ_HEAD_INITIALIZER(w_all); static struct witness_list w_spin = STAILQ_HEAD_INITIALIZER(w_spin); static struct witness_list w_sleep = STAILQ_HEAD_INITIALIZER(w_sleep); static struct witness_child_list_entry *w_child_free = NULL; static struct lock_list_entry *w_lock_list_free = NULL; static int w_free_cnt, w_spin_cnt, w_sleep_cnt, w_child_free_cnt, w_child_cnt; SYSCTL_INT(_debug_witness, OID_AUTO, free_cnt, CTLFLAG_RD, &w_free_cnt, 0, ""); SYSCTL_INT(_debug_witness, OID_AUTO, spin_cnt, CTLFLAG_RD, &w_spin_cnt, 0, ""); SYSCTL_INT(_debug_witness, OID_AUTO, sleep_cnt, CTLFLAG_RD, &w_sleep_cnt, 0, ""); SYSCTL_INT(_debug_witness, OID_AUTO, child_free_cnt, CTLFLAG_RD, &w_child_free_cnt, 0, ""); SYSCTL_INT(_debug_witness, OID_AUTO, child_cnt, CTLFLAG_RD, &w_child_cnt, 0, ""); static struct witness w_data[WITNESS_COUNT]; static struct witness_child_list_entry w_childdata[WITNESS_CHILDCOUNT]; static struct lock_list_entry w_locklistdata[LOCK_CHILDCOUNT]; static struct witness_order_list_entry order_lists[] = { /* * sx locks */ { "proctree", &lock_class_sx }, { "allproc", &lock_class_sx }, { "allprison", &lock_class_sx }, { NULL, NULL }, /* * Various mutexes */ { "Giant", &lock_class_mtx_sleep }, { "pipe mutex", &lock_class_mtx_sleep }, { "sigio lock", &lock_class_mtx_sleep }, { "process group", &lock_class_mtx_sleep }, { "process lock", &lock_class_mtx_sleep }, { "session", &lock_class_mtx_sleep }, { "uidinfo hash", &lock_class_mtx_sleep }, { "uidinfo struct", &lock_class_mtx_sleep }, #ifdef HWPMC_HOOKS { "pmc-sleep", &lock_class_mtx_sleep }, #endif { NULL, NULL }, /* * Sockets */ { "accept", &lock_class_mtx_sleep }, { "so_snd", &lock_class_mtx_sleep }, { "so_rcv", &lock_class_mtx_sleep }, { "sellck", &lock_class_mtx_sleep }, { NULL, NULL }, /* * Routing */ { "so_rcv", &lock_class_mtx_sleep }, { "radix node head", &lock_class_mtx_sleep }, { "rtentry", &lock_class_mtx_sleep }, { "ifaddr", &lock_class_mtx_sleep }, { NULL, NULL }, /* * Multicast - protocol locks before interface locks, after UDP locks. 
*/ { "udpinp", &lock_class_mtx_sleep }, { "in_multi_mtx", &lock_class_mtx_sleep }, { "igmp_mtx", &lock_class_mtx_sleep }, { "if_addr_mtx", &lock_class_mtx_sleep }, { NULL, NULL }, /* * UNIX Domain Sockets */ { "unp", &lock_class_mtx_sleep }, { "so_snd", &lock_class_mtx_sleep }, { NULL, NULL }, /* * UDP/IP */ { "udp", &lock_class_mtx_sleep }, { "udpinp", &lock_class_mtx_sleep }, { "so_snd", &lock_class_mtx_sleep }, { NULL, NULL }, /* * TCP/IP */ { "tcp", &lock_class_mtx_sleep }, { "tcpinp", &lock_class_mtx_sleep }, { "so_snd", &lock_class_mtx_sleep }, { NULL, NULL }, /* * SLIP */ { "slip_mtx", &lock_class_mtx_sleep }, { "slip sc_mtx", &lock_class_mtx_sleep }, { NULL, NULL }, /* * netatalk */ { "ddp_list_mtx", &lock_class_mtx_sleep }, { "ddp_mtx", &lock_class_mtx_sleep }, { NULL, NULL }, /* * BPF */ { "bpf global lock", &lock_class_mtx_sleep }, { "bpf interface lock", &lock_class_mtx_sleep }, { "bpf cdev lock", &lock_class_mtx_sleep }, { NULL, NULL }, /* * NFS server */ { "nfsd_mtx", &lock_class_mtx_sleep }, { "so_snd", &lock_class_mtx_sleep }, { NULL, NULL }, /* * IEEE 802.11 */ { "802.11 com lock", &lock_class_mtx_sleep}, { NULL, NULL }, /* * Network drivers */ { "network driver", &lock_class_mtx_sleep}, { NULL, NULL }, /* * Netgraph */ { "ng_node", &lock_class_mtx_sleep }, { "ng_worklist", &lock_class_mtx_sleep }, { NULL, NULL }, /* * CDEV */ { "system map", &lock_class_mtx_sleep }, { "vm page queue mutex", &lock_class_mtx_sleep }, { "vnode interlock", &lock_class_mtx_sleep }, { "cdev", &lock_class_mtx_sleep }, { NULL, NULL }, /* * kqueue/VFS interaction */ { "kqueue", &lock_class_mtx_sleep }, { "struct mount mtx", &lock_class_mtx_sleep }, { "vnode interlock", &lock_class_mtx_sleep }, { NULL, NULL }, /* * spin locks */ #ifdef SMP { "ap boot", &lock_class_mtx_spin }, #endif { "rm.mutex_mtx", &lock_class_mtx_spin }, { "sio", &lock_class_mtx_spin }, { "scrlock", &lock_class_mtx_spin }, #ifdef __i386__ { "cy", &lock_class_mtx_spin }, #endif #ifdef __sparc64__ { "pcib_mtx", &lock_class_mtx_spin }, { "rtc_mtx", &lock_class_mtx_spin }, #endif { "scc_hwmtx", &lock_class_mtx_spin }, { "uart_hwmtx", &lock_class_mtx_spin }, { "fast_taskqueue", &lock_class_mtx_spin }, { "intr table", &lock_class_mtx_spin }, #ifdef HWPMC_HOOKS { "pmc-per-proc", &lock_class_mtx_spin }, #endif { "process slock", &lock_class_mtx_spin }, { "sleepq chain", &lock_class_mtx_spin }, { "umtx lock", &lock_class_mtx_spin }, { "turnstile chain", &lock_class_mtx_spin }, { "turnstile lock", &lock_class_mtx_spin }, { "sched lock", &lock_class_mtx_spin }, { "td_contested", &lock_class_mtx_spin }, { "callout", &lock_class_mtx_spin }, { "entropy harvest mutex", &lock_class_mtx_spin }, { "syscons video lock", &lock_class_mtx_spin }, { "time lock", &lock_class_mtx_spin }, #ifdef SMP { "smp rendezvous", &lock_class_mtx_spin }, #endif /* * leaf locks */ { "icu", &lock_class_mtx_spin }, #if defined(SMP) && defined(__sparc64__) { "ipi", &lock_class_mtx_spin }, #endif #ifdef __i386__ { "allpmaps", &lock_class_mtx_spin }, { "descriptor tables", &lock_class_mtx_spin }, #endif { "clk", &lock_class_mtx_spin }, { "mprof lock", &lock_class_mtx_spin }, { "kse lock", &lock_class_mtx_spin }, { "zombie lock", &lock_class_mtx_spin }, { "ALD Queue", &lock_class_mtx_spin }, #ifdef __ia64__ { "MCA spin lock", &lock_class_mtx_spin }, #endif #if defined(__i386__) || defined(__amd64__) { "pcicfg", &lock_class_mtx_spin }, { "NDIS thread lock", &lock_class_mtx_spin }, #endif { "tw_osl_io_lock", &lock_class_mtx_spin }, { "tw_osl_q_lock", &lock_class_mtx_spin }, 
{ "tw_cl_io_lock", &lock_class_mtx_spin }, { "tw_cl_intr_lock", &lock_class_mtx_spin }, { "tw_cl_gen_lock", &lock_class_mtx_spin }, #ifdef HWPMC_HOOKS { "pmc-leaf", &lock_class_mtx_spin }, #endif { "blocked lock", &lock_class_mtx_spin }, { NULL, NULL }, { NULL, NULL } }; #ifdef BLESSING /* * Pairs of locks which have been blessed * Don't complain about order problems with blessed locks */ static struct witness_blessed blessed_list[] = { }; static int blessed_count = sizeof(blessed_list) / sizeof(struct witness_blessed); #endif /* * List of locks initialized prior to witness being initialized whose * enrollment is currently deferred. */ STAILQ_HEAD(, lock_object) pending_locks = STAILQ_HEAD_INITIALIZER(pending_locks); /* * This global is set to 0 once it becomes safe to use the witness code. */ static int witness_cold = 1; /* * This global is set to 1 once the static lock orders have been enrolled * so that a warning can be issued for any spin locks enrolled later. */ static int witness_spin_warn = 0; /* * The WITNESS-enabled diagnostic code. Note that the witness code does * assume that the early boot is single-threaded at least until after this * routine is completed. */ static void witness_initialize(void *dummy __unused) { struct lock_object *lock; struct witness_order_list_entry *order; struct witness *w, *w1; int i; /* * We have to release Giant before initializing its witness * structure so that WITNESS doesn't get confused. */ mtx_unlock(&Giant); mtx_assert(&Giant, MA_NOTOWNED); CTR1(KTR_WITNESS, "%s: initializing witness", __func__); mtx_init(&w_mtx, "witness lock", NULL, MTX_SPIN | MTX_QUIET | MTX_NOWITNESS | MTX_NOPROFILE); for (i = 0; i < WITNESS_COUNT; i++) witness_free(&w_data[i]); for (i = 0; i < WITNESS_CHILDCOUNT; i++) witness_child_free(&w_childdata[i]); for (i = 0; i < LOCK_CHILDCOUNT; i++) witness_lock_list_free(&w_locklistdata[i]); /* First add in all the specified order lists. */ for (order = order_lists; order->w_name != NULL; order++) { w = enroll(order->w_name, order->w_class); if (w == NULL) continue; w->w_file = "order list"; for (order++; order->w_name != NULL; order++) { w1 = enroll(order->w_name, order->w_class); if (w1 == NULL) continue; w1->w_file = "order list"; if (!itismychild(w, w1)) panic("Not enough memory for static orders!"); w = w1; } } witness_spin_warn = 1; /* Iterate through all locks and add them to witness. */ while (!STAILQ_EMPTY(&pending_locks)) { lock = STAILQ_FIRST(&pending_locks); STAILQ_REMOVE_HEAD(&pending_locks, lo_list); KASSERT(lock->lo_flags & LO_WITNESS, ("%s: lock %s is on pending list but not LO_WITNESS", __func__, lock->lo_name)); lock->lo_witness = enroll(lock->lo_type, LOCK_CLASS(lock)); } /* Mark the witness code as being ready for use. */ witness_cold = 0; mtx_lock(&Giant); } SYSINIT(witness_init, SI_SUB_WITNESS, SI_ORDER_FIRST, witness_initialize, NULL) static int sysctl_debug_witness_watch(SYSCTL_HANDLER_ARGS) { int error, value; value = witness_watch; error = sysctl_handle_int(oidp, &value, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (value == witness_watch) return (0); if (value != 0) return (EINVAL); witness_watch = 0; return (0); } void witness_init(struct lock_object *lock) { struct lock_class *class; /* Various sanity checks. 
*/ class = LOCK_CLASS(lock); if ((lock->lo_flags & LO_RECURSABLE) != 0 && (class->lc_flags & LC_RECURSABLE) == 0) panic("%s: lock (%s) %s can not be recursable", __func__, class->lc_name, lock->lo_name); if ((lock->lo_flags & LO_SLEEPABLE) != 0 && (class->lc_flags & LC_SLEEPABLE) == 0) panic("%s: lock (%s) %s can not be sleepable", __func__, class->lc_name, lock->lo_name); if ((lock->lo_flags & LO_UPGRADABLE) != 0 && (class->lc_flags & LC_UPGRADABLE) == 0) panic("%s: lock (%s) %s can not be upgradable", __func__, class->lc_name, lock->lo_name); /* * If we shouldn't watch this lock, then just clear lo_witness. * Otherwise, if witness_cold is set, then it is too early to * enroll this lock, so defer it to witness_initialize() by adding * it to the pending_locks list. If it is not too early, then enroll * the lock now. */ if (witness_watch == 0 || panicstr != NULL || (lock->lo_flags & LO_WITNESS) == 0) lock->lo_witness = NULL; else if (witness_cold) { STAILQ_INSERT_TAIL(&pending_locks, lock, lo_list); lock->lo_flags |= LO_ENROLLPEND; } else lock->lo_witness = enroll(lock->lo_type, class); } void witness_destroy(struct lock_object *lock) { struct lock_class *class; struct witness *w; class = LOCK_CLASS(lock); if (witness_cold) panic("lock (%s) %s destroyed while witness_cold", class->lc_name, lock->lo_name); /* XXX: need to verify that no one holds the lock */ if ((lock->lo_flags & (LO_WITNESS | LO_ENROLLPEND)) == LO_WITNESS && lock->lo_witness != NULL) { w = lock->lo_witness; mtx_lock_spin(&w_mtx); MPASS(w->w_refcount > 0); w->w_refcount--; /* * Lock is already released if we have an allocation failure * and depart() fails. */ if (w->w_refcount != 0 || depart(w)) mtx_unlock_spin(&w_mtx); } /* * If this lock is destroyed before witness is up and running, * remove it from the pending list. */ if (lock->lo_flags & LO_ENROLLPEND) { STAILQ_REMOVE(&pending_locks, lock, lock_object, lo_list); lock->lo_flags &= ~LO_ENROLLPEND; } } #ifdef DDB static void witness_levelall (void) { struct witness_list *list; struct witness *w, *w1; /* * First clear all levels. */ STAILQ_FOREACH(w, &w_all, w_list) { w->w_level = 0; } /* * Look for locks with no parent and level all their descendants. */ STAILQ_FOREACH(w, &w_all, w_list) { /* * This is just an optimization, technically we could get * away just walking the all list each time. 
*/ if (w->w_class->lc_flags & LC_SLEEPLOCK) list = &w_sleep; else list = &w_spin; STAILQ_FOREACH(w1, list, w_typelist) { if (isitmychild(w1, w)) goto skip; } witness_leveldescendents(w, 0); skip: ; /* silence GCC 3.x */ } } static void witness_leveldescendents(struct witness *parent, int level) { struct witness_child_list_entry *wcl; int i; if (parent->w_level < level) parent->w_level = level; level++; for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) for (i = 0; i < wcl->wcl_count; i++) witness_leveldescendents(wcl->wcl_children[i], level); } static void witness_displaydescendants(void(*prnt)(const char *fmt, ...), struct witness *parent, int indent) { struct witness_child_list_entry *wcl; int i, level; level = parent->w_level; prnt("%-2d", level); for (i = 0; i < indent; i++) prnt(" "); if (parent->w_refcount > 0) prnt("%s", parent->w_name); else prnt("(dead)"); if (parent->w_displayed) { prnt(" -- (already displayed)\n"); return; } parent->w_displayed = 1; if (parent->w_refcount > 0) { if (parent->w_file != NULL) prnt(" -- last acquired @ %s:%d", parent->w_file, parent->w_line); } prnt("\n"); for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) for (i = 0; i < wcl->wcl_count; i++) witness_displaydescendants(prnt, wcl->wcl_children[i], indent + 1); } static void witness_display_list(void(*prnt)(const char *fmt, ...), struct witness_list *list) { struct witness *w; STAILQ_FOREACH(w, list, w_typelist) { if (w->w_file == NULL || w->w_level > 0) continue; /* * This lock has no anscestors, display its descendants. */ witness_displaydescendants(prnt, w, 0); } } static void witness_display(void(*prnt)(const char *fmt, ...)) { struct witness *w; KASSERT(!witness_cold, ("%s: witness_cold", __func__)); witness_levelall(); /* Clear all the displayed flags. */ STAILQ_FOREACH(w, &w_all, w_list) { w->w_displayed = 0; } /* * First, handle sleep locks which have been acquired at least * once. */ prnt("Sleep locks:\n"); witness_display_list(prnt, &w_sleep); /* * Now do spin locks which have been acquired at least once. */ prnt("\nSpin locks:\n"); witness_display_list(prnt, &w_spin); /* * Finally, any locks which have not been acquired yet. */ prnt("\nLocks which were never acquired:\n"); STAILQ_FOREACH(w, &w_all, w_list) { if (w->w_file != NULL || w->w_refcount == 0) continue; prnt("%s\n", w->w_name); } } #endif /* DDB */ /* Trim useless garbage from filenames. */ static const char * fixup_filename(const char *file) { if (file == NULL) return (NULL); while (strncmp(file, "../", 3) == 0) file += 3; return (file); } int witness_defineorder(struct lock_object *lock1, struct lock_object *lock2) { if (witness_watch == 0 || panicstr != NULL) return (0); /* Require locks that witness knows about. */ if (lock1 == NULL || lock1->lo_witness == NULL || lock2 == NULL || lock2->lo_witness == NULL) return (EINVAL); MPASS(!mtx_owned(&w_mtx)); mtx_lock_spin(&w_mtx); /* * If we already have either an explicit or implied lock order that * is the other way around, then return an error. */ if (isitmydescendant(lock2->lo_witness, lock1->lo_witness)) { mtx_unlock_spin(&w_mtx); return (EDOOFUS); } /* Try to add the new order. 
*/ CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, lock2->lo_type, lock1->lo_type); if (!itismychild(lock1->lo_witness, lock2->lo_witness)) return (ENOMEM); mtx_unlock_spin(&w_mtx); return (0); } void witness_checkorder(struct lock_object *lock, int flags, const char *file, int line) { struct lock_list_entry **lock_list, *lle; struct lock_instance *lock1, *lock2; struct lock_class *class; struct witness *w, *w1; struct thread *td; int i, j; if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || panicstr != NULL) return; /* * Try locks do not block if they fail to acquire the lock, thus * there is no danger of deadlocks or of switching while holding a * spin lock if we acquire a lock via a try operation. This * function shouldn't even be called for try locks, so panic if * that happens. */ if (flags & LOP_TRYLOCK) panic("%s should not be called for try lock operations", __func__); w = lock->lo_witness; class = LOCK_CLASS(lock); td = curthread; file = fixup_filename(file); if (class->lc_flags & LC_SLEEPLOCK) { /* * Since spin locks include a critical section, this check * implicitly enforces a lock order of all sleep locks before * all spin locks. */ if (td->td_critnest != 0 && !kdb_active) panic("blockable sleep lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, file, line); /* * If this is the first lock acquired then just return as * no order checking is needed. */ if (td->td_sleeplocks == NULL) return; lock_list = &td->td_sleeplocks; } else { /* * If this is the first lock, just return as no order * checking is needed. We check this in both if clauses * here as unifying the check would require us to use a * critical section to ensure we don't migrate while doing * the check. Note that if this is not the first lock, we * are already in a critical section and are safe for the * rest of the check. */ if (PCPU_GET(spinlocks) == NULL) return; lock_list = PCPU_PTR(spinlocks); } /* * Check to see if we are recursing on a lock we already own. If * so, make sure that we don't mismatch exclusive and shared lock * acquires. */ lock1 = find_instance(*lock_list, lock); if (lock1 != NULL) { if ((lock1->li_flags & LI_EXCLUSIVE) != 0 && (flags & LOP_EXCLUSIVE) == 0) { printf("shared lock of (%s) %s @ %s:%d\n", class->lc_name, lock->lo_name, file, line); printf("while exclusively locked from %s:%d\n", lock1->li_file, lock1->li_line); panic("share->excl"); } if ((lock1->li_flags & LI_EXCLUSIVE) == 0 && (flags & LOP_EXCLUSIVE) != 0) { printf("exclusive lock of (%s) %s @ %s:%d\n", class->lc_name, lock->lo_name, file, line); printf("while share locked from %s:%d\n", lock1->li_file, lock1->li_line); panic("excl->share"); } return; } /* * Try locks do not block if they fail to acquire the lock, thus * there is no danger of deadlocks or of switching while holding a * spin lock if we acquire a lock via a try operation. */ if (flags & LOP_TRYLOCK) return; /* * Check for duplicate locks of the same type. Note that we only * have to check for this on the last lock we just acquired. Any * other cases will be caught as lock order violations. 
*/ lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; w1 = lock1->li_lock->lo_witness; if (w1 == w) { if (w->w_same_squawked || (lock->lo_flags & LO_DUPOK) || (flags & LOP_DUPOK)) return; w->w_same_squawked = 1; printf("acquiring duplicate lock of same type: \"%s\"\n", lock->lo_type); printf(" 1st %s @ %s:%d\n", lock1->li_lock->lo_name, lock1->li_file, lock1->li_line); printf(" 2nd %s @ %s:%d\n", lock->lo_name, file, line); #ifdef KDB goto debugger; #else return; #endif } MPASS(!mtx_owned(&w_mtx)); mtx_lock_spin(&w_mtx); /* * If we know that the the lock we are acquiring comes after * the lock we most recently acquired in the lock order tree, * then there is no need for any further checks. */ if (isitmychild(w1, w)) { mtx_unlock_spin(&w_mtx); return; } for (j = 0, lle = *lock_list; lle != NULL; lle = lle->ll_next) { for (i = lle->ll_count - 1; i >= 0; i--, j++) { MPASS(j < WITNESS_COUNT); lock1 = &lle->ll_children[i]; w1 = lock1->li_lock->lo_witness; /* * If this lock doesn't undergo witness checking, * then skip it. */ if (w1 == NULL) { KASSERT((lock1->li_lock->lo_flags & LO_WITNESS) == 0, ("lock missing witness structure")); continue; } /* * If we are locking Giant and this is a sleepable * lock, then skip it. */ if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0 && lock == &Giant.lock_object) continue; /* * If we are locking a sleepable lock and this lock * is Giant, then skip it. */ if ((lock->lo_flags & LO_SLEEPABLE) != 0 && lock1->li_lock == &Giant.lock_object) continue; /* * If we are locking a sleepable lock and this lock * isn't sleepable, we want to treat it as a lock * order violation to enfore a general lock order of * sleepable locks before non-sleepable locks. */ if (((lock->lo_flags & LO_SLEEPABLE) != 0 && (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) goto reversal; /* * If we are locking Giant and this is a non-sleepable * lock, then treat it as a reversal. */ if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 && lock == &Giant.lock_object) goto reversal; /* * Check the lock order hierarchy for a reveresal. */ if (!isitmydescendant(w, w1)) continue; reversal: /* * We have a lock order violation, check to see if it * is allowed or has already been yelled about. */ mtx_unlock_spin(&w_mtx); #ifdef BLESSING /* * If the lock order is blessed, just bail. We don't * look for other lock order violations though, which * may be a bug. */ if (blessed(w, w1)) return; #endif if (lock1->li_lock == &Giant.lock_object) { if (w1->w_Giant_squawked) return; else w1->w_Giant_squawked = 1; } else { if (w1->w_other_squawked) return; else w1->w_other_squawked = 1; } /* * Ok, yell about it. */ if (((lock->lo_flags & LO_SLEEPABLE) != 0 && (lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0)) printf( "lock order reversal: (sleepable after non-sleepable)\n"); else if ((lock1->li_lock->lo_flags & LO_SLEEPABLE) == 0 && lock == &Giant.lock_object) printf( "lock order reversal: (Giant after non-sleepable)\n"); else printf("lock order reversal:\n"); /* * Try to locate an earlier lock with * witness w in our list. 
*/ do { lock2 = &lle->ll_children[i]; MPASS(lock2->li_lock != NULL); if (lock2->li_lock->lo_witness == w) break; if (i == 0 && lle->ll_next != NULL) { lle = lle->ll_next; i = lle->ll_count - 1; MPASS(i >= 0 && i < LOCK_NCHILDREN); } else i--; } while (i >= 0); if (i < 0) { printf(" 1st %p %s (%s) @ %s:%d\n", lock1->li_lock, lock1->li_lock->lo_name, lock1->li_lock->lo_type, lock1->li_file, lock1->li_line); printf(" 2nd %p %s (%s) @ %s:%d\n", lock, lock->lo_name, lock->lo_type, file, line); } else { printf(" 1st %p %s (%s) @ %s:%d\n", lock2->li_lock, lock2->li_lock->lo_name, lock2->li_lock->lo_type, lock2->li_file, lock2->li_line); printf(" 2nd %p %s (%s) @ %s:%d\n", lock1->li_lock, lock1->li_lock->lo_name, lock1->li_lock->lo_type, lock1->li_file, lock1->li_line); printf(" 3rd %p %s (%s) @ %s:%d\n", lock, lock->lo_name, lock->lo_type, file, line); } #ifdef KDB goto debugger; #else return; #endif } } lock1 = &(*lock_list)->ll_children[(*lock_list)->ll_count - 1]; /* * If requested, build a new lock order. However, don't build a new * relationship between a sleepable lock and Giant if it is in the * wrong direction. The correct lock order is that sleepable locks * always come before Giant. */ if (flags & LOP_NEWORDER && !(lock1->li_lock == &Giant.lock_object && (lock->lo_flags & LO_SLEEPABLE) != 0)) { CTR3(KTR_WITNESS, "%s: adding %s as a child of %s", __func__, lock->lo_type, lock1->li_lock->lo_type); if (!itismychild(lock1->li_lock->lo_witness, w)) /* Witness is dead. */ return; } mtx_unlock_spin(&w_mtx); return; #ifdef KDB debugger: if (witness_trace) kdb_backtrace(); if (witness_kdb) - kdb_enter(__func__); + kdb_enter_why(KDB_WHY_WITNESS, __func__); #endif } void witness_lock(struct lock_object *lock, int flags, const char *file, int line) { struct lock_list_entry **lock_list, *lle; struct lock_instance *instance; struct witness *w; struct thread *td; if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || panicstr != NULL) return; w = lock->lo_witness; td = curthread; file = fixup_filename(file); /* Determine lock list for this lock. */ if (LOCK_CLASS(lock)->lc_flags & LC_SLEEPLOCK) lock_list = &td->td_sleeplocks; else lock_list = PCPU_PTR(spinlocks); /* Check to see if we are recursing on a lock we already own. */ instance = find_instance(*lock_list, lock); if (instance != NULL) { instance->li_flags++; CTR4(KTR_WITNESS, "%s: pid %d recursed on %s r=%d", __func__, td->td_proc->p_pid, lock->lo_name, instance->li_flags & LI_RECURSEMASK); instance->li_file = file; instance->li_line = line; return; } /* Update per-witness last file and line acquire. */ w->w_file = file; w->w_line = line; /* Find the next open lock instance in the list and fill it. 
*/ lle = *lock_list; if (lle == NULL || lle->ll_count == LOCK_NCHILDREN) { lle = witness_lock_list_get(); if (lle == NULL) return; lle->ll_next = *lock_list; CTR3(KTR_WITNESS, "%s: pid %d added lle %p", __func__, td->td_proc->p_pid, lle); *lock_list = lle; } instance = &lle->ll_children[lle->ll_count++]; instance->li_lock = lock; instance->li_line = line; instance->li_file = file; if ((flags & LOP_EXCLUSIVE) != 0) instance->li_flags = LI_EXCLUSIVE; else instance->li_flags = 0; CTR4(KTR_WITNESS, "%s: pid %d added %s as lle[%d]", __func__, td->td_proc->p_pid, lock->lo_name, lle->ll_count - 1); } void witness_upgrade(struct lock_object *lock, int flags, const char *file, int line) { struct lock_instance *instance; struct lock_class *class; KASSERT(!witness_cold, ("%s: witness_cold", __func__)); if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) return; class = LOCK_CLASS(lock); file = fixup_filename(file); if ((lock->lo_flags & LO_UPGRADABLE) == 0) panic("upgrade of non-upgradable lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, file, line); if ((flags & LOP_TRYLOCK) == 0) panic("non-try upgrade of lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, file, line); if ((class->lc_flags & LC_SLEEPLOCK) == 0) panic("upgrade of non-sleep lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, file, line); instance = find_instance(curthread->td_sleeplocks, lock); if (instance == NULL) panic("upgrade of unlocked lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, file, line); if ((instance->li_flags & LI_EXCLUSIVE) != 0) panic("upgrade of exclusive lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, file, line); if ((instance->li_flags & LI_RECURSEMASK) != 0) panic("upgrade of recursed lock (%s) %s r=%d @ %s:%d", class->lc_name, lock->lo_name, instance->li_flags & LI_RECURSEMASK, file, line); instance->li_flags |= LI_EXCLUSIVE; } void witness_downgrade(struct lock_object *lock, int flags, const char *file, int line) { struct lock_instance *instance; struct lock_class *class; KASSERT(!witness_cold, ("%s: witness_cold", __func__)); if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) return; class = LOCK_CLASS(lock); file = fixup_filename(file); if ((lock->lo_flags & LO_UPGRADABLE) == 0) panic("downgrade of non-upgradable lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, file, line); if ((class->lc_flags & LC_SLEEPLOCK) == 0) panic("downgrade of non-sleep lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, file, line); instance = find_instance(curthread->td_sleeplocks, lock); if (instance == NULL) panic("downgrade of unlocked lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, file, line); if ((instance->li_flags & LI_EXCLUSIVE) == 0) panic("downgrade of shared lock (%s) %s @ %s:%d", class->lc_name, lock->lo_name, file, line); if ((instance->li_flags & LI_RECURSEMASK) != 0) panic("downgrade of recursed lock (%s) %s r=%d @ %s:%d", class->lc_name, lock->lo_name, instance->li_flags & LI_RECURSEMASK, file, line); instance->li_flags &= ~LI_EXCLUSIVE; } void witness_unlock(struct lock_object *lock, int flags, const char *file, int line) { struct lock_list_entry **lock_list, *lle; struct lock_instance *instance; struct lock_class *class; struct thread *td; register_t s; int i, j; if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL || panicstr != NULL) return; td = curthread; class = LOCK_CLASS(lock); file = fixup_filename(file); /* Find lock instance associated with this lock. 
*/ if (class->lc_flags & LC_SLEEPLOCK) lock_list = &td->td_sleeplocks; else lock_list = PCPU_PTR(spinlocks); for (; *lock_list != NULL; lock_list = &(*lock_list)->ll_next) for (i = 0; i < (*lock_list)->ll_count; i++) { instance = &(*lock_list)->ll_children[i]; if (instance->li_lock == lock) goto found; } panic("lock (%s) %s not locked @ %s:%d", class->lc_name, lock->lo_name, file, line); found: /* First, check for shared/exclusive mismatches. */ if ((instance->li_flags & LI_EXCLUSIVE) != 0 && (flags & LOP_EXCLUSIVE) == 0) { printf("shared unlock of (%s) %s @ %s:%d\n", class->lc_name, lock->lo_name, file, line); printf("while exclusively locked from %s:%d\n", instance->li_file, instance->li_line); panic("excl->ushare"); } if ((instance->li_flags & LI_EXCLUSIVE) == 0 && (flags & LOP_EXCLUSIVE) != 0) { printf("exclusive unlock of (%s) %s @ %s:%d\n", class->lc_name, lock->lo_name, file, line); printf("while share locked from %s:%d\n", instance->li_file, instance->li_line); panic("share->uexcl"); } /* If we are recursed, unrecurse. */ if ((instance->li_flags & LI_RECURSEMASK) > 0) { CTR4(KTR_WITNESS, "%s: pid %d unrecursed on %s r=%d", __func__, td->td_proc->p_pid, instance->li_lock->lo_name, instance->li_flags); instance->li_flags--; return; } /* Otherwise, remove this item from the list. */ s = intr_disable(); CTR4(KTR_WITNESS, "%s: pid %d removed %s from lle[%d]", __func__, td->td_proc->p_pid, instance->li_lock->lo_name, (*lock_list)->ll_count - 1); for (j = i; j < (*lock_list)->ll_count - 1; j++) (*lock_list)->ll_children[j] = (*lock_list)->ll_children[j + 1]; (*lock_list)->ll_count--; intr_restore(s); /* If this lock list entry is now empty, free it. */ if ((*lock_list)->ll_count == 0) { lle = *lock_list; *lock_list = lle->ll_next; CTR3(KTR_WITNESS, "%s: pid %d removed lle %p", __func__, td->td_proc->p_pid, lle); witness_lock_list_free(lle); } } /* * Warn if any locks other than 'lock' are held. Flags can be passed in to * exempt Giant and sleepable locks from the checks as well. If any * non-exempt locks are held, then a supplied message is printed to the * console along with a list of the offending locks. If indicated in the * flags then a failure results in a panic as well. */ int witness_warn(int flags, struct lock_object *lock, const char *fmt, ...) { struct lock_list_entry *lle; struct lock_instance *lock1; struct thread *td; va_list ap; int i, n; if (witness_cold || witness_watch == 0 || panicstr != NULL) return (0); n = 0; td = curthread; for (lle = td->td_sleeplocks; lle != NULL; lle = lle->ll_next) for (i = lle->ll_count - 1; i >= 0; i--) { lock1 = &lle->ll_children[i]; if (lock1->li_lock == lock) continue; if (flags & WARN_GIANTOK && lock1->li_lock == &Giant.lock_object) continue; if (flags & WARN_SLEEPOK && (lock1->li_lock->lo_flags & LO_SLEEPABLE) != 0) continue; if (n == 0) { va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf(" with the following"); if (flags & WARN_SLEEPOK) printf(" non-sleepable"); printf(" locks held:\n"); } n++; witness_list_lock(lock1); } if (PCPU_GET(spinlocks) != NULL) { /* * Since we already hold a spinlock preemption is * already blocked. 
*/ if (n == 0) { va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf(" with the following"); if (flags & WARN_SLEEPOK) printf(" non-sleepable"); printf(" locks held:\n"); } n += witness_list_locks(PCPU_PTR(spinlocks)); } if (flags & WARN_PANIC && n) panic("witness_warn"); #ifdef KDB else if (witness_kdb && n) - kdb_enter(__func__); + kdb_enter_why(KDB_WHY_WITNESS, __func__); else if (witness_trace && n) kdb_backtrace(); #endif return (n); } const char * witness_file(struct lock_object *lock) { struct witness *w; if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) return ("?"); w = lock->lo_witness; return (w->w_file); } int witness_line(struct lock_object *lock) { struct witness *w; if (witness_cold || witness_watch == 0 || lock->lo_witness == NULL) return (0); w = lock->lo_witness; return (w->w_line); } static struct witness * enroll(const char *description, struct lock_class *lock_class) { struct witness *w; if (witness_watch == 0 || panicstr != NULL) return (NULL); if ((lock_class->lc_flags & LC_SPINLOCK) && witness_skipspin) return (NULL); mtx_lock_spin(&w_mtx); STAILQ_FOREACH(w, &w_all, w_list) { if (w->w_name == description || (w->w_refcount > 0 && strcmp(description, w->w_name) == 0)) { w->w_refcount++; mtx_unlock_spin(&w_mtx); if (lock_class != w->w_class) panic( "lock (%s) %s does not match earlier (%s) lock", description, lock_class->lc_name, w->w_class->lc_name); return (w); } } if ((w = witness_get()) == NULL) goto out; w->w_name = description; w->w_class = lock_class; w->w_refcount = 1; STAILQ_INSERT_HEAD(&w_all, w, w_list); if (lock_class->lc_flags & LC_SPINLOCK) { STAILQ_INSERT_HEAD(&w_spin, w, w_typelist); w_spin_cnt++; } else if (lock_class->lc_flags & LC_SLEEPLOCK) { STAILQ_INSERT_HEAD(&w_sleep, w, w_typelist); w_sleep_cnt++; } else { mtx_unlock_spin(&w_mtx); panic("lock class %s is not sleep or spin", lock_class->lc_name); } mtx_unlock_spin(&w_mtx); out: /* * We issue a warning for any spin locks not defined in the static * order list as a way to discourage their use (folks should really * be using non-spin mutexes most of the time). However, several * 3rd part device drivers use spin locks because that is all they * have available on Windows and Linux and they think that normal * mutexes are insufficient. */ if ((lock_class->lc_flags & LC_SPINLOCK) && witness_spin_warn) printf("WITNESS: spin lock %s not in order list\n", description); return (w); } /* Don't let the door bang you on the way out... */ static int depart(struct witness *w) { struct witness_child_list_entry *wcl, *nwcl; struct witness_list *list; struct witness *parent; MPASS(w->w_refcount == 0); if (w->w_class->lc_flags & LC_SLEEPLOCK) { list = &w_sleep; w_sleep_cnt--; } else { list = &w_spin; w_spin_cnt--; } /* * First, we run through the entire tree looking for any * witnesses that the outgoing witness is a child of. For * each parent that we find, we reparent all the direct * children of the outgoing witness to its parent. */ STAILQ_FOREACH(parent, list, w_typelist) { if (!isitmychild(parent, w)) continue; removechild(parent, w); } /* * Now we go through and free up the child list of the * outgoing witness. */ for (wcl = w->w_children; wcl != NULL; wcl = nwcl) { nwcl = wcl->wcl_next; w_child_cnt--; witness_child_free(wcl); } /* * Detach from various lists and free. */ STAILQ_REMOVE(list, w, witness, w_typelist); STAILQ_REMOVE(&w_all, w, witness, w_list); witness_free(w); return (1); } /* * Add "child" as a direct child of "parent". Returns false if * we fail due to out of memory. 
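 *
 * Taken together, insertchild(), itismychild() and isitmydescendant() keep a
 * small DAG of "acquire this before that" edges.  As a rough user-space
 * sketch of that idea (hypothetical code, not part of this file; the names
 * "tcp", "tcpinp" and "so_snd" are taken from the static order list above):
 */

#include <stdio.h>
#include <string.h>

#define	NWITNESS	8

/* Much simplified stand-in for struct witness and its child links. */
struct w {
	const char	*name;
	struct w	*child[NWITNESS];
	int		nchild;
};

static struct w w_tab[NWITNESS];
static int w_cnt;

static struct w *
w_enroll(const char *name)
{
	int i;

	for (i = 0; i < w_cnt; i++)
		if (strcmp(w_tab[i].name, name) == 0)
			return (&w_tab[i]);
	w_tab[w_cnt].name = name;
	return (&w_tab[w_cnt++]);
}

/* Record "parent is acquired before child", the job of itismychild(). */
static void
w_order(struct w *parent, struct w *child)
{
	parent->child[parent->nchild++] = child;
}

/* Depth-first walk of the DAG, the analogue of isitmydescendant(). */
static int
w_descendant(struct w *parent, struct w *child)
{
	int i;

	for (i = 0; i < parent->nchild; i++)
		if (parent->child[i] == child ||
		    w_descendant(parent->child[i], child))
			return (1);
	return (0);
}

int
main(void)
{
	struct w *tcp = w_enroll("tcp");
	struct w *tcpinp = w_enroll("tcpinp");
	struct w *so_snd = w_enroll("so_snd");

	w_order(tcp, tcpinp);
	w_order(tcpinp, so_snd);

	/* Acquiring "tcp" while "so_snd" is already held reverses the order. */
	if (w_descendant(tcp, so_snd))
		printf("lock order reversal: tcp after so_snd\n");
	return (0);
}

/*
 * insertchild() below records one such edge, growing the parent's child list
 * in chunks of WITNESS_NCHILDREN entries.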
*/ static int insertchild(struct witness *parent, struct witness *child) { struct witness_child_list_entry **wcl; MPASS(child != NULL && parent != NULL); /* * Insert "child" after "parent" */ wcl = &parent->w_children; while (*wcl != NULL && (*wcl)->wcl_count == WITNESS_NCHILDREN) wcl = &(*wcl)->wcl_next; if (*wcl == NULL) { *wcl = witness_child_get(); if (*wcl == NULL) return (0); w_child_cnt++; } (*wcl)->wcl_children[(*wcl)->wcl_count++] = child; return (1); } static int itismychild(struct witness *parent, struct witness *child) { struct witness_list *list; MPASS(child != NULL && parent != NULL); if ((parent->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK)) != (child->w_class->lc_flags & (LC_SLEEPLOCK | LC_SPINLOCK))) panic( "%s: parent (%s) and child (%s) are not the same lock type", __func__, parent->w_class->lc_name, child->w_class->lc_name); if (!insertchild(parent, child)) return (0); if (parent->w_class->lc_flags & LC_SLEEPLOCK) list = &w_sleep; else list = &w_spin; return (1); } static void removechild(struct witness *parent, struct witness *child) { struct witness_child_list_entry **wcl, *wcl1; int i; for (wcl = &parent->w_children; *wcl != NULL; wcl = &(*wcl)->wcl_next) for (i = 0; i < (*wcl)->wcl_count; i++) if ((*wcl)->wcl_children[i] == child) goto found; return; found: (*wcl)->wcl_count--; if ((*wcl)->wcl_count > i) (*wcl)->wcl_children[i] = (*wcl)->wcl_children[(*wcl)->wcl_count]; MPASS((*wcl)->wcl_children[i] != NULL); if ((*wcl)->wcl_count != 0) return; wcl1 = *wcl; *wcl = wcl1->wcl_next; w_child_cnt--; witness_child_free(wcl1); } static int isitmychild(struct witness *parent, struct witness *child) { struct witness_child_list_entry *wcl; int i; for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { for (i = 0; i < wcl->wcl_count; i++) { if (wcl->wcl_children[i] == child) return (1); } } return (0); } static int isitmydescendant(struct witness *parent, struct witness *child) { struct witness_child_list_entry *wcl; int i, j; if (isitmychild(parent, child)) return (1); j = 0; for (wcl = parent->w_children; wcl != NULL; wcl = wcl->wcl_next) { MPASS(j < 1000); for (i = 0; i < wcl->wcl_count; i++) { if (isitmydescendant(wcl->wcl_children[i], child)) return (1); } j++; } return (0); } #ifdef BLESSING static int blessed(struct witness *w1, struct witness *w2) { int i; struct witness_blessed *b; for (i = 0; i < blessed_count; i++) { b = &blessed_list[i]; if (strcmp(w1->w_name, b->b_lock1) == 0) { if (strcmp(w2->w_name, b->b_lock2) == 0) return (1); continue; } if (strcmp(w1->w_name, b->b_lock2) == 0) if (strcmp(w2->w_name, b->b_lock1) == 0) return (1); } return (0); } #endif static struct witness * witness_get(void) { struct witness *w; if (witness_watch == 0) { mtx_unlock_spin(&w_mtx); return (NULL); } if (STAILQ_EMPTY(&w_free)) { witness_watch = 0; mtx_unlock_spin(&w_mtx); printf("%s: witness exhausted\n", __func__); return (NULL); } w = STAILQ_FIRST(&w_free); STAILQ_REMOVE_HEAD(&w_free, w_list); w_free_cnt--; bzero(w, sizeof(*w)); return (w); } static void witness_free(struct witness *w) { STAILQ_INSERT_HEAD(&w_free, w, w_list); w_free_cnt++; } static struct witness_child_list_entry * witness_child_get(void) { struct witness_child_list_entry *wcl; if (witness_watch == 0) { mtx_unlock_spin(&w_mtx); return (NULL); } wcl = w_child_free; if (wcl == NULL) { witness_watch = 0; mtx_unlock_spin(&w_mtx); printf("%s: witness exhausted\n", __func__); return (NULL); } w_child_free = wcl->wcl_next; w_child_free_cnt--; bzero(wcl, sizeof(*wcl)); return (wcl); } static void 
witness_child_free(struct witness_child_list_entry *wcl) { wcl->wcl_next = w_child_free; w_child_free = wcl; w_child_free_cnt++; } static struct lock_list_entry * witness_lock_list_get(void) { struct lock_list_entry *lle; if (witness_watch == 0) return (NULL); mtx_lock_spin(&w_mtx); lle = w_lock_list_free; if (lle == NULL) { witness_watch = 0; mtx_unlock_spin(&w_mtx); printf("%s: witness exhausted\n", __func__); return (NULL); } w_lock_list_free = lle->ll_next; mtx_unlock_spin(&w_mtx); bzero(lle, sizeof(*lle)); return (lle); } static void witness_lock_list_free(struct lock_list_entry *lle) { mtx_lock_spin(&w_mtx); lle->ll_next = w_lock_list_free; w_lock_list_free = lle; mtx_unlock_spin(&w_mtx); } static struct lock_instance * find_instance(struct lock_list_entry *lock_list, struct lock_object *lock) { struct lock_list_entry *lle; struct lock_instance *instance; int i; for (lle = lock_list; lle != NULL; lle = lle->ll_next) for (i = lle->ll_count - 1; i >= 0; i--) { instance = &lle->ll_children[i]; if (instance->li_lock == lock) return (instance); } return (NULL); } static void witness_list_lock(struct lock_instance *instance) { struct lock_object *lock; lock = instance->li_lock; printf("%s %s %s", (instance->li_flags & LI_EXCLUSIVE) != 0 ? "exclusive" : "shared", LOCK_CLASS(lock)->lc_name, lock->lo_name); if (lock->lo_type != lock->lo_name) printf(" (%s)", lock->lo_type); printf(" r = %d (%p) locked @ %s:%d\n", instance->li_flags & LI_RECURSEMASK, lock, instance->li_file, instance->li_line); } #ifdef DDB static int witness_thread_has_locks(struct thread *td) { return (td->td_sleeplocks != NULL); } static int witness_proc_has_locks(struct proc *p) { struct thread *td; FOREACH_THREAD_IN_PROC(p, td) { if (witness_thread_has_locks(td)) return (1); } return (0); } #endif int witness_list_locks(struct lock_list_entry **lock_list) { struct lock_list_entry *lle; int i, nheld; nheld = 0; for (lle = *lock_list; lle != NULL; lle = lle->ll_next) for (i = lle->ll_count - 1; i >= 0; i--) { witness_list_lock(&lle->ll_children[i]); nheld++; } return (nheld); } /* * This is a bit risky at best. We call this function when we have timed * out acquiring a spin lock, and we assume that the other CPU is stuck * with this lock held. So, we go groveling around in the other CPU's * per-cpu data to try to find the lock instance for this spin lock to * see when it was last acquired. 
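 * The lookup itself is the plain backward scan that find_instance() does over
 * a chunked list of lock instances.  A rough user-space sketch of that scan
 * (hypothetical code, not part of this file; the lock names come from the
 * static order list above):
 */

#include <stdio.h>
#include <string.h>

#define	NCHILDREN	4	/* stand-in for LOCK_NCHILDREN */

/* Much simplified stand-ins for struct lock_instance / lock_list_entry. */
struct instance {
	const char	*li_name;
};

struct list_entry {
	struct list_entry	*ll_next;
	int			ll_count;
	struct instance		ll_children[NCHILDREN];
};

/* Newest acquisitions sit at the end of the first chunk, so scan backwards. */
static struct instance *
find(struct list_entry *head, const char *name)
{
	struct list_entry *lle;
	int i;

	for (lle = head; lle != NULL; lle = lle->ll_next)
		for (i = lle->ll_count - 1; i >= 0; i--)
			if (strcmp(lle->ll_children[i].li_name, name) == 0)
				return (&lle->ll_children[i]);
	return (NULL);
}

int
main(void)
{
	struct list_entry older = { NULL, 1, { { "vnode interlock" } } };
	struct list_entry newer = { &older, 2,
	    { { "system map" }, { "vm page queue mutex" } } };

	printf("vnode interlock is %s\n",
	    find(&newer, "vnode interlock") != NULL ? "held" : "not held");
	return (0);
}

/*
 * witness_display_spinlock() applies the same scan to the owning CPU's
 * pc_spinlocks list.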
*/ void witness_display_spinlock(struct lock_object *lock, struct thread *owner) { struct lock_instance *instance; struct pcpu *pc; if (owner->td_critnest == 0 || owner->td_oncpu == NOCPU) return; pc = pcpu_find(owner->td_oncpu); instance = find_instance(pc->pc_spinlocks, lock); if (instance != NULL) witness_list_lock(instance); } void witness_save(struct lock_object *lock, const char **filep, int *linep) { struct lock_list_entry *lock_list; struct lock_instance *instance; struct lock_class *class; KASSERT(!witness_cold, ("%s: witness_cold", __func__)); if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) return; class = LOCK_CLASS(lock); if (class->lc_flags & LC_SLEEPLOCK) lock_list = curthread->td_sleeplocks; else { if (witness_skipspin) return; lock_list = PCPU_GET(spinlocks); } instance = find_instance(lock_list, lock); if (instance == NULL) panic("%s: lock (%s) %s not locked", __func__, class->lc_name, lock->lo_name); *filep = instance->li_file; *linep = instance->li_line; } void witness_restore(struct lock_object *lock, const char *file, int line) { struct lock_list_entry *lock_list; struct lock_instance *instance; struct lock_class *class; KASSERT(!witness_cold, ("%s: witness_cold", __func__)); if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) return; class = LOCK_CLASS(lock); if (class->lc_flags & LC_SLEEPLOCK) lock_list = curthread->td_sleeplocks; else { if (witness_skipspin) return; lock_list = PCPU_GET(spinlocks); } instance = find_instance(lock_list, lock); if (instance == NULL) panic("%s: lock (%s) %s not locked", __func__, class->lc_name, lock->lo_name); lock->lo_witness->w_file = file; lock->lo_witness->w_line = line; instance->li_file = file; instance->li_line = line; } void witness_assert(struct lock_object *lock, int flags, const char *file, int line) { #ifdef INVARIANT_SUPPORT struct lock_instance *instance; struct lock_class *class; if (lock->lo_witness == NULL || witness_watch == 0 || panicstr != NULL) return; class = LOCK_CLASS(lock); if ((class->lc_flags & LC_SLEEPLOCK) != 0) instance = find_instance(curthread->td_sleeplocks, lock); else if ((class->lc_flags & LC_SPINLOCK) != 0) instance = find_instance(PCPU_GET(spinlocks), lock); else { panic("Lock (%s) %s is not sleep or spin!", class->lc_name, lock->lo_name); } file = fixup_filename(file); switch (flags) { case LA_UNLOCKED: if (instance != NULL) panic("Lock (%s) %s locked @ %s:%d.", class->lc_name, lock->lo_name, file, line); break; case LA_LOCKED: case LA_LOCKED | LA_RECURSED: case LA_LOCKED | LA_NOTRECURSED: case LA_SLOCKED: case LA_SLOCKED | LA_RECURSED: case LA_SLOCKED | LA_NOTRECURSED: case LA_XLOCKED: case LA_XLOCKED | LA_RECURSED: case LA_XLOCKED | LA_NOTRECURSED: if (instance == NULL) { panic("Lock (%s) %s not locked @ %s:%d.", class->lc_name, lock->lo_name, file, line); break; } if ((flags & LA_XLOCKED) != 0 && (instance->li_flags & LI_EXCLUSIVE) == 0) panic("Lock (%s) %s not exclusively locked @ %s:%d.", class->lc_name, lock->lo_name, file, line); if ((flags & LA_SLOCKED) != 0 && (instance->li_flags & LI_EXCLUSIVE) != 0) panic("Lock (%s) %s exclusively locked @ %s:%d.", class->lc_name, lock->lo_name, file, line); if ((flags & LA_RECURSED) != 0 && (instance->li_flags & LI_RECURSEMASK) == 0) panic("Lock (%s) %s not recursed @ %s:%d.", class->lc_name, lock->lo_name, file, line); if ((flags & LA_NOTRECURSED) != 0 && (instance->li_flags & LI_RECURSEMASK) != 0) panic("Lock (%s) %s recursed @ %s:%d.", class->lc_name, lock->lo_name, file, line); break; default: 
panic("Invalid lock assertion at %s:%d.", file, line); } #endif /* INVARIANT_SUPPORT */ } #ifdef DDB static void witness_list(struct thread *td) { KASSERT(!witness_cold, ("%s: witness_cold", __func__)); KASSERT(kdb_active, ("%s: not in the debugger", __func__)); if (witness_watch == 0) return; witness_list_locks(&td->td_sleeplocks); /* * We only handle spinlocks if td == curthread. This is somewhat broken * if td is currently executing on some other CPU and holds spin locks * as we won't display those locks. If we had a MI way of getting * the per-cpu data for a given cpu then we could use * td->td_oncpu to get the list of spinlocks for this thread * and "fix" this. * * That still wouldn't really fix this unless we locked the scheduler * lock or stopped the other CPU to make sure it wasn't changing the * list out from under us. It is probably best to just not try to * handle threads on other CPU's for now. */ if (td == curthread && PCPU_GET(spinlocks) != NULL) witness_list_locks(PCPU_PTR(spinlocks)); } DB_SHOW_COMMAND(locks, db_witness_list) { struct thread *td; if (have_addr) td = db_lookup_thread(addr, TRUE); else td = kdb_thread; witness_list(td); } DB_SHOW_COMMAND(alllocks, db_witness_list_all) { struct thread *td; struct proc *p; /* * It would be nice to list only threads and processes that actually * held sleep locks, but that information is currently not exported * by WITNESS. */ FOREACH_PROC_IN_SYSTEM(p) { if (!witness_proc_has_locks(p)) continue; FOREACH_THREAD_IN_PROC(p, td) { if (!witness_thread_has_locks(td)) continue; db_printf("Process %d (%s) thread %p (%d)\n", p->p_pid, p->p_comm, td, td->td_tid); witness_list(td); } } } DB_SHOW_COMMAND(witness, db_witness_display) { witness_display(db_printf); } #endif Index: stable/7/sys/kern/vfs_subr.c =================================================================== --- stable/7/sys/kern/vfs_subr.c (revision 177733) +++ stable/7/sys/kern/vfs_subr.c (revision 177734) @@ -1,4031 +1,4031 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 */ /* * External virtual filesystem routines */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #endif static MALLOC_DEFINE(M_NETADDR, "subr_export_host", "Export host address structure"); static void delmntque(struct vnode *vp); static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo); static void syncer_shutdown(void *arg, int howto); static int vtryrecycle(struct vnode *vp); static void vbusy(struct vnode *vp); static void vinactive(struct vnode *, struct thread *); static void v_incr_usecount(struct vnode *); static void v_decr_usecount(struct vnode *); static void v_decr_useonly(struct vnode *); static void v_upgrade_usecount(struct vnode *); static void vfree(struct vnode *); static void vnlru_free(int); static void vdestroy(struct vnode *); static void vgonel(struct vnode *); static void vfs_knllock(void *arg); static void vfs_knlunlock(void *arg); static int vfs_knllocked(void *arg); /* * Enable Giant pushdown based on whether or not the vm is mpsafe in this * build. Without mpsafevm the buffer cache can not run Giant free. */ int mpsafe_vfs = 1; TUNABLE_INT("debug.mpsafevfs", &mpsafe_vfs); SYSCTL_INT(_debug, OID_AUTO, mpsafevfs, CTLFLAG_RD, &mpsafe_vfs, 0, "MPSAFE VFS"); /* * Number of vnodes in existence. Increased whenever getnewvnode() * allocates a new vnode, decreased on vdestroy() called on VI_DOOMed * vnode. */ static unsigned long numvnodes; SYSCTL_LONG(_vfs, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); /* * Conversion tables for conversion from vnode types to inode formats * and back. */ enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD, }; int vttoif_tab[10] = { 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK, S_IFSOCK, S_IFIFO, S_IFMT, S_IFMT }; /* * List of vnodes that are ready for recycling. */ static TAILQ_HEAD(freelst, vnode) vnode_free_list; /* * Free vnode target. Free vnodes may simply be files which have been stat'd * but not read. This is somewhat common, and a small cache of such files * should be kept to avoid recreation costs. */ static u_long wantfreevnodes; SYSCTL_LONG(_vfs, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""); /* Number of vnodes in the free list. */ static u_long freevnodes; SYSCTL_LONG(_vfs, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); /* * Various variables used for debugging the new implementation of * reassignbuf(). * XXX these are probably of (very) limited utility now. 
*/ static int reassignbufcalls; SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0, ""); /* * Cache for the mount type id assigned to NFS. This is used for * special checks in nfs/nfs_nqlease.c and vm/vnode_pager.c. */ int nfs_mount_type = -1; /* To keep more than one thread at a time from running vfs_getnewfsid */ static struct mtx mntid_mtx; /* * Lock for any access to the following: * vnode_free_list * numvnodes * freevnodes */ static struct mtx vnode_free_list_mtx; /* Publicly exported FS */ struct nfs_public nfs_pub; /* Zone for allocation of new vnodes - used exclusively by getnewvnode() */ static uma_zone_t vnode_zone; static uma_zone_t vnodepoll_zone; /* Set to 1 to print out reclaim of active vnodes */ int prtactive; /* * The workitem queue. * * It is useful to delay writes of file data and filesystem metadata * for tens of seconds so that quickly created and deleted files need * not waste disk bandwidth being created and removed. To realize this, * we append vnodes to a "workitem" queue. When running with a soft * updates implementation, most pending metadata dependencies should * not wait for more than a few seconds. Thus, mounted on block devices * are delayed only about a half the time that file data is delayed. * Similarly, directory updates are more critical, so are only delayed * about a third the time that file data is delayed. Thus, there are * SYNCER_MAXDELAY queues that are processed round-robin at a rate of * one each second (driven off the filesystem syncer process). The * syncer_delayno variable indicates the next queue that is to be processed. * Items that need to be processed soon are placed in this queue: * * syncer_workitem_pending[syncer_delayno] * * A delay of fifteen seconds is done by placing the request fifteen * entries later in the queue: * * syncer_workitem_pending[(syncer_delayno + 15) & syncer_mask] * */ static int syncer_delayno; static long syncer_mask; LIST_HEAD(synclist, bufobj); static struct synclist *syncer_workitem_pending; /* * The sync_mtx protects: * bo->bo_synclist * sync_vnode_count * syncer_delayno * syncer_state * syncer_workitem_pending * syncer_worklist_len * rushjob */ static struct mtx sync_mtx; #define SYNCER_MAXDELAY 32 static int syncer_maxdelay = SYNCER_MAXDELAY; /* maximum delay time */ static int syncdelay = 30; /* max time to delay syncing data */ static int filedelay = 30; /* time to delay syncing files */ SYSCTL_INT(_kern, OID_AUTO, filedelay, CTLFLAG_RW, &filedelay, 0, ""); static int dirdelay = 29; /* time to delay syncing directories */ SYSCTL_INT(_kern, OID_AUTO, dirdelay, CTLFLAG_RW, &dirdelay, 0, ""); static int metadelay = 28; /* time to delay syncing metadata */ SYSCTL_INT(_kern, OID_AUTO, metadelay, CTLFLAG_RW, &metadelay, 0, ""); static int rushjob; /* number of slots to run ASAP */ static int stat_rush_requests; /* number of times I/O speeded up */ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0, ""); /* * When shutting down the syncer, run it at four times normal speed. */ #define SYNCER_SHUTDOWN_SPEEDUP 4 static int sync_vnode_count; static int syncer_worklist_len; static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY } syncer_state; /* * Number of vnodes we want to exist at any one time. This is mostly used * to size hash tables in vnode-related code. It is normally not used in * getnewvnode(), as wantfreevnodes is normally nonzero.) * * XXX desiredvnodes is historical cruft and should not exist. 
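 */

/*
 * The syncer's delay ring described earlier is easier to see in a small
 * user-space sketch (hypothetical code, not part of this file; the real slots
 * hold synclists of bufobjs, here they are plain counters):
 */

#include <stdio.h>

#define	SYNCER_MAXDELAY	32	/* must remain a power of two for the mask */

static int	workitem_pending[SYNCER_MAXDELAY];
static int	delayno;
static int	mask = SYNCER_MAXDELAY - 1;

/* Queue a work item "delay" seconds in the future. */
static void
add_to_worklist(int delay)
{
	if (delay > mask)
		delay = mask;
	workitem_pending[(delayno + delay) & mask]++;
}

int
main(void)
{
	int second, slot;

	add_to_worklist(15);	/* the fifteen-second example above */
	add_to_worklist(28);	/* metadelay */

	/* The syncer drains one slot per second, round-robin. */
	for (second = 0; second < SYNCER_MAXDELAY; second++) {
		slot = delayno;
		delayno = (delayno + 1) & mask;
		if (workitem_pending[slot] != 0)
			printf("second %2d: %d item(s) due\n", second,
			    workitem_pending[slot]);
		workitem_pending[slot] = 0;
	}
	return (0);
}

/* desiredvnodes below is the sizing knob described above (kern.maxvnodes). */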
*/ int desiredvnodes; SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW, &desiredvnodes, 0, "Maximum number of vnodes"); SYSCTL_INT(_kern, OID_AUTO, minvnodes, CTLFLAG_RW, &wantfreevnodes, 0, "Minimum number of vnodes (legacy)"); static int vnlru_nowhere; SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW, &vnlru_nowhere, 0, "Number of times the vnlru process ran without success"); /* * Macros to control when a vnode is freed and recycled. All require * the vnode interlock. */ #define VCANRECYCLE(vp) (((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt) #define VSHOULDFREE(vp) (!((vp)->v_iflag & VI_FREE) && !(vp)->v_holdcnt) #define VSHOULDBUSY(vp) (((vp)->v_iflag & VI_FREE) && (vp)->v_holdcnt) /* * Initialize the vnode management data structures. */ #ifndef MAXVNODES_MAX #define MAXVNODES_MAX 100000 #endif static void vntblinit(void *dummy __unused) { /* * Desiredvnodes is a function of the physical memory size and * the kernel's heap size. Specifically, desiredvnodes scales * in proportion to the physical memory size until two fifths * of the kernel's heap size is consumed by vnodes and vm * objects. */ desiredvnodes = min(maxproc + cnt.v_page_count / 4, 2 * vm_kmem_size / (5 * (sizeof(struct vm_object) + sizeof(struct vnode)))); if (desiredvnodes > MAXVNODES_MAX) { if (bootverbose) printf("Reducing kern.maxvnodes %d -> %d\n", desiredvnodes, MAXVNODES_MAX); desiredvnodes = MAXVNODES_MAX; } wantfreevnodes = desiredvnodes / 4; mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF); TAILQ_INIT(&vnode_free_list); mtx_init(&vnode_free_list_mtx, "vnode_free_list", NULL, MTX_DEF); vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); /* * Initialize the filesystem syncer. */ syncer_workitem_pending = hashinit(syncer_maxdelay, M_VNODE, &syncer_mask); syncer_maxdelay = syncer_mask + 1; mtx_init(&sync_mtx, "Syncer mtx", NULL, MTX_DEF); } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_FIRST, vntblinit, NULL) /* * Mark a mount point as busy. Used to synchronize access and to delay * unmounting. Interlock is not released on failure. */ int vfs_busy(struct mount *mp, int flags, struct mtx *interlkp, struct thread *td) { int lkflags; MNT_ILOCK(mp); MNT_REF(mp); if (mp->mnt_kern_flag & MNTK_UNMOUNT) { if (flags & LK_NOWAIT) { MNT_REL(mp); MNT_IUNLOCK(mp); return (ENOENT); } if (interlkp) mtx_unlock(interlkp); mp->mnt_kern_flag |= MNTK_MWAIT; /* * Since all busy locks are shared except the exclusive * lock granted when unmounting, the only place that a * wakeup needs to be done is at the release of the * exclusive lock at the end of dounmount. */ msleep(mp, MNT_MTX(mp), PVFS, "vfs_busy", 0); MNT_REL(mp); MNT_IUNLOCK(mp); if (interlkp) mtx_lock(interlkp); return (ENOENT); } if (interlkp) mtx_unlock(interlkp); lkflags = LK_SHARED | LK_INTERLOCK; if (lockmgr(&mp->mnt_lock, lkflags, MNT_MTX(mp), td)) panic("vfs_busy: unexpected lock failure"); return (0); } /* * Free a busy filesystem. */ void vfs_unbusy(struct mount *mp, struct thread *td) { lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td); vfs_rel(mp); } /* * Lookup a mount point by filesystem identifier. 
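 */

/*
 * A worked example of the vntblinit() sizing above, with made-up machine
 * figures and structure sizes (hypothetical code, not part of this file):
 */

#include <stdio.h>

#define	MAXPROC		6164		/* made up */
#define	V_PAGE_COUNT	262144UL	/* 1 GB of 4 KB pages, made up */
#define	VM_KMEM_SIZE	(320UL * 1024 * 1024)
#define	SIZEOF_OBJECT	216UL		/* stands in for sizeof(struct vm_object) */
#define	SIZEOF_VNODE	480UL		/* stands in for sizeof(struct vnode) */
#define	MAXVNODES_MAX	100000

int
main(void)
{
	unsigned long by_pages, by_kmem, desired;

	by_pages = MAXPROC + V_PAGE_COUNT / 4;
	by_kmem = 2 * VM_KMEM_SIZE / (5 * (SIZEOF_OBJECT + SIZEOF_VNODE));
	desired = by_pages < by_kmem ? by_pages : by_kmem;
	if (desired > MAXVNODES_MAX)
		desired = MAXVNODES_MAX;

	printf("kern.maxvnodes %lu, wantfreevnodes %lu\n",
	    desired, desired / 4);
	return (0);
}

/* vfs_getvfs() below looks a mount point up by its filesystem identifier. */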
*/ struct mount * vfs_getvfs(fsid_t *fsid) { struct mount *mp; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && mp->mnt_stat.f_fsid.val[1] == fsid->val[1]) { vfs_ref(mp); mtx_unlock(&mountlist_mtx); return (mp); } } mtx_unlock(&mountlist_mtx); return ((struct mount *) 0); } /* * Check if a user can access privileged mount options. */ int vfs_suser(struct mount *mp, struct thread *td) { int error; /* * If the thread is jailed, but this is not a jail-friendly file * system, deny immediately. */ if (jailed(td->td_ucred) && !(mp->mnt_vfc->vfc_flags & VFCF_JAIL)) return (EPERM); /* * If the file system was mounted outside a jail and a jailed thread * tries to access it, deny immediately. */ if (!jailed(mp->mnt_cred) && jailed(td->td_ucred)) return (EPERM); /* * If the file system was mounted inside different jail that the jail of * the calling thread, deny immediately. */ if (jailed(mp->mnt_cred) && jailed(td->td_ucred) && mp->mnt_cred->cr_prison != td->td_ucred->cr_prison) { return (EPERM); } if ((mp->mnt_flag & MNT_USER) == 0 || mp->mnt_cred->cr_uid != td->td_ucred->cr_uid) { if ((error = priv_check(td, PRIV_VFS_MOUNT_OWNER)) != 0) return (error); } return (0); } /* * Get a new unique fsid. Try to make its val[0] unique, since this value * will be used to create fake device numbers for stat(). Also try (but * not so hard) make its val[0] unique mod 2^16, since some emulators only * support 16-bit device numbers. We end up with unique val[0]'s for the * first 2^16 calls and unique val[0]'s mod 2^16 for the first 2^8 calls. * * Keep in mind that several mounts may be running in parallel. Starting * the search one past where the previous search terminated is both a * micro-optimization and a defense against returning the same fsid to * different mounts. */ void vfs_getnewfsid(struct mount *mp) { static u_int16_t mntid_base; struct mount *nmp; fsid_t tfsid; int mtype; mtx_lock(&mntid_mtx); mtype = mp->mnt_vfc->vfc_typenum; tfsid.val[1] = mtype; mtype = (mtype & 0xFF) << 24; for (;;) { tfsid.val[0] = makedev(255, mtype | ((mntid_base & 0xFF00) << 8) | (mntid_base & 0xFF)); mntid_base++; if ((nmp = vfs_getvfs(&tfsid)) == NULL) break; vfs_rel(nmp); } mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; mp->mnt_stat.f_fsid.val[1] = tfsid.val[1]; mtx_unlock(&mntid_mtx); } /* * Knob to control the precision of file timestamps: * * 0 = seconds only; nanoseconds zeroed. * 1 = seconds and nanoseconds, accurate within 1/HZ. * 2 = seconds and nanoseconds, truncated to microseconds. * >=3 = seconds and nanoseconds, maximum precision. */ enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC }; static int timestamp_precision = TSP_SEC; SYSCTL_INT(_vfs, OID_AUTO, timestamp_precision, CTLFLAG_RW, ×tamp_precision, 0, ""); /* * Get a current timestamp. 
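 * The vfs.timestamp_precision values enumerate how much of that timestamp is
 * kept.  A rough user-space sketch of the four policies (hypothetical code,
 * not part of this file; HZ is assumed to be 100, and TSP_HZ is approximated
 * by truncation where the kernel really reads a cheaper cached clock):
 */

#include <stdio.h>
#include <time.h>

enum { TSP_SEC, TSP_HZ, TSP_USEC, TSP_NSEC };

static void
stamp(struct timespec *tsp, int precision)
{
	clock_gettime(CLOCK_REALTIME, tsp);
	switch (precision) {
	case TSP_SEC:
		tsp->tv_nsec = 0;			/* seconds only */
		break;
	case TSP_HZ:
		tsp->tv_nsec -= tsp->tv_nsec % (1000000000L / 100);
		break;
	case TSP_USEC:
		tsp->tv_nsec -= tsp->tv_nsec % 1000;	/* microseconds */
		break;
	case TSP_NSEC:
	default:
		break;					/* full precision */
	}
}

int
main(void)
{
	struct timespec ts;
	int i;

	for (i = TSP_SEC; i <= TSP_NSEC; i++) {
		stamp(&ts, i);
		printf("precision %d: %lld.%09ld\n", i,
		    (long long)ts.tv_sec, ts.tv_nsec);
	}
	return (0);
}

/*
 * vfs_timestamp() below implements the same four policies in the kernel.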
*/ void vfs_timestamp(struct timespec *tsp) { struct timeval tv; switch (timestamp_precision) { case TSP_SEC: tsp->tv_sec = time_second; tsp->tv_nsec = 0; break; case TSP_HZ: getnanotime(tsp); break; case TSP_USEC: microtime(&tv); TIMEVAL_TO_TIMESPEC(&tv, tsp); break; case TSP_NSEC: default: nanotime(tsp); break; } } /* * Set vnode attributes to VNOVAL */ void vattr_null(struct vattr *vap) { vap->va_type = VNON; vap->va_size = VNOVAL; vap->va_bytes = VNOVAL; vap->va_mode = VNOVAL; vap->va_nlink = VNOVAL; vap->va_uid = VNOVAL; vap->va_gid = VNOVAL; vap->va_fsid = VNOVAL; vap->va_fileid = VNOVAL; vap->va_blocksize = VNOVAL; vap->va_rdev = VNOVAL; vap->va_atime.tv_sec = VNOVAL; vap->va_atime.tv_nsec = VNOVAL; vap->va_mtime.tv_sec = VNOVAL; vap->va_mtime.tv_nsec = VNOVAL; vap->va_ctime.tv_sec = VNOVAL; vap->va_ctime.tv_nsec = VNOVAL; vap->va_birthtime.tv_sec = VNOVAL; vap->va_birthtime.tv_nsec = VNOVAL; vap->va_flags = VNOVAL; vap->va_gen = VNOVAL; vap->va_vaflags = 0; } /* * This routine is called when we have too many vnodes. It attempts * to free vnodes and will potentially free vnodes that still * have VM backing store (VM backing store is typically the cause * of a vnode blowout so we want to do this). Therefore, this operation * is not considered cheap. * * A number of conditions may prevent a vnode from being reclaimed. * the buffer cache may have references on the vnode, a directory * vnode may still have references due to the namei cache representing * underlying files, or the vnode may be in active use. It is not * desireable to reuse such vnodes. These conditions may cause the * number of vnodes to reach some minimum value regardless of what * you set kern.maxvnodes to. Do not set kern.maxvnodes too low. */ static int vlrureclaim(struct mount *mp) { struct thread *td; struct vnode *vp; int done; int trigger; int usevnodes; int count; /* * Calculate the trigger point, don't allow user * screwups to blow us up. This prevents us from * recycling vnodes with lots of resident pages. We * aren't trying to free memory, we are trying to * free vnodes. */ usevnodes = desiredvnodes; if (usevnodes <= 0) usevnodes = 1; trigger = cnt.v_page_count * 2 / usevnodes; done = 0; td = curthread; vn_start_write(NULL, &mp, V_WAIT); MNT_ILOCK(mp); count = mp->mnt_nvnodelistsize / 10 + 1; while (count != 0) { vp = TAILQ_FIRST(&mp->mnt_nvnodelist); while (vp != NULL && vp->v_type == VMARKER) vp = TAILQ_NEXT(vp, v_nmntvnodes); if (vp == NULL) break; TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); --count; if (!VI_TRYLOCK(vp)) goto next_iter; /* * If it's been deconstructed already, it's still * referenced, or it exceeds the trigger, skip it. */ if (vp->v_usecount || !LIST_EMPTY(&(vp)->v_cache_src) || (vp->v_iflag & VI_DOOMED) != 0 || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VI_UNLOCK(vp); goto next_iter; } MNT_IUNLOCK(mp); vholdl(vp); if (VOP_LOCK(vp, LK_INTERLOCK|LK_EXCLUSIVE|LK_NOWAIT, td)) { vdrop(vp); goto next_iter_mntunlocked; } VI_LOCK(vp); /* * v_usecount may have been bumped after VOP_LOCK() dropped * the vnode interlock and before it was locked again. * * It is not necessary to recheck VI_DOOMED because it can * only be set by another thread that holds both the vnode * lock and vnode interlock. 
If another thread has the * vnode lock before we get to VOP_LOCK() and obtains the * vnode interlock after VOP_LOCK() drops the vnode * interlock, the other thread will be unable to drop the * vnode lock before our VOP_LOCK() call fails. */ if (vp->v_usecount || !LIST_EMPTY(&(vp)->v_cache_src) || (vp->v_object != NULL && vp->v_object->resident_page_count > trigger)) { VOP_UNLOCK(vp, LK_INTERLOCK, td); goto next_iter_mntunlocked; } KASSERT((vp->v_iflag & VI_DOOMED) == 0, ("VI_DOOMED unexpectedly detected in vlrureclaim()")); vgonel(vp); VOP_UNLOCK(vp, 0, td); vdropl(vp); done++; next_iter_mntunlocked: if ((count % 256) != 0) goto relock_mnt; goto yield; next_iter: if ((count % 256) != 0) continue; MNT_IUNLOCK(mp); yield: uio_yield(); relock_mnt: MNT_ILOCK(mp); } MNT_IUNLOCK(mp); vn_finished_write(mp); return done; } /* * Attempt to keep the free list at wantfreevnodes length. */ static void vnlru_free(int count) { struct vnode *vp; int vfslocked; mtx_assert(&vnode_free_list_mtx, MA_OWNED); for (; count > 0; count--) { vp = TAILQ_FIRST(&vnode_free_list); /* * The list can be modified while the free_list_mtx * has been dropped and vp could be NULL here. */ if (!vp) break; VNASSERT(vp->v_op != NULL, vp, ("vnlru_free: vnode already reclaimed.")); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); /* * Don't recycle if we can't get the interlock. */ if (!VI_TRYLOCK(vp)) { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); continue; } VNASSERT(VCANRECYCLE(vp), vp, ("vp inconsistent on freelist")); freevnodes--; vp->v_iflag &= ~VI_FREE; vholdl(vp); mtx_unlock(&vnode_free_list_mtx); VI_UNLOCK(vp); vfslocked = VFS_LOCK_GIANT(vp->v_mount); vtryrecycle(vp); VFS_UNLOCK_GIANT(vfslocked); /* * If the recycle succeeded, this vdrop will actually free * the vnode. If not, it will simply place it back on * the free list. */ vdrop(vp); mtx_lock(&vnode_free_list_mtx); } } /* * Attempt to recycle vnodes in a context that is always safe to block. * Calling vlrureclaim() from the bowels of filesystem code has some * interesting deadlock problems.
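 *
 * For scale, the two figures vlrureclaim() works from, computed with made-up
 * numbers (hypothetical code, not part of this file):
 */

#include <stdio.h>

int
main(void)
{
	unsigned long v_page_count = 262144;	/* 1 GB of 4 KB pages, made up */
	unsigned long desiredvnodes = 71700;	/* made up */
	unsigned long nvnodelistsize = 52000;	/* vnodes on one mount, made up */
	unsigned long trigger, quota;

	/* Skip vnodes caching more resident pages than this. */
	trigger = v_page_count * 2 / desiredvnodes;
	/* Visit at most a tenth of the mount's vnode list per pass. */
	quota = nvnodelistsize / 10 + 1;

	printf("trigger %lu pages, %lu vnodes per pass\n", trigger, quota);
	return (0);
}

/*
 * The vnlru kernel process below provides that safe context.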
*/ static struct proc *vnlruproc; static int vnlruproc_sig; static void vnlru_proc(void) { struct mount *mp, *nmp; int done; struct proc *p = vnlruproc; struct thread *td = curthread; EVENTHANDLER_REGISTER(shutdown_pre_sync, kproc_shutdown, p, SHUTDOWN_PRI_FIRST); mtx_lock(&Giant); for (;;) { kthread_suspend_check(p); mtx_lock(&vnode_free_list_mtx); if (freevnodes > wantfreevnodes) vnlru_free(freevnodes - wantfreevnodes); if (numvnodes <= desiredvnodes * 9 / 10) { vnlruproc_sig = 0; wakeup(&vnlruproc_sig); msleep(vnlruproc, &vnode_free_list_mtx, PVFS|PDROP, "vlruwt", hz); continue; } mtx_unlock(&vnode_free_list_mtx); done = 0; mtx_lock(&mountlist_mtx); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { int vfsunlocked; if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { nmp = TAILQ_NEXT(mp, mnt_list); continue; } if (!VFS_NEEDSGIANT(mp)) { mtx_unlock(&Giant); vfsunlocked = 1; } else vfsunlocked = 0; done += vlrureclaim(mp); if (vfsunlocked) mtx_lock(&Giant); mtx_lock(&mountlist_mtx); nmp = TAILQ_NEXT(mp, mnt_list); vfs_unbusy(mp, td); } mtx_unlock(&mountlist_mtx); if (done == 0) { EVENTHANDLER_INVOKE(vfs_lowvnodes, desiredvnodes / 10); #if 0 /* These messages are temporary debugging aids */ if (vnlru_nowhere < 5) printf("vnlru process getting nowhere..\n"); else if (vnlru_nowhere == 5) printf("vnlru process messages stopped.\n"); #endif vnlru_nowhere++; tsleep(vnlruproc, PPAUSE, "vlrup", hz * 3); } else uio_yield(); } } static struct kproc_desc vnlru_kp = { "vnlru", vnlru_proc, &vnlruproc }; SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp) /* * Routines having to do with the management of the vnode table. */ static void vdestroy(struct vnode *vp) { struct bufobj *bo; CTR1(KTR_VFS, "vdestroy vp %p", vp); mtx_lock(&vnode_free_list_mtx); numvnodes--; mtx_unlock(&vnode_free_list_mtx); bo = &vp->v_bufobj; VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("cleaned vnode still on the free list.")); VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't")); VNASSERT(vp->v_holdcnt == 0, vp, ("Non-zero hold count")); VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count")); VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count")); VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's")); VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0")); VNASSERT(bo->bo_clean.bv_root == NULL, vp, ("cleanblkroot not NULL")); VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0")); VNASSERT(bo->bo_dirty.bv_root == NULL, vp, ("dirtyblkroot not NULL")); VNASSERT(TAILQ_EMPTY(&vp->v_cache_dst), vp, ("vp has namecache dst")); VNASSERT(LIST_EMPTY(&vp->v_cache_src), vp, ("vp has namecache src")); VI_UNLOCK(vp); #ifdef MAC mac_destroy_vnode(vp); #endif if (vp->v_pollinfo != NULL) { knlist_destroy(&vp->v_pollinfo->vpi_selinfo.si_note); mtx_destroy(&vp->v_pollinfo->vpi_lock); uma_zfree(vnodepoll_zone, vp->v_pollinfo); } #ifdef INVARIANTS /* XXX Elsewhere we can detect an already freed vnode via NULL v_op. */ vp->v_op = NULL; #endif lockdestroy(vp->v_vnlock); mtx_destroy(&vp->v_interlock); uma_zfree(vnode_zone, vp); } /* * Try to recycle a freed vnode. We abort if anyone picks up a reference * before we actually vgone(). This function must be called with the vnode * held to prevent the vnode from being returned to the free list midway * through vgone(). 
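 *
 * The wakeup protocol between vnlru_proc() above and getnewvnode() below
 * boils down to a flag plus two sleep channels.  A rough pthread analogue
 * (hypothetical code, not part of this file; the figures are made up):
 */

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t	free_list_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	vnlru_cv = PTHREAD_COND_INITIALIZER;	/* "vlruwt" */
static pthread_cond_t	alloc_cv = PTHREAD_COND_INITIALIZER;	/* "vlruwk" */
static int	vnlruproc_sig;
static int	numvnodes = 1200;
static int	desiredvnodes = 1000;

static void *
vnlru_daemon(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&free_list_mtx);
	while (vnlruproc_sig == 0)
		pthread_cond_wait(&vnlru_cv, &free_list_mtx);
	while (numvnodes > desiredvnodes * 9 / 10)
		numvnodes -= 100;	/* pretend vlrureclaim() made progress */
	vnlruproc_sig = 0;
	pthread_cond_broadcast(&alloc_cv);
	pthread_mutex_unlock(&free_list_mtx);
	return (NULL);
}

int
main(void)
{
	pthread_t td;

	pthread_create(&td, NULL, vnlru_daemon, NULL);
	pthread_mutex_lock(&free_list_mtx);
	while (numvnodes > desiredvnodes) {
		if (vnlruproc_sig == 0) {	/* avoid unnecessary wakeups */
			vnlruproc_sig = 1;
			pthread_cond_signal(&vnlru_cv);
		}
		pthread_cond_wait(&alloc_cv, &free_list_mtx);
	}
	pthread_mutex_unlock(&free_list_mtx);
	printf("numvnodes now %d, safe to allocate\n", numvnodes);
	pthread_join(td, NULL);
	return (0);
}

/*
 * vtryrecycle() below is the per-vnode recycling step described above.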
*/ static int vtryrecycle(struct vnode *vp) { struct thread *td = curthread; struct mount *vnmp; CTR1(KTR_VFS, "vtryrecycle: trying vp %p", vp); VNASSERT(vp->v_holdcnt, vp, ("vtryrecycle: Recycling vp %p without a reference.", vp)); /* * This vnode may found and locked via some other list, if so we * can't recycle it yet. */ if (VOP_LOCK(vp, LK_EXCLUSIVE | LK_NOWAIT, td) != 0) return (EWOULDBLOCK); /* * Don't recycle if its filesystem is being suspended. */ if (vn_start_write(vp, &vnmp, V_NOWAIT) != 0) { VOP_UNLOCK(vp, 0, td); return (EBUSY); } /* * If we got this far, we need to acquire the interlock and see if * anyone picked up this vnode from another list. If not, we will * mark it with DOOMED via vgonel() so that anyone who does find it * will skip over it. */ VI_LOCK(vp); if (vp->v_usecount) { VOP_UNLOCK(vp, LK_INTERLOCK, td); vn_finished_write(vnmp); return (EBUSY); } if ((vp->v_iflag & VI_DOOMED) == 0) vgonel(vp); VOP_UNLOCK(vp, LK_INTERLOCK, td); vn_finished_write(vnmp); CTR1(KTR_VFS, "vtryrecycle: recycled vp %p", vp); return (0); } /* * Return the next vnode from the free list. */ int getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, struct vnode **vpp) { struct vnode *vp = NULL; struct bufobj *bo; mtx_lock(&vnode_free_list_mtx); /* * Lend our context to reclaim vnodes if they've exceeded the max. */ if (freevnodes > wantfreevnodes) vnlru_free(1); /* * Wait for available vnodes. */ if (numvnodes > desiredvnodes) { if (mp != NULL && (mp->mnt_kern_flag & MNTK_SUSPEND)) { /* * File system is beeing suspended, we cannot risk a * deadlock here, so allocate new vnode anyway. */ if (freevnodes > wantfreevnodes) vnlru_free(freevnodes - wantfreevnodes); goto alloc; } if (vnlruproc_sig == 0) { vnlruproc_sig = 1; /* avoid unnecessary wakeups */ wakeup(vnlruproc); } msleep(&vnlruproc_sig, &vnode_free_list_mtx, PVFS, "vlruwk", hz); #if 0 /* XXX Not all VFS_VGET/ffs_vget callers check returns. */ if (numvnodes > desiredvnodes) { mtx_unlock(&vnode_free_list_mtx); return (ENFILE); } #endif } alloc: numvnodes++; mtx_unlock(&vnode_free_list_mtx); vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO); /* * Setup locks. */ vp->v_vnlock = &vp->v_lock; mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF); /* * By default, don't allow shared locks unless filesystems * opt-in. */ lockinit(vp->v_vnlock, PVFS, tag, VLKTIMEOUT, LK_NOSHARE); /* * Initialize bufobj. */ bo = &vp->v_bufobj; bo->__bo_vnode = vp; bo->bo_mtx = &vp->v_interlock; bo->bo_ops = &buf_ops_bio; bo->bo_private = vp; TAILQ_INIT(&bo->bo_clean.bv_hd); TAILQ_INIT(&bo->bo_dirty.bv_hd); /* * Initialize namecache. */ LIST_INIT(&vp->v_cache_src); TAILQ_INIT(&vp->v_cache_dst); /* * Finalize various vnode identity bits. */ vp->v_type = VNON; vp->v_tag = tag; vp->v_op = vops; v_incr_usecount(vp); vp->v_data = 0; #ifdef MAC mac_init_vnode(vp); if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0) mac_associate_vnode_singlelabel(mp, vp); else if (mp == NULL) printf("NULL mp in getnewvnode()\n"); #endif if (mp != NULL) { bo->bo_bsize = mp->mnt_stat.f_iosize; if ((mp->mnt_kern_flag & MNTK_NOKNOTE) != 0) vp->v_vflag |= VV_NOKNOTE; } CTR2(KTR_VFS, "getnewvnode: mp %p vp %p", mp, vp); *vpp = vp; return (0); } /* * Delete from old mount point vnode list, if on one. 
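 *
 * This is the teardown half of insmntque().  In this file it is reached from
 * vgonel() when a vnode is reclaimed.  A rough sketch of a vnode's mount
 * list lifecycle, using a hypothetical "somefs" tag and vop vector only for
 * illustration:
 *
 *	error = getnewvnode("somefs", mp, &somefs_vnodeops, &vp);
 *	error = insmntque(vp, mp);	  now on mp->mnt_nvnodelist
 *	...
 *	vgone(vp);			  reclaim; calls delmntque(vp) internally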
*/ static void delmntque(struct vnode *vp) { struct mount *mp; mp = vp->v_mount; if (mp == NULL) return; MNT_ILOCK(mp); vp->v_mount = NULL; VNASSERT(mp->mnt_nvnodelistsize > 0, vp, ("bad mount point vnode list size")); TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes); mp->mnt_nvnodelistsize--; MNT_REL(mp); MNT_IUNLOCK(mp); } static void insmntque_stddtr(struct vnode *vp, void *dtr_arg) { struct thread *td; td = curthread; /* XXX ? */ vp->v_data = NULL; vp->v_op = &dead_vnodeops; /* XXX non mp-safe fs may still call insmntque with vnode unlocked */ if (!VOP_ISLOCKED(vp, td)) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); vgone(vp); vput(vp); } /* * Insert into list of vnodes for the new mount point, if available. */ int insmntque1(struct vnode *vp, struct mount *mp, void (*dtr)(struct vnode *, void *), void *dtr_arg) { KASSERT(vp->v_mount == NULL, ("insmntque: vnode already on per mount vnode list")); VNASSERT(mp != NULL, vp, ("Don't call insmntque(foo, NULL)")); MNT_ILOCK(mp); if ((mp->mnt_kern_flag & MNTK_NOINSMNTQ) != 0 && mp->mnt_nvnodelistsize == 0) { MNT_IUNLOCK(mp); if (dtr != NULL) dtr(vp, dtr_arg); return (EBUSY); } vp->v_mount = mp; MNT_REF(mp); TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes); VNASSERT(mp->mnt_nvnodelistsize >= 0, vp, ("neg mount point vnode list size")); mp->mnt_nvnodelistsize++; MNT_IUNLOCK(mp); return (0); } int insmntque(struct vnode *vp, struct mount *mp) { return (insmntque1(vp, mp, insmntque_stddtr, NULL)); } /* * Flush out and invalidate all buffers associated with a bufobj * Called with the underlying object locked. */ int bufobj_invalbuf(struct bufobj *bo, int flags, struct thread *td, int slpflag, int slptimeo) { int error; BO_LOCK(bo); if (flags & V_SAVE) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); return (error); } if (bo->bo_dirty.bv_cnt > 0) { BO_UNLOCK(bo); if ((error = BO_SYNC(bo, MNT_WAIT, td)) != 0) return (error); /* * XXX We could save a lock/unlock if this was only * enabled under INVARIANTS */ BO_LOCK(bo); if (bo->bo_numoutput > 0 || bo->bo_dirty.bv_cnt > 0) panic("vinvalbuf: dirty bufs"); } } /* * If you alter this loop please notice that interlock is dropped and * reacquired in flushbuflist. Special care is needed to ensure that * no race conditions occur from this. */ do { error = flushbuflist(&bo->bo_clean, flags, bo, slpflag, slptimeo); if (error == 0) error = flushbuflist(&bo->bo_dirty, flags, bo, slpflag, slptimeo); if (error != 0 && error != EAGAIN) { BO_UNLOCK(bo); return (error); } } while (error != 0); /* * Wait for I/O to complete. XXX needs cleaning up. The vnode can * have write I/O in-progress but if there is a VM object then the * VM object can also have read-I/O in-progress. */ do { bufobj_wwait(bo, 0, 0); BO_UNLOCK(bo); if (bo->bo_object != NULL) { VM_OBJECT_LOCK(bo->bo_object); vm_object_pip_wait(bo->bo_object, "bovlbx"); VM_OBJECT_UNLOCK(bo->bo_object); } BO_LOCK(bo); } while (bo->bo_numoutput > 0); BO_UNLOCK(bo); /* * Destroy the copy in the VM cache, too. */ if (bo->bo_object != NULL) { VM_OBJECT_LOCK(bo->bo_object); vm_object_page_remove(bo->bo_object, 0, 0, (flags & V_SAVE) ? TRUE : FALSE); VM_OBJECT_UNLOCK(bo->bo_object); } #ifdef INVARIANTS BO_LOCK(bo); if ((flags & (V_ALT | V_NORMAL)) == 0 && (bo->bo_dirty.bv_cnt > 0 || bo->bo_clean.bv_cnt > 0)) panic("vinvalbuf: flush failed"); BO_UNLOCK(bo); #endif return (0); } /* * Flush out and invalidate all buffers associated with a vnode. * Called with the underlying object locked. 
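 *
 * For example (mirroring the use in vgonel() below), a caller that would
 * like to preserve dirty data but must not fail outright first tries V_SAVE
 * and then falls back to discarding everything:
 *
 *	if (vinvalbuf(vp, V_SAVE, td, 0, 0) != 0)
 *		vinvalbuf(vp, 0, td, 0, 0);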
*/ int vinvalbuf(struct vnode *vp, int flags, struct thread *td, int slpflag, int slptimeo) { CTR2(KTR_VFS, "vinvalbuf vp %p flags %d", vp, flags); ASSERT_VOP_LOCKED(vp, "vinvalbuf"); return (bufobj_invalbuf(&vp->v_bufobj, flags, td, slpflag, slptimeo)); } /* * Flush out buffers on the specified list. * */ static int flushbuflist( struct bufv *bufv, int flags, struct bufobj *bo, int slpflag, int slptimeo) { struct buf *bp, *nbp; int retval, error; daddr_t lblkno; b_xflags_t xflags; ASSERT_BO_LOCKED(bo); retval = 0; TAILQ_FOREACH_SAFE(bp, &bufv->bv_hd, b_bobufs, nbp) { if (((flags & V_NORMAL) && (bp->b_xflags & BX_ALTDATA)) || ((flags & V_ALT) && (bp->b_xflags & BX_ALTDATA) == 0)) { continue; } lblkno = 0; xflags = 0; if (nbp != NULL) { lblkno = nbp->b_lblkno; xflags = nbp->b_xflags & (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN); } retval = EAGAIN; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_MTX(bo), "flushbuf", slpflag, slptimeo); if (error) { BO_LOCK(bo); return (error != ENOLCK ? error : EAGAIN); } KASSERT(bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); if (bp->b_bufobj != bo) { /* XXX: necessary ? */ BUF_UNLOCK(bp); BO_LOCK(bo); return (EAGAIN); } /* * XXX Since there are no node locks for NFS, I * believe there is a slight chance that a delayed * write will occur while sleeping just above, so * check for it. */ if (((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) && (flags & V_SAVE)) { bremfree(bp); bp->b_flags |= B_ASYNC; bwrite(bp); BO_LOCK(bo); return (EAGAIN); /* XXX: why not loop ? */ } bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); BO_LOCK(bo); if (nbp != NULL && (nbp->b_bufobj != bo || nbp->b_lblkno != lblkno || (nbp->b_xflags & (BX_BKGRDMARKER | BX_VNDIRTY | BX_VNCLEAN)) != xflags)) break; /* nbp invalid */ } return (retval); } /* * Truncate a file's buffer and pages to a specified length. This * is in lieu of the old vinvalbuf mechanism, which performed unneeded * sync activity. */ int vtruncbuf(struct vnode *vp, struct ucred *cred, struct thread *td, off_t length, int blksize) { struct buf *bp, *nbp; int anyfreed; int trunclbn; struct bufobj *bo; CTR2(KTR_VFS, "vtruncbuf vp %p length %jd", vp, length); /* * Round up to the *next* lbn. 
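 * A quick worked example of the rounding below: with blksize 16384, a
 * length of 16385 gives trunclbn = (16385 + 16383) / 16384 = 2, so only
 * buffers with b_lblkno >= 2 are thrown away, while a length of exactly
 * 16384 gives trunclbn = 1 and logical block 1 and up are freed.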
*/ trunclbn = (length + blksize - 1) / blksize; ASSERT_VOP_LOCKED(vp, "vtruncbuf"); restart: VI_LOCK(vp); bo = &vp->v_bufobj; anyfreed = 1; for (;anyfreed;) { anyfreed = 0; TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < trunclbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) goto restart; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; if (nbp != NULL && (((nbp->b_xflags & BX_VNCLEAN) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI))) { goto restart; } VI_LOCK(vp); } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno < trunclbn) continue; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) goto restart; bremfree(bp); bp->b_flags |= (B_INVAL | B_RELBUF); bp->b_flags &= ~B_ASYNC; brelse(bp); anyfreed = 1; if (nbp != NULL && (((nbp->b_xflags & BX_VNDIRTY) == 0) || (nbp->b_vp != vp) || (nbp->b_flags & B_DELWRI) == 0)) { goto restart; } VI_LOCK(vp); } } if (length > 0) { restartsync: TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bp->b_lblkno > 0) continue; /* * Since we hold the vnode lock this should only * fail if we're racing with the buf daemon. */ if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, VI_MTX(vp)) == ENOLCK) { goto restart; } VNASSERT((bp->b_flags & B_DELWRI), vp, ("buf(%p) on dirty queue without DELWRI", bp)); bremfree(bp); bawrite(bp); VI_LOCK(vp); goto restartsync; } } bufobj_wwait(bo, 0, 0); VI_UNLOCK(vp); vnode_pager_setsize(vp, length); return (0); } /* * buf_splay() - splay tree core for the clean/dirty list of buffers in * a vnode. * * NOTE: We have to deal with the special case of a background bitmap * buffer, a situation where two buffers will have the same logical * block offset. We want (1) only the foreground buffer to be accessed * in a lookup and (2) must differentiate between the foreground and * background buffer in the splay tree algorithm because the splay * tree cannot normally handle multiple entities with the same 'index'. * We accomplish this by adding differentiating flags to the splay tree's * numerical domain. */ static struct buf * buf_splay(daddr_t lblkno, b_xflags_t xflags, struct buf *root) { struct buf dummy; struct buf *lefttreemax, *righttreemin, *y; if (root == NULL) return (NULL); lefttreemax = righttreemin = &dummy; for (;;) { if (lblkno < root->b_lblkno || (lblkno == root->b_lblkno && (xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) { if ((y = root->b_left) == NULL) break; if (lblkno < y->b_lblkno) { /* Rotate right. */ root->b_left = y->b_right; y->b_right = root; root = y; if ((y = root->b_left) == NULL) break; } /* Link into the new root's right tree. */ righttreemin->b_left = root; righttreemin = root; } else if (lblkno > root->b_lblkno || (lblkno == root->b_lblkno && (xflags & BX_BKGRDMARKER) > (root->b_xflags & BX_BKGRDMARKER))) { if ((y = root->b_right) == NULL) break; if (lblkno > y->b_lblkno) { /* Rotate left. */ root->b_right = y->b_left; y->b_left = root; root = y; if ((y = root->b_right) == NULL) break; } /* Link into the new root's left tree. */ lefttreemax->b_right = root; lefttreemax = root; } else { break; } root = y; } /* Assemble the new root. 
*/ lefttreemax->b_right = root->b_left; righttreemin->b_left = root->b_right; root->b_left = dummy.b_right; root->b_right = dummy.b_left; return (root); } static void buf_vlist_remove(struct buf *bp) { struct buf *root; struct bufv *bv; KASSERT(bp->b_bufobj != NULL, ("No b_bufobj %p", bp)); ASSERT_BO_LOCKED(bp->b_bufobj); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) != (BX_VNDIRTY|BX_VNCLEAN), ("buf_vlist_remove: Buf %p is on two lists", bp)); if (bp->b_xflags & BX_VNDIRTY) bv = &bp->b_bufobj->bo_dirty; else bv = &bp->b_bufobj->bo_clean; if (bp != bv->bv_root) { root = buf_splay(bp->b_lblkno, bp->b_xflags, bv->bv_root); KASSERT(root == bp, ("splay lookup failed in remove")); } if (bp->b_left == NULL) { root = bp->b_right; } else { root = buf_splay(bp->b_lblkno, bp->b_xflags, bp->b_left); root->b_right = bp->b_right; } bv->bv_root = root; TAILQ_REMOVE(&bv->bv_hd, bp, b_bobufs); bv->bv_cnt--; bp->b_xflags &= ~(BX_VNDIRTY | BX_VNCLEAN); } /* * Add the buffer to the sorted clean or dirty block list using a * splay tree algorithm. * * NOTE: xflags is passed as a constant, optimizing this inline function! */ static void buf_vlist_add(struct buf *bp, struct bufobj *bo, b_xflags_t xflags) { struct buf *root; struct bufv *bv; ASSERT_BO_LOCKED(bo); KASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, ("buf_vlist_add: Buf %p has existing xflags %d", bp, bp->b_xflags)); bp->b_xflags |= xflags; if (xflags & BX_VNDIRTY) bv = &bo->bo_dirty; else bv = &bo->bo_clean; root = buf_splay(bp->b_lblkno, bp->b_xflags, bv->bv_root); if (root == NULL) { bp->b_left = NULL; bp->b_right = NULL; TAILQ_INSERT_TAIL(&bv->bv_hd, bp, b_bobufs); } else if (bp->b_lblkno < root->b_lblkno || (bp->b_lblkno == root->b_lblkno && (bp->b_xflags & BX_BKGRDMARKER) < (root->b_xflags & BX_BKGRDMARKER))) { bp->b_left = root->b_left; bp->b_right = root; root->b_left = NULL; TAILQ_INSERT_BEFORE(root, bp, b_bobufs); } else { bp->b_right = root->b_right; bp->b_left = root; root->b_right = NULL; TAILQ_INSERT_AFTER(&bv->bv_hd, root, bp, b_bobufs); } bv->bv_cnt++; bv->bv_root = bp; } /* * Lookup a buffer using the splay tree. Note that we specifically avoid * shadow buffers used in background bitmap writes. * * This code isn't quite efficient as it could be because we are maintaining * two sorted lists and do not know which list the block resides in. * * During a "make buildworld" the desired buffer is found at one of * the roots more than 60% of the time. Thus, checking both roots * before performing either splay eliminates unnecessary splays on the * first tree splayed. */ struct buf * gbincore(struct bufobj *bo, daddr_t lblkno) { struct buf *bp; ASSERT_BO_LOCKED(bo); if ((bp = bo->bo_clean.bv_root) != NULL && bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) return (bp); if ((bp = bo->bo_dirty.bv_root) != NULL && bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) return (bp); if ((bp = bo->bo_clean.bv_root) != NULL) { bo->bo_clean.bv_root = bp = buf_splay(lblkno, 0, bp); if (bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) return (bp); } if ((bp = bo->bo_dirty.bv_root) != NULL) { bo->bo_dirty.bv_root = bp = buf_splay(lblkno, 0, bp); if (bp->b_lblkno == lblkno && !(bp->b_xflags & BX_BKGRDMARKER)) return (bp); } return (NULL); } /* * Associate a buffer with a vnode. 
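 *
 * The vnode interlock must be held by the caller (see the ASSERT_VI_LOCKED
 * below) and the buffer initially lands on the clean list; a minimal,
 * illustrative calling sequence is:
 *
 *	VI_LOCK(vp);
 *	bgetvp(vp, bp);		  bp->b_vp and bp->b_bufobj now point at vp
 *	VI_UNLOCK(vp);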
*/ void bgetvp(struct vnode *vp, struct buf *bp) { VNASSERT(bp->b_vp == NULL, bp->b_vp, ("bgetvp: not free")); CTR3(KTR_BUF, "bgetvp(%p) vp %p flags %X", bp, vp, bp->b_flags); VNASSERT((bp->b_xflags & (BX_VNDIRTY|BX_VNCLEAN)) == 0, vp, ("bgetvp: bp already attached! %p", bp)); ASSERT_VI_LOCKED(vp, "bgetvp"); vholdl(vp); if (VFS_NEEDSGIANT(vp->v_mount) || vp->v_bufobj.bo_flag & BO_NEEDSGIANT) bp->b_flags |= B_NEEDSGIANT; bp->b_vp = vp; bp->b_bufobj = &vp->v_bufobj; /* * Insert onto list for new vnode. */ buf_vlist_add(bp, &vp->v_bufobj, BX_VNCLEAN); } /* * Disassociate a buffer from a vnode. */ void brelvp(struct buf *bp) { struct bufobj *bo; struct vnode *vp; CTR3(KTR_BUF, "brelvp(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); KASSERT(bp->b_vp != NULL, ("brelvp: NULL")); /* * Delete from old vnode list, if on one. */ vp = bp->b_vp; /* XXX */ bo = bp->b_bufobj; BO_LOCK(bo); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("brelvp: Buffer %p not on queue.", bp); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { bo->bo_flag &= ~BO_ONWORKLST; mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); } bp->b_flags &= ~B_NEEDSGIANT; bp->b_vp = NULL; bp->b_bufobj = NULL; vdropl(vp); } /* * Add an item to the syncer work queue. */ static void vn_syncer_add_to_worklist(struct bufobj *bo, int delay) { int slot; ASSERT_BO_LOCKED(bo); mtx_lock(&sync_mtx); if (bo->bo_flag & BO_ONWORKLST) LIST_REMOVE(bo, bo_synclist); else { bo->bo_flag |= BO_ONWORKLST; syncer_worklist_len++; } if (delay > syncer_maxdelay - 2) delay = syncer_maxdelay - 2; slot = (syncer_delayno + delay) & syncer_mask; LIST_INSERT_HEAD(&syncer_workitem_pending[slot], bo, bo_synclist); mtx_unlock(&sync_mtx); } static int sysctl_vfs_worklist_len(SYSCTL_HANDLER_ARGS) { int error, len; mtx_lock(&sync_mtx); len = syncer_worklist_len - sync_vnode_count; mtx_unlock(&sync_mtx); error = SYSCTL_OUT(req, &len, sizeof(len)); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_RD, NULL, 0, sysctl_vfs_worklist_len, "I", "Syncer thread worklist length"); static struct proc *updateproc; static void sched_sync(void); static struct kproc_desc up_kp = { "syncer", sched_sync, &updateproc }; SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp) static int sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td) { struct vnode *vp; struct mount *mp; int vfslocked; vfslocked = 0; restart: *bo = LIST_FIRST(slp); if (*bo == NULL) { VFS_UNLOCK_GIANT(vfslocked); return (0); } vp = (*bo)->__bo_vnode; /* XXX */ if (VFS_NEEDSGIANT(vp->v_mount)) { if (!vfslocked) { vfslocked = 1; if (mtx_trylock(&Giant) == 0) { mtx_unlock(&sync_mtx); mtx_lock(&Giant); mtx_lock(&sync_mtx); goto restart; } } } else { VFS_UNLOCK_GIANT(vfslocked); vfslocked = 0; } if (VOP_ISLOCKED(vp, NULL) != 0) { VFS_UNLOCK_GIANT(vfslocked); return (1); } if (VI_TRYLOCK(vp) == 0) { VFS_UNLOCK_GIANT(vfslocked); return (1); } /* * We use vhold in case the vnode does not * successfully sync. vhold prevents the vnode from * going away when we unlock the sync_mtx so that * we can acquire the vnode interlock. 
*/ vholdl(vp); mtx_unlock(&sync_mtx); VI_UNLOCK(vp); if (vn_start_write(vp, &mp, V_NOWAIT) != 0) { vdrop(vp); VFS_UNLOCK_GIANT(vfslocked); mtx_lock(&sync_mtx); return (1); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); (void) VOP_FSYNC(vp, MNT_LAZY, td); VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); VI_LOCK(vp); if (((*bo)->bo_flag & BO_ONWORKLST) != 0) { /* * Put us back on the worklist. The worklist * routine will remove us from our current * position and then add us back in at a later * position. */ vn_syncer_add_to_worklist(*bo, syncdelay); } vdropl(vp); VFS_UNLOCK_GIANT(vfslocked); mtx_lock(&sync_mtx); return (0); } /* * System filesystem synchronizer daemon. */ static void sched_sync(void) { struct synclist *next; struct synclist *slp; struct bufobj *bo; long starttime; struct thread *td = curthread; static int dummychan; int last_work_seen; int net_worklist_len; int syncer_final_iter; int first_printf; int error; last_work_seen = 0; syncer_final_iter = 0; first_printf = 1; syncer_state = SYNCER_RUNNING; starttime = time_uptime; td->td_pflags |= TDP_NORUNNINGBUF; EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc, SHUTDOWN_PRI_LAST); mtx_lock(&sync_mtx); for (;;) { if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter == 0) { mtx_unlock(&sync_mtx); kthread_suspend_check(td->td_proc); mtx_lock(&sync_mtx); } net_worklist_len = syncer_worklist_len - sync_vnode_count; if (syncer_state != SYNCER_RUNNING && starttime != time_uptime) { if (first_printf) { printf("\nSyncing disks, vnodes remaining..."); first_printf = 0; } printf("%d ", net_worklist_len); } starttime = time_uptime; /* * Push files whose dirty time has expired. Be careful * of interrupt race on slp queue. * * Skip over empty worklist slots when shutting down. */ do { slp = &syncer_workitem_pending[syncer_delayno]; syncer_delayno += 1; if (syncer_delayno == syncer_maxdelay) syncer_delayno = 0; next = &syncer_workitem_pending[syncer_delayno]; /* * If the worklist has wrapped since the * it was emptied of all but syncer vnodes, * switch to the FINAL_DELAY state and run * for one more second. */ if (syncer_state == SYNCER_SHUTTING_DOWN && net_worklist_len == 0 && last_work_seen == syncer_delayno) { syncer_state = SYNCER_FINAL_DELAY; syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP; } } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) && syncer_worklist_len > 0); /* * Keep track of the last time there was anything * on the worklist other than syncer vnodes. * Return to the SHUTTING_DOWN state if any * new work appears. */ if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING) last_work_seen = syncer_delayno; if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY) syncer_state = SYNCER_SHUTTING_DOWN; while (!LIST_EMPTY(slp)) { error = sync_vnode(slp, &bo, td); if (error == 1) { LIST_REMOVE(bo, bo_synclist); LIST_INSERT_HEAD(next, bo, bo_synclist); continue; } } if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0) syncer_final_iter--; /* * The variable rushjob allows the kernel to speed up the * processing of the filesystem syncer process. A rushjob * value of N tells the filesystem syncer to process the next * N seconds worth of work on its queue ASAP. Currently rushjob * is used by the soft update code to speed up the filesystem * syncer process when the incore state is getting so far * ahead of the disk that the kernel memory pool is being * threatened with exhaustion. 
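 *
 * The request side of this is speedup_syncer() below; a caller that is
 * starved for resources can, roughly, do
 *
 *	if (speedup_syncer() == 0)
 *		... the syncer is already running at its maximum boost ...
 *
 * which bumps rushjob by one (up to syncdelay / 2) and kicks the syncer
 * out of its one second sleep.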
*/ if (rushjob > 0) { rushjob -= 1; continue; } /* * Just sleep for a short period of time between * iterations when shutting down to allow some I/O * to happen. * * If it has taken us less than a second to process the * current work, then wait. Otherwise start right over * again. We can still lose time if any single round * takes more than two seconds, but it does not really * matter as we are just trying to generally pace the * filesystem activity. */ if (syncer_state != SYNCER_RUNNING) msleep(&dummychan, &sync_mtx, PPAUSE, "syncfnl", hz / SYNCER_SHUTDOWN_SPEEDUP); else if (time_uptime == starttime) msleep(&lbolt, &sync_mtx, PPAUSE, "syncer", 0); } } /* * Request the syncer daemon to speed up its work. * We never push it to speed up more than half of its * normal turn time, otherwise it could take over the cpu. */ int speedup_syncer(void) { struct thread *td; int ret = 0; td = FIRST_THREAD_IN_PROC(updateproc); mtx_lock(&sync_mtx); if (rushjob < syncdelay / 2) { rushjob += 1; stat_rush_requests += 1; ret = 1; } mtx_unlock(&sync_mtx); sleepq_remove(td, &lbolt); return (ret); } /* * Tell the syncer to speed up its work and run though its work * list several times, then tell it to shut down. */ static void syncer_shutdown(void *arg, int howto) { struct thread *td; if (howto & RB_NOSYNC) return; td = FIRST_THREAD_IN_PROC(updateproc); mtx_lock(&sync_mtx); syncer_state = SYNCER_SHUTTING_DOWN; rushjob = 0; mtx_unlock(&sync_mtx); sleepq_remove(td, &lbolt); kproc_shutdown(arg, howto); } /* * Reassign a buffer from one vnode to another. * Used to assign file specific control information * (indirect blocks) to the vnode to which they belong. */ void reassignbuf(struct buf *bp) { struct vnode *vp; struct bufobj *bo; int delay; #ifdef INVARIANTS struct bufv *bv; #endif vp = bp->b_vp; bo = bp->b_bufobj; ++reassignbufcalls; CTR3(KTR_BUF, "reassignbuf(%p) vp %p flags %X", bp, bp->b_vp, bp->b_flags); /* * B_PAGING flagged buffers cannot be reassigned because their vp * is not fully linked in. */ if (bp->b_flags & B_PAGING) panic("cannot reassign paging buffer"); /* * Delete from old vnode list, if on one. */ VI_LOCK(vp); if (bp->b_xflags & (BX_VNDIRTY | BX_VNCLEAN)) buf_vlist_remove(bp); else panic("reassignbuf: Buffer %p not on queue.", bp); /* * If dirty, put on list of dirty buffers; otherwise insert onto list * of clean buffers. 
*/ if (bp->b_flags & B_DELWRI) { if ((bo->bo_flag & BO_ONWORKLST) == 0) { switch (vp->v_type) { case VDIR: delay = dirdelay; break; case VCHR: delay = metadelay; break; default: delay = filedelay; } vn_syncer_add_to_worklist(bo, delay); } buf_vlist_add(bp, bo, BX_VNDIRTY); } else { buf_vlist_add(bp, bo, BX_VNCLEAN); if ((bo->bo_flag & BO_ONWORKLST) && bo->bo_dirty.bv_cnt == 0) { mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; mtx_unlock(&sync_mtx); bo->bo_flag &= ~BO_ONWORKLST; } } #ifdef INVARIANTS bv = &bo->bo_clean; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bv = &bo->bo_dirty; bp = TAILQ_FIRST(&bv->bv_hd); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); bp = TAILQ_LAST(&bv->bv_hd, buflists); KASSERT(bp == NULL || bp->b_bufobj == bo, ("bp %p wrong b_bufobj %p should be %p", bp, bp->b_bufobj, bo)); #endif VI_UNLOCK(vp); } /* * Increment the use and hold counts on the vnode, taking care to reference * the driver's usecount if this is a chardev. The vholdl() will remove * the vnode from the free list if it is presently free. Requires the * vnode interlock and returns with it held. */ static void v_incr_usecount(struct vnode *vp) { CTR3(KTR_VFS, "v_incr_usecount: vp %p holdcnt %d usecount %d\n", vp, vp->v_holdcnt, vp->v_usecount); vp->v_usecount++; if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; dev_unlock(); } vholdl(vp); } /* * Turn a holdcnt into a use+holdcnt such that only one call to * v_decr_usecount is needed. */ static void v_upgrade_usecount(struct vnode *vp) { CTR3(KTR_VFS, "v_upgrade_usecount: vp %p holdcnt %d usecount %d\n", vp, vp->v_holdcnt, vp->v_usecount); vp->v_usecount++; if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount++; dev_unlock(); } } /* * Decrement the vnode use and hold count along with the driver's usecount * if this is a chardev. The vdropl() below releases the vnode interlock * as it may free the vnode. */ static void v_decr_usecount(struct vnode *vp) { CTR3(KTR_VFS, "v_decr_usecount: vp %p holdcnt %d usecount %d\n", vp, vp->v_holdcnt, vp->v_usecount); ASSERT_VI_LOCKED(vp, __FUNCTION__); VNASSERT(vp->v_usecount > 0, vp, ("v_decr_usecount: negative usecount")); vp->v_usecount--; if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; dev_unlock(); } vdropl(vp); } /* * Decrement only the use count and driver use count. This is intended to * be paired with a follow on vdropl() to release the remaining hold count. * In this way we may vgone() a vnode with a 0 usecount without risk of * having it end up on a free list because the hold count is kept above 0. */ static void v_decr_useonly(struct vnode *vp) { CTR3(KTR_VFS, "v_decr_useonly: vp %p holdcnt %d usecount %d\n", vp, vp->v_holdcnt, vp->v_usecount); ASSERT_VI_LOCKED(vp, __FUNCTION__); VNASSERT(vp->v_usecount > 0, vp, ("v_decr_useonly: negative usecount")); vp->v_usecount--; if (vp->v_type == VCHR && vp->v_rdev != NULL) { dev_lock(); vp->v_rdev->si_usecount--; dev_unlock(); } } /* * Grab a particular vnode from the free list, increment its * reference count and lock it. The vnode lock bit is set if the * vnode is being eliminated in vgone. 
The process is awakened * when the transition is completed, and an error returned to * indicate that the vnode is no longer usable (possibly having * been changed to a new filesystem type). */ int vget(struct vnode *vp, int flags, struct thread *td) { int oweinact; int oldflags; int error; error = 0; oldflags = flags; oweinact = 0; VFS_ASSERT_GIANT(vp->v_mount); if ((flags & LK_INTERLOCK) == 0) VI_LOCK(vp); /* * If the inactive call was deferred because vput() was called * with a shared lock, we have to do it here before another thread * gets a reference to data that should be dead. */ if (vp->v_iflag & VI_OWEINACT) { if (flags & LK_NOWAIT) { VI_UNLOCK(vp); return (EBUSY); } flags &= ~LK_TYPE_MASK; flags |= LK_EXCLUSIVE; oweinact = 1; } vholdl(vp); if ((error = vn_lock(vp, flags | LK_INTERLOCK, td)) != 0) { vdrop(vp); return (error); } VI_LOCK(vp); /* Upgrade our holdcnt to a usecount. */ v_upgrade_usecount(vp); if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0) panic("vget: vn_lock failed to return ENOENT\n"); if (oweinact) { if (vp->v_iflag & VI_OWEINACT) vinactive(vp, td); VI_UNLOCK(vp); if ((oldflags & LK_TYPE_MASK) == 0) VOP_UNLOCK(vp, 0, td); } else VI_UNLOCK(vp); return (0); } /* * Increase the reference count of a vnode. */ void vref(struct vnode *vp) { VI_LOCK(vp); v_incr_usecount(vp); VI_UNLOCK(vp); } /* * Return reference count of a vnode. * * The results of this call are only guaranteed when some mechanism other * than the VI lock is used to stop other processes from gaining references * to the vnode. This may be the case if the caller holds the only reference. * This is also useful when stale data is acceptable as race conditions may * be accounted for by some other means. */ int vrefcnt(struct vnode *vp) { int usecnt; VI_LOCK(vp); usecnt = vp->v_usecount; VI_UNLOCK(vp); return (usecnt); } /* * Vnode put/release. * If count drops to zero, call inactive routine and return to freelist. */ void vrele(struct vnode *vp) { struct thread *td = curthread; /* XXX */ KASSERT(vp != NULL, ("vrele: null vp")); VFS_ASSERT_GIANT(vp->v_mount); VI_LOCK(vp); /* Skip this v_writecount check if we're going to panic below. */ VNASSERT(vp->v_writecount < vp->v_usecount || vp->v_usecount < 1, vp, ("vrele: missed vn_close")); if (vp->v_usecount > 1 || ((vp->v_iflag & VI_DOINGINACT) && vp->v_usecount == 1)) { v_decr_usecount(vp); return; } if (vp->v_usecount != 1) { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); #endif VI_UNLOCK(vp); panic("vrele: negative ref cnt"); } /* * We want to hold the vnode until the inactive finishes to * prevent vgone() races. We drop the use count here and the * hold count below when we're done. */ v_decr_useonly(vp); /* * We must call VOP_INACTIVE with the node locked. Mark * as VI_DOINGINACT to avoid recursion. */ vp->v_iflag |= VI_OWEINACT; if (vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK, td) == 0) { VI_LOCK(vp); if (vp->v_usecount > 0) vp->v_iflag &= ~VI_OWEINACT; if (vp->v_iflag & VI_OWEINACT) vinactive(vp, td); VOP_UNLOCK(vp, 0, td); } else { VI_LOCK(vp); if (vp->v_usecount > 0) vp->v_iflag &= ~VI_OWEINACT; } vdropl(vp); } /* * Release an already locked vnode. This give the same effects as * unlock+vrele(), but takes less time and avoids releasing and * re-aquiring the lock (as vrele() acquires the lock internally.) 
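 *
 * The common pairing, sketched from the interfaces above (illustrative, not
 * a complete example):
 *
 *	error = vget(vp, LK_EXCLUSIVE, td);	  reference and lock vp
 *	if (error == 0) {
 *		... use the locked vnode ...
 *		vput(vp);			  unlock and release in one call
 *	}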
*/ void vput(struct vnode *vp) { struct thread *td = curthread; /* XXX */ int error; KASSERT(vp != NULL, ("vput: null vp")); ASSERT_VOP_LOCKED(vp, "vput"); VFS_ASSERT_GIANT(vp->v_mount); VI_LOCK(vp); /* Skip this v_writecount check if we're going to panic below. */ VNASSERT(vp->v_writecount < vp->v_usecount || vp->v_usecount < 1, vp, ("vput: missed vn_close")); error = 0; if (vp->v_usecount > 1 || ((vp->v_iflag & VI_DOINGINACT) && vp->v_usecount == 1)) { VOP_UNLOCK(vp, 0, td); v_decr_usecount(vp); return; } if (vp->v_usecount != 1) { #ifdef DIAGNOSTIC vprint("vput: negative ref count", vp); #endif panic("vput: negative ref cnt"); } /* * We want to hold the vnode until the inactive finishes to * prevent vgone() races. We drop the use count here and the * hold count below when we're done. */ v_decr_useonly(vp); vp->v_iflag |= VI_OWEINACT; if (VOP_ISLOCKED(vp, NULL) != LK_EXCLUSIVE) { error = VOP_LOCK(vp, LK_EXCLUPGRADE|LK_INTERLOCK|LK_NOWAIT, td); VI_LOCK(vp); if (error) { if (vp->v_usecount > 0) vp->v_iflag &= ~VI_OWEINACT; goto done; } } if (vp->v_usecount > 0) vp->v_iflag &= ~VI_OWEINACT; if (vp->v_iflag & VI_OWEINACT) vinactive(vp, td); VOP_UNLOCK(vp, 0, td); done: vdropl(vp); } /* * Somebody doesn't want the vnode recycled. */ void vhold(struct vnode *vp) { VI_LOCK(vp); vholdl(vp); VI_UNLOCK(vp); } void vholdl(struct vnode *vp) { vp->v_holdcnt++; if (VSHOULDBUSY(vp)) vbusy(vp); } /* * Note that there is one less who cares about this vnode. vdrop() is the * opposite of vhold(). */ void vdrop(struct vnode *vp) { VI_LOCK(vp); vdropl(vp); } /* * Drop the hold count of the vnode. If this is the last reference to * the vnode we will free it if it has been vgone'd otherwise it is * placed on the free list. */ void vdropl(struct vnode *vp) { ASSERT_VI_LOCKED(vp, "vdropl"); if (vp->v_holdcnt <= 0) panic("vdrop: holdcnt %d", vp->v_holdcnt); vp->v_holdcnt--; if (vp->v_holdcnt == 0) { if (vp->v_iflag & VI_DOOMED) { vdestroy(vp); return; } else vfree(vp); } VI_UNLOCK(vp); } /* * Call VOP_INACTIVE on the vnode and manage the DOINGINACT and OWEINACT * flags. DOINGINACT prevents us from recursing in calls to vinactive. * OWEINACT tracks whether a vnode missed a call to inactive due to a * failed lock upgrade. */ static void vinactive(struct vnode *vp, struct thread *td) { ASSERT_VOP_LOCKED(vp, "vinactive"); ASSERT_VI_LOCKED(vp, "vinactive"); VNASSERT((vp->v_iflag & VI_DOINGINACT) == 0, vp, ("vinactive: recursed on VI_DOINGINACT")); vp->v_iflag |= VI_DOINGINACT; vp->v_iflag &= ~VI_OWEINACT; VI_UNLOCK(vp); VOP_INACTIVE(vp, td); VI_LOCK(vp); VNASSERT(vp->v_iflag & VI_DOINGINACT, vp, ("vinactive: lost VI_DOINGINACT")); vp->v_iflag &= ~VI_DOINGINACT; } /* * Remove any vnodes in the vnode table belonging to mount point mp. * * If FORCECLOSE is not specified, there should not be any active ones, * return error if any are found (nb: this is a user error, not a * system error). If FORCECLOSE is specified, detach any active vnodes * that are found. * * If WRITECLOSE is set, only flush out regular file vnodes open for * writing. * * SKIPSYSTEM causes any vnodes marked VV_SYSTEM to be skipped. * * `rootrefs' specifies the base reference count for the root vnode * of this filesystem. The root vnode is considered busy if its * v_usecount exceeds this value. On a successful return, vflush(, td) * will call vrele() on the root vnode exactly rootrefs times. * If the SKIPSYSTEM or WRITECLOSE flags are specified, rootrefs must * be zero. 
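 *
 * For example, an unmount path that holds one long-term reference on the
 * root vnode would, hypothetically, call
 *
 *	error = vflush(mp, 1, (mntflags & MNT_FORCE) ? FORCECLOSE : 0, td);
 *
 * so that a root vnode whose v_usecount is exactly 1 is not reported as
 * busy, and that reference is consumed by the final vrele() inside vflush().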
*/ #ifdef DIAGNOSTIC static int busyprt = 0; /* print out busy vnodes */ SYSCTL_INT(_debug, OID_AUTO, busyprt, CTLFLAG_RW, &busyprt, 0, ""); #endif int vflush( struct mount *mp, int rootrefs, int flags, struct thread *td) { struct vnode *vp, *mvp, *rootvp = NULL; struct vattr vattr; int busy = 0, error; CTR1(KTR_VFS, "vflush: mp %p", mp); if (rootrefs > 0) { KASSERT((flags & (SKIPSYSTEM | WRITECLOSE)) == 0, ("vflush: bad args")); /* * Get the filesystem root vnode. We can vput() it * immediately, since with rootrefs > 0, it won't go away. */ if ((error = VFS_ROOT(mp, LK_EXCLUSIVE, &rootvp, td)) != 0) return (error); vput(rootvp); } MNT_ILOCK(mp); loop: MNT_VNODE_FOREACH(vp, mp, mvp) { VI_LOCK(vp); vholdl(vp); MNT_IUNLOCK(mp); error = vn_lock(vp, LK_INTERLOCK | LK_EXCLUSIVE, td); if (error) { vdrop(vp); MNT_ILOCK(mp); MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); goto loop; } /* * Skip over a vnodes marked VV_SYSTEM. */ if ((flags & SKIPSYSTEM) && (vp->v_vflag & VV_SYSTEM)) { VOP_UNLOCK(vp, 0, td); vdrop(vp); MNT_ILOCK(mp); continue; } /* * If WRITECLOSE is set, flush out unlinked but still open * files (even if open only for reading) and regular file * vnodes open for writing. */ if (flags & WRITECLOSE) { error = VOP_GETATTR(vp, &vattr, td->td_ucred, td); VI_LOCK(vp); if ((vp->v_type == VNON || (error == 0 && vattr.va_nlink > 0)) && (vp->v_writecount == 0 || vp->v_type != VREG)) { VOP_UNLOCK(vp, 0, td); vdropl(vp); MNT_ILOCK(mp); continue; } } else VI_LOCK(vp); /* * With v_usecount == 0, all we need to do is clear out the * vnode data structures and we are done. * * If FORCECLOSE is set, forcibly close the vnode. */ if (vp->v_usecount == 0 || (flags & FORCECLOSE)) { VNASSERT(vp->v_usecount == 0 || (vp->v_type != VCHR && vp->v_type != VBLK), vp, ("device VNODE %p is FORCECLOSED", vp)); vgonel(vp); } else { busy++; #ifdef DIAGNOSTIC if (busyprt) vprint("vflush: busy vnode", vp); #endif } VOP_UNLOCK(vp, 0, td); vdropl(vp); MNT_ILOCK(mp); } MNT_IUNLOCK(mp); if (rootrefs > 0 && (flags & FORCECLOSE) == 0) { /* * If just the root vnode is busy, and if its refcount * is equal to `rootrefs', then go ahead and kill it. */ VI_LOCK(rootvp); KASSERT(busy > 0, ("vflush: not busy")); VNASSERT(rootvp->v_usecount >= rootrefs, rootvp, ("vflush: usecount %d < rootrefs %d", rootvp->v_usecount, rootrefs)); if (busy == 1 && rootvp->v_usecount == rootrefs) { VOP_LOCK(rootvp, LK_EXCLUSIVE|LK_INTERLOCK, td); vgone(rootvp); VOP_UNLOCK(rootvp, 0, td); busy = 0; } else VI_UNLOCK(rootvp); } if (busy) return (EBUSY); for (; rootrefs > 0; rootrefs--) vrele(rootvp); return (0); } /* * Recycle an unused vnode to the front of the free list. */ int vrecycle(struct vnode *vp, struct thread *td) { int recycled; ASSERT_VOP_LOCKED(vp, "vrecycle"); recycled = 0; VI_LOCK(vp); if (vp->v_usecount == 0) { recycled = 1; vgonel(vp); } VI_UNLOCK(vp); return (recycled); } /* * Eliminate all activity associated with a vnode * in preparation for reuse. */ void vgone(struct vnode *vp) { VI_LOCK(vp); vgonel(vp); VI_UNLOCK(vp); } /* * vgone, with the vp interlock held. */ void vgonel(struct vnode *vp) { struct thread *td; int oweinact; int active; struct mount *mp; CTR1(KTR_VFS, "vgonel: vp %p", vp); ASSERT_VOP_LOCKED(vp, "vgonel"); ASSERT_VI_LOCKED(vp, "vgonel"); VNASSERT(vp->v_holdcnt, vp, ("vgonel: vp %p has no reference.", vp)); td = curthread; /* * Don't vgonel if we're already doomed. */ if (vp->v_iflag & VI_DOOMED) return; vp->v_iflag |= VI_DOOMED; /* * Check to see if the vnode is in use. 
If so, we have to call * VOP_CLOSE() and VOP_INACTIVE(). */ active = vp->v_usecount; oweinact = (vp->v_iflag & VI_OWEINACT); VI_UNLOCK(vp); /* * Clean out any buffers associated with the vnode. * If the flush fails, just toss the buffers. */ mp = NULL; if (!TAILQ_EMPTY(&vp->v_bufobj.bo_dirty.bv_hd)) (void) vn_start_secondary_write(vp, &mp, V_WAIT); if (vinvalbuf(vp, V_SAVE, td, 0, 0) != 0) vinvalbuf(vp, 0, td, 0, 0); /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. */ if (active) VOP_CLOSE(vp, FNONBLOCK, NOCRED, td); if (oweinact || active) { VI_LOCK(vp); if ((vp->v_iflag & VI_DOINGINACT) == 0) vinactive(vp, td); VI_UNLOCK(vp); } /* * Reclaim the vnode. */ if (VOP_RECLAIM(vp, td)) panic("vgone: cannot reclaim"); if (mp != NULL) vn_finished_secondary_write(mp); VNASSERT(vp->v_object == NULL, vp, ("vop_reclaim left v_object vp=%p, tag=%s", vp, vp->v_tag)); /* * Delete from old mount point vnode list. */ delmntque(vp); cache_purge(vp); /* * Done with purge, reset to the standard lock and invalidate * the vnode. */ VI_LOCK(vp); vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_tag = "none"; vp->v_type = VBAD; } /* * Calculate the total number of references to a special device. */ int vcount(struct vnode *vp) { int count; dev_lock(); count = vp->v_rdev->si_usecount; dev_unlock(); return (count); } /* * Same as above, but using the struct cdev *as argument */ int count_dev(struct cdev *dev) { int count; dev_lock(); count = dev->si_usecount; dev_unlock(); return(count); } /* * Print out a description of a vnode. */ static char *typename[] = {"VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD", "VMARKER"}; void vn_printf(struct vnode *vp, const char *fmt, ...) { va_list ap; char buf[256], buf2[16]; u_long flags; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); printf("%p: ", (void *)vp); printf("tag %s, type %s\n", vp->v_tag, typename[vp->v_type]); printf(" usecount %d, writecount %d, refcount %d mountedhere %p\n", vp->v_usecount, vp->v_writecount, vp->v_holdcnt, vp->v_mountedhere); buf[0] = '\0'; buf[1] = '\0'; if (vp->v_vflag & VV_ROOT) strlcat(buf, "|VV_ROOT", sizeof(buf)); if (vp->v_vflag & VV_ISTTY) strlcat(buf, "|VV_ISTTY", sizeof(buf)); if (vp->v_vflag & VV_NOSYNC) strlcat(buf, "|VV_NOSYNC", sizeof(buf)); if (vp->v_vflag & VV_CACHEDLABEL) strlcat(buf, "|VV_CACHEDLABEL", sizeof(buf)); if (vp->v_vflag & VV_TEXT) strlcat(buf, "|VV_TEXT", sizeof(buf)); if (vp->v_vflag & VV_COPYONWRITE) strlcat(buf, "|VV_COPYONWRITE", sizeof(buf)); if (vp->v_vflag & VV_SYSTEM) strlcat(buf, "|VV_SYSTEM", sizeof(buf)); if (vp->v_vflag & VV_PROCDEP) strlcat(buf, "|VV_PROCDEP", sizeof(buf)); if (vp->v_vflag & VV_NOKNOTE) strlcat(buf, "|VV_NOKNOTE", sizeof(buf)); if (vp->v_vflag & VV_DELETED) strlcat(buf, "|VV_DELETED", sizeof(buf)); if (vp->v_vflag & VV_MD) strlcat(buf, "|VV_MD", sizeof(buf)); flags = vp->v_vflag & ~(VV_ROOT | VV_ISTTY | VV_NOSYNC | VV_CACHEDLABEL | VV_TEXT | VV_COPYONWRITE | VV_SYSTEM | VV_PROCDEP | VV_NOKNOTE | VV_DELETED | VV_MD); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VV(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } if (vp->v_iflag & VI_MOUNT) strlcat(buf, "|VI_MOUNT", sizeof(buf)); if (vp->v_iflag & VI_AGE) strlcat(buf, "|VI_AGE", sizeof(buf)); if (vp->v_iflag & VI_DOOMED) strlcat(buf, "|VI_DOOMED", sizeof(buf)); if (vp->v_iflag & VI_FREE) strlcat(buf, "|VI_FREE", sizeof(buf)); if (vp->v_iflag & VI_OBJDIRTY) strlcat(buf, "|VI_OBJDIRTY", sizeof(buf)); if (vp->v_iflag & VI_DOINGINACT) strlcat(buf, 
"|VI_DOINGINACT", sizeof(buf)); if (vp->v_iflag & VI_OWEINACT) strlcat(buf, "|VI_OWEINACT", sizeof(buf)); flags = vp->v_iflag & ~(VI_MOUNT | VI_AGE | VI_DOOMED | VI_FREE | VI_OBJDIRTY | VI_DOINGINACT | VI_OWEINACT); if (flags != 0) { snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags); strlcat(buf, buf2, sizeof(buf)); } printf(" flags (%s)\n", buf + 1); if (mtx_owned(VI_MTX(vp))) printf(" VI_LOCKed"); if (vp->v_object != NULL) printf(" v_object %p ref %d pages %d\n", vp->v_object, vp->v_object->ref_count, vp->v_object->resident_page_count); printf(" "); lockmgr_printinfo(vp->v_vnlock); printf("\n"); if (vp->v_data != NULL) VOP_PRINT(vp); } #ifdef DDB /* * List all of the locked vnodes in the system. * Called when debugging the kernel. */ DB_SHOW_COMMAND(lockedvnods, lockedvnodes) { struct mount *mp, *nmp; struct vnode *vp; /* * Note: because this is DDB, we can't obey the locking semantics * for these structures, which means we could catch an inconsistent * state and dereference a nasty pointer. Not much to be done * about that. */ printf("Locked vnodes\n"); for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { nmp = TAILQ_NEXT(mp, mnt_list); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (vp->v_type != VMARKER && VOP_ISLOCKED(vp, NULL)) vprint("", vp); } nmp = TAILQ_NEXT(mp, mnt_list); } } /* * Show details about the given vnode. */ DB_SHOW_COMMAND(vnode, db_show_vnode) { struct vnode *vp; if (!have_addr) return; vp = (struct vnode *)addr; vn_printf(vp, "vnode "); } #endif /* DDB */ /* * Fill in a struct xvfsconf based on a struct vfsconf. */ static void vfsconf2x(struct vfsconf *vfsp, struct xvfsconf *xvfsp) { strcpy(xvfsp->vfc_name, vfsp->vfc_name); xvfsp->vfc_typenum = vfsp->vfc_typenum; xvfsp->vfc_refcount = vfsp->vfc_refcount; xvfsp->vfc_flags = vfsp->vfc_flags; /* * These are unused in userland, we keep them * to not break binary compatibility. */ xvfsp->vfc_vfsops = NULL; xvfsp->vfc_next = NULL; } /* * Top level filesystem related information gathering. */ static int sysctl_vfs_conflist(SYSCTL_HANDLER_ARGS) { struct vfsconf *vfsp; struct xvfsconf xvfsp; int error; error = 0; TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { bzero(&xvfsp, sizeof(xvfsp)); vfsconf2x(vfsp, &xvfsp); error = SYSCTL_OUT(req, &xvfsp, sizeof xvfsp); if (error) break; } return (error); } SYSCTL_PROC(_vfs, OID_AUTO, conflist, CTLFLAG_RD, NULL, 0, sysctl_vfs_conflist, "S,xvfsconf", "List of all configured filesystems"); #ifndef BURN_BRIDGES static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS); static int vfs_sysctl(SYSCTL_HANDLER_ARGS) { int *name = (int *)arg1 - 1; /* XXX */ u_int namelen = arg2 + 1; /* XXX */ struct vfsconf *vfsp; struct xvfsconf xvfsp; printf("WARNING: userland calling deprecated sysctl, " "please rebuild world\n"); #if 1 || defined(COMPAT_PRELITE2) /* Resolve ambiguity between VFS_VFSCONF and VFS_GENERIC. 
*/ if (namelen == 1) return (sysctl_ovfs_conf(oidp, arg1, arg2, req)); #endif switch (name[1]) { case VFS_MAXTYPENUM: if (namelen != 2) return (ENOTDIR); return (SYSCTL_OUT(req, &maxvfsconf, sizeof(int))); case VFS_CONF: if (namelen != 3) return (ENOTDIR); /* overloaded */ TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) if (vfsp->vfc_typenum == name[2]) break; if (vfsp == NULL) return (EOPNOTSUPP); bzero(&xvfsp, sizeof(xvfsp)); vfsconf2x(vfsp, &xvfsp); return (SYSCTL_OUT(req, &xvfsp, sizeof(xvfsp))); } return (EOPNOTSUPP); } static SYSCTL_NODE(_vfs, VFS_GENERIC, generic, CTLFLAG_RD | CTLFLAG_SKIP, vfs_sysctl, "Generic filesystem"); #if 1 || defined(COMPAT_PRELITE2) static int sysctl_ovfs_conf(SYSCTL_HANDLER_ARGS) { int error; struct vfsconf *vfsp; struct ovfsconf ovfs; TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) { bzero(&ovfs, sizeof(ovfs)); ovfs.vfc_vfsops = vfsp->vfc_vfsops; /* XXX used as flag */ strcpy(ovfs.vfc_name, vfsp->vfc_name); ovfs.vfc_index = vfsp->vfc_typenum; ovfs.vfc_refcount = vfsp->vfc_refcount; ovfs.vfc_flags = vfsp->vfc_flags; error = SYSCTL_OUT(req, &ovfs, sizeof ovfs); if (error) return error; } return 0; } #endif /* 1 || COMPAT_PRELITE2 */ #endif /* !BURN_BRIDGES */ #define KINFO_VNODESLOP 10 #ifdef notyet /* * Dump vnode list (via sysctl). */ /* ARGSUSED */ static int sysctl_vnode(SYSCTL_HANDLER_ARGS) { struct xvnode *xvn; struct thread *td = req->td; struct mount *mp; struct vnode *vp; int error, len, n; /* * Stale numvnodes access is not fatal here. */ req->lock = 0; len = (numvnodes + KINFO_VNODESLOP) * sizeof *xvn; if (!req->oldptr) /* Make an estimate */ return (SYSCTL_OUT(req, 0, len)); error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); xvn = malloc(len, M_TEMP, M_ZERO | M_WAITOK); n = 0; mtx_lock(&mountlist_mtx); TAILQ_FOREACH(mp, &mountlist, mnt_list) { if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) continue; MNT_ILOCK(mp); TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) { if (n == len) break; vref(vp); xvn[n].xv_size = sizeof *xvn; xvn[n].xv_vnode = vp; xvn[n].xv_id = 0; /* XXX compat */ #define XV_COPY(field) xvn[n].xv_##field = vp->v_##field XV_COPY(usecount); XV_COPY(writecount); XV_COPY(holdcnt); XV_COPY(mount); XV_COPY(numoutput); XV_COPY(type); #undef XV_COPY xvn[n].xv_flag = vp->v_vflag; switch (vp->v_type) { case VREG: case VDIR: case VLNK: break; case VBLK: case VCHR: if (vp->v_rdev == NULL) { vrele(vp); continue; } xvn[n].xv_dev = dev2udev(vp->v_rdev); break; case VSOCK: xvn[n].xv_socket = vp->v_socket; break; case VFIFO: xvn[n].xv_fifo = vp->v_fifoinfo; break; case VNON: case VBAD: default: /* shouldn't happen? */ vrele(vp); continue; } vrele(vp); ++n; } MNT_IUNLOCK(mp); mtx_lock(&mountlist_mtx); vfs_unbusy(mp, td); if (n == len) break; } mtx_unlock(&mountlist_mtx); error = SYSCTL_OUT(req, xvn, n * sizeof *xvn); free(xvn, M_TEMP); return (error); } SYSCTL_PROC(_kern, KERN_VNODE, vnode, CTLTYPE_OPAQUE|CTLFLAG_RD, 0, 0, sysctl_vnode, "S,xvnode", ""); #endif /* * Unmount all filesystems. The list is traversed in reverse order * of mounting to avoid dependencies. */ void vfs_unmountall(void) { struct mount *mp; struct thread *td; int error; KASSERT(curthread != NULL, ("vfs_unmountall: NULL curthread")); td = curthread; /* * Since this only runs when rebooting, it is not interlocked. 
*/ while(!TAILQ_EMPTY(&mountlist)) { mp = TAILQ_LAST(&mountlist, mntlist); error = dounmount(mp, MNT_FORCE, td); if (error) { TAILQ_REMOVE(&mountlist, mp, mnt_list); /* * XXX: Due to the way in which we mount the root * file system off of devfs, devfs will generate a * "busy" warning when we try to unmount it before * the root. Don't print a warning as a result in * order to avoid false positive errors that may * cause needless upset. */ if (strcmp(mp->mnt_vfc->vfc_name, "devfs") != 0) { printf("unmount of %s failed (", mp->mnt_stat.f_mntonname); if (error == EBUSY) printf("BUSY)\n"); else printf("%d)\n", error); } } else { /* The unmount has removed mp from the mountlist */ } } } /* * perform msync on all vnodes under a mount point * the mount point must be locked. */ void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *mvp; struct vm_object *obj; MNT_ILOCK(mp); MNT_VNODE_FOREACH(vp, mp, mvp) { VI_LOCK(vp); if ((vp->v_iflag & VI_OBJDIRTY) && (flags == MNT_WAIT || VOP_ISLOCKED(vp, NULL) == 0)) { MNT_IUNLOCK(mp); if (!vget(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK, curthread)) { if (vp->v_vflag & VV_NOSYNC) { /* unlinked */ vput(vp); MNT_ILOCK(mp); continue; } obj = vp->v_object; if (obj != NULL) { VM_OBJECT_LOCK(obj); vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC); VM_OBJECT_UNLOCK(obj); } vput(vp); } MNT_ILOCK(mp); } else VI_UNLOCK(vp); } MNT_IUNLOCK(mp); } /* * Mark a vnode as free, putting it up for recycling. */ static void vfree(struct vnode *vp) { CTR1(KTR_VFS, "vfree vp %p", vp); ASSERT_VI_LOCKED(vp, "vfree"); mtx_lock(&vnode_free_list_mtx); VNASSERT(vp->v_op != NULL, vp, ("vfree: vnode already reclaimed.")); VNASSERT((vp->v_iflag & VI_FREE) == 0, vp, ("vnode already free")); VNASSERT(VSHOULDFREE(vp), vp, ("vfree: freeing when we shouldn't")); VNASSERT((vp->v_iflag & VI_DOOMED) == 0, vp, ("vfree: Freeing doomed vnode")); if (vp->v_iflag & VI_AGE) { TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist); } else { TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist); } freevnodes++; vp->v_iflag &= ~VI_AGE; vp->v_iflag |= VI_FREE; mtx_unlock(&vnode_free_list_mtx); } /* * Opposite of vfree() - mark a vnode as in use. */ static void vbusy(struct vnode *vp) { CTR1(KTR_VFS, "vbusy vp %p", vp); ASSERT_VI_LOCKED(vp, "vbusy"); VNASSERT((vp->v_iflag & VI_FREE) != 0, vp, ("vnode not free")); VNASSERT(vp->v_op != NULL, vp, ("vbusy: vnode already reclaimed.")); mtx_lock(&vnode_free_list_mtx); TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); freevnodes--; vp->v_iflag &= ~(VI_FREE|VI_AGE); mtx_unlock(&vnode_free_list_mtx); } /* * Initalize per-vnode helper structure to hold poll-related state. */ void v_addpollinfo(struct vnode *vp) { struct vpollinfo *vi; vi = uma_zalloc(vnodepoll_zone, M_WAITOK); if (vp->v_pollinfo != NULL) { uma_zfree(vnodepoll_zone, vi); return; } vp->v_pollinfo = vi; mtx_init(&vp->v_pollinfo->vpi_lock, "vnode pollinfo", NULL, MTX_DEF); knlist_init(&vp->v_pollinfo->vpi_selinfo.si_note, vp, vfs_knllock, vfs_knlunlock, vfs_knllocked); } /* * Record a process's interest in events which might happen to * a vnode. Because poll uses the historic select-style interface * internally, this routine serves as both the ``check for any * pending events'' and the ``record my interest in future events'' * functions. (These are done together, while the lock is held, * to avoid race conditions.) 
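 *
 * A hypothetical VOP_POLL implementation for a vnode whose state can change
 * asynchronously might therefore consist of little more than
 *
 *	return (vn_pollrecord(ap->a_vp, ap->a_td, ap->a_events));
 *
 * since vn_pollrecord() both reports any already-pending revents and records
 * the interest for a later wakeup on vpi_selinfo.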
*/ int vn_pollrecord(struct vnode *vp, struct thread *td, int events) { if (vp->v_pollinfo == NULL) v_addpollinfo(vp); mtx_lock(&vp->v_pollinfo->vpi_lock); if (vp->v_pollinfo->vpi_revents & events) { /* * This leaves events we are not interested * in available for the other process which * which presumably had requested them * (otherwise they would never have been * recorded). */ events &= vp->v_pollinfo->vpi_revents; vp->v_pollinfo->vpi_revents &= ~events; mtx_unlock(&vp->v_pollinfo->vpi_lock); return events; } vp->v_pollinfo->vpi_events |= events; selrecord(td, &vp->v_pollinfo->vpi_selinfo); mtx_unlock(&vp->v_pollinfo->vpi_lock); return 0; } /* * Routine to create and manage a filesystem syncer vnode. */ #define sync_close ((int (*)(struct vop_close_args *))nullop) static int sync_fsync(struct vop_fsync_args *); static int sync_inactive(struct vop_inactive_args *); static int sync_reclaim(struct vop_reclaim_args *); static struct vop_vector sync_vnodeops = { .vop_bypass = VOP_EOPNOTSUPP, .vop_close = sync_close, /* close */ .vop_fsync = sync_fsync, /* fsync */ .vop_inactive = sync_inactive, /* inactive */ .vop_reclaim = sync_reclaim, /* reclaim */ .vop_lock1 = vop_stdlock, /* lock */ .vop_unlock = vop_stdunlock, /* unlock */ .vop_islocked = vop_stdislocked, /* islocked */ }; /* * Create a new filesystem syncer vnode for the specified mount point. */ int vfs_allocate_syncvnode(struct mount *mp) { struct vnode *vp; static long start, incr, next; int error; /* Allocate a new vnode */ if ((error = getnewvnode("syncer", mp, &sync_vnodeops, &vp)) != 0) { mp->mnt_syncer = NULL; return (error); } vp->v_type = VNON; error = insmntque(vp, mp); if (error != 0) panic("vfs_allocate_syncvnode: insmntque failed"); /* * Place the vnode onto the syncer worklist. We attempt to * scatter them about on the list so that they will go off * at evenly distributed times even if all the filesystems * are mounted at once. */ next += incr; if (next == 0 || next > syncer_maxdelay) { start /= 2; incr /= 2; if (start == 0) { start = syncer_maxdelay / 2; incr = syncer_maxdelay; } next = start; } VI_LOCK(vp); vn_syncer_add_to_worklist(&vp->v_bufobj, syncdelay > 0 ? next % syncdelay : 0); /* XXX - vn_syncer_add_to_worklist() also grabs and drops sync_mtx. */ mtx_lock(&sync_mtx); sync_vnode_count++; mtx_unlock(&sync_mtx); VI_UNLOCK(vp); mp->mnt_syncer = vp; return (0); } /* * Do a lazy sync of the filesystem. */ static int sync_fsync(struct vop_fsync_args *ap) { struct vnode *syncvp = ap->a_vp; struct mount *mp = syncvp->v_mount; struct thread *td = ap->a_td; int error; struct bufobj *bo; /* * We only need to do something if this is a lazy evaluation. */ if (ap->a_waitfor != MNT_LAZY) return (0); /* * Move ourselves to the back of the sync list. */ bo = &syncvp->v_bufobj; BO_LOCK(bo); vn_syncer_add_to_worklist(bo, syncdelay); BO_UNLOCK(bo); /* * Walk the list of vnodes pushing all that are dirty and * not already on the sync list. 
*/ mtx_lock(&mountlist_mtx); if (vfs_busy(mp, LK_EXCLUSIVE | LK_NOWAIT, &mountlist_mtx, td) != 0) { mtx_unlock(&mountlist_mtx); return (0); } if (vn_start_write(NULL, &mp, V_NOWAIT) != 0) { vfs_unbusy(mp, td); return (0); } MNT_ILOCK(mp); mp->mnt_noasync++; mp->mnt_kern_flag &= ~MNTK_ASYNC; MNT_IUNLOCK(mp); vfs_msync(mp, MNT_NOWAIT); error = VFS_SYNC(mp, MNT_LAZY, td); MNT_ILOCK(mp); mp->mnt_noasync--; if ((mp->mnt_flag & MNT_ASYNC) != 0 && mp->mnt_noasync == 0) mp->mnt_kern_flag |= MNTK_ASYNC; MNT_IUNLOCK(mp); vn_finished_write(mp); vfs_unbusy(mp, td); return (error); } /* * The syncer vnode is no referenced. */ static int sync_inactive(struct vop_inactive_args *ap) { vgone(ap->a_vp); return (0); } /* * The syncer vnode is no longer needed and is being decommissioned. * * Modifications to the worklist must be protected by sync_mtx. */ static int sync_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct bufobj *bo; VI_LOCK(vp); bo = &vp->v_bufobj; vp->v_mount->mnt_syncer = NULL; if (bo->bo_flag & BO_ONWORKLST) { mtx_lock(&sync_mtx); LIST_REMOVE(bo, bo_synclist); syncer_worklist_len--; sync_vnode_count--; mtx_unlock(&sync_mtx); bo->bo_flag &= ~BO_ONWORKLST; } VI_UNLOCK(vp); return (0); } /* * Check if vnode represents a disk device */ int vn_isdisk(struct vnode *vp, int *errp) { int error; error = 0; dev_lock(); if (vp->v_type != VCHR) error = ENOTBLK; else if (vp->v_rdev == NULL) error = ENXIO; else if (vp->v_rdev->si_devsw == NULL) error = ENXIO; else if (!(vp->v_rdev->si_devsw->d_flags & D_DISK)) error = ENOTBLK; dev_unlock(); if (errp != NULL) *errp = error; return (error == 0); } /* * Common filesystem object access control check routine. Accepts a * vnode's type, "mode", uid and gid, requested access mode, credentials, * and optional call-by-reference privused argument allowing vaccess() * to indicate to the caller whether privilege was used to satisfy the * request (obsoleted). Returns 0 on success, or an errno on failure. * * The ifdef'd CAPABILITIES version is here for reference, but is not * actually used. */ int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid, gid_t file_gid, mode_t acc_mode, struct ucred *cred, int *privused) { mode_t dac_granted; mode_t priv_granted; /* * Look for a normal, non-privileged way to access the file/directory * as requested. If it exists, go with that. */ if (privused != NULL) *privused = 0; dac_granted = 0; /* Check the owner. */ if (cred->cr_uid == file_uid) { dac_granted |= VADMIN; if (file_mode & S_IXUSR) dac_granted |= VEXEC; if (file_mode & S_IRUSR) dac_granted |= VREAD; if (file_mode & S_IWUSR) dac_granted |= (VWRITE | VAPPEND); if ((acc_mode & dac_granted) == acc_mode) return (0); goto privcheck; } /* Otherwise, check the groups (first match) */ if (groupmember(file_gid, cred)) { if (file_mode & S_IXGRP) dac_granted |= VEXEC; if (file_mode & S_IRGRP) dac_granted |= VREAD; if (file_mode & S_IWGRP) dac_granted |= (VWRITE | VAPPEND); if ((acc_mode & dac_granted) == acc_mode) return (0); goto privcheck; } /* Otherwise, check everyone else. */ if (file_mode & S_IXOTH) dac_granted |= VEXEC; if (file_mode & S_IROTH) dac_granted |= VREAD; if (file_mode & S_IWOTH) dac_granted |= (VWRITE | VAPPEND); if ((acc_mode & dac_granted) == acc_mode) return (0); privcheck: /* * Build a privilege mask to determine if the set of privileges * satisfies the requirements when combined with the granted mask * from above. For each privilege, if the privilege is required, * bitwise or the request type onto the priv_granted mask. 
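 *
 * A worked example of how this combines with the DAC pass above: the owner
 * of a file with mode 0444 asking for VWRITE ends up with dac_granted of
 * (VADMIN | VREAD), which does not cover the request, so we fall through to
 * here; if priv_check_cred(cred, PRIV_VFS_WRITE, 0) succeeds, priv_granted
 * gains (VWRITE | VAPPEND), the combined mask now covers VWRITE, *privused
 * is set to 1 and vaccess() returns 0.  Otherwise it returns EACCES.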
*/ priv_granted = 0; if (type == VDIR) { /* * For directories, use PRIV_VFS_LOOKUP to satisfy VEXEC * requests, instead of PRIV_VFS_EXEC. */ if ((acc_mode & VEXEC) && ((dac_granted & VEXEC) == 0) && !priv_check_cred(cred, PRIV_VFS_LOOKUP, 0)) priv_granted |= VEXEC; } else { if ((acc_mode & VEXEC) && ((dac_granted & VEXEC) == 0) && !priv_check_cred(cred, PRIV_VFS_EXEC, 0)) priv_granted |= VEXEC; } if ((acc_mode & VREAD) && ((dac_granted & VREAD) == 0) && !priv_check_cred(cred, PRIV_VFS_READ, 0)) priv_granted |= VREAD; if ((acc_mode & VWRITE) && ((dac_granted & VWRITE) == 0) && !priv_check_cred(cred, PRIV_VFS_WRITE, 0)) priv_granted |= (VWRITE | VAPPEND); if ((acc_mode & VADMIN) && ((dac_granted & VADMIN) == 0) && !priv_check_cred(cred, PRIV_VFS_ADMIN, 0)) priv_granted |= VADMIN; if ((acc_mode & (priv_granted | dac_granted)) == acc_mode) { /* XXX audit: privilege used */ if (privused != NULL) *privused = 1; return (0); } return ((acc_mode & VADMIN) ? EPERM : EACCES); } /* * Credential check based on process requesting service, and per-attribute * permissions. */ int extattr_check_cred(struct vnode *vp, int attrnamespace, struct ucred *cred, struct thread *td, int access) { /* * Kernel-invoked always succeeds. */ if (cred == NOCRED) return (0); /* * Do not allow privileged processes in jail to directly manipulate * system attributes. */ switch (attrnamespace) { case EXTATTR_NAMESPACE_SYSTEM: /* Potentially should be: return (EPERM); */ return (priv_check_cred(cred, PRIV_VFS_EXTATTR_SYSTEM, 0)); case EXTATTR_NAMESPACE_USER: return (VOP_ACCESS(vp, access, cred, td)); default: return (EPERM); } } #ifdef DEBUG_VFS_LOCKS /* * This only exists to supress warnings from unlocked specfs accesses. It is * no longer ok to have an unlocked VFS. */ #define IGNORE_LOCK(vp) ((vp)->v_type == VCHR || (vp)->v_type == VBAD) int vfs_badlock_ddb = 1; /* Drop into debugger on violation. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_ddb, CTLFLAG_RW, &vfs_badlock_ddb, 0, ""); int vfs_badlock_mutex = 1; /* Check for interlock across VOPs. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_mutex, CTLFLAG_RW, &vfs_badlock_mutex, 0, ""); int vfs_badlock_print = 1; /* Print lock violations. */ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_print, CTLFLAG_RW, &vfs_badlock_print, 0, ""); #ifdef KDB int vfs_badlock_backtrace = 1; /* Print backtrace at lock violations. 
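/*
 * Illustrative userland sketch (not part of this file or commit) of the
 * discretionary walk vaccess() performs above: exactly one of the owner,
 * group or "other" mode triples is consulted, and only if the resulting
 * grant does not cover the request does the privilege fallback come into
 * play.  The V* bit values below are placeholders, and unlike
 * groupmember() this only compares a single gid.
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>

#define VEXEC	0x01
#define VREAD	0x02
#define VWRITE	0x04
#define VADMIN	0x08
#define VAPPEND	0x10

static mode_t
dac_granted(mode_t file_mode, uid_t file_uid, gid_t file_gid,
    uid_t cr_uid, gid_t cr_gid)
{
	mode_t granted = 0;

	if (cr_uid == file_uid) {		/* owner class */
		granted |= VADMIN;
		if (file_mode & S_IXUSR) granted |= VEXEC;
		if (file_mode & S_IRUSR) granted |= VREAD;
		if (file_mode & S_IWUSR) granted |= VWRITE | VAPPEND;
	} else if (cr_gid == file_gid) {	/* group class */
		if (file_mode & S_IXGRP) granted |= VEXEC;
		if (file_mode & S_IRGRP) granted |= VREAD;
		if (file_mode & S_IWGRP) granted |= VWRITE | VAPPEND;
	} else {				/* everyone else */
		if (file_mode & S_IXOTH) granted |= VEXEC;
		if (file_mode & S_IROTH) granted |= VREAD;
		if (file_mode & S_IWOTH) granted |= VWRITE | VAPPEND;
	}
	return (granted);
}

int
main(void)
{
	/* 0644 file owned by uid 100, gid 200; a group member wants R+W. */
	mode_t g = dac_granted(0644, 100, 200, 101, 200);
	mode_t acc = VREAD | VWRITE;

	printf("%s\n", (acc & g) == acc ?
	    "granted by DAC alone" : "falls through to the privilege check");
	return (0);
}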
*/ SYSCTL_INT(_debug, OID_AUTO, vfs_badlock_backtrace, CTLFLAG_RW, &vfs_badlock_backtrace, 0, ""); #endif static void vfs_badlock(const char *msg, const char *str, struct vnode *vp) { #ifdef KDB if (vfs_badlock_backtrace) kdb_backtrace(); #endif if (vfs_badlock_print) printf("%s: %p %s\n", str, (void *)vp, msg); if (vfs_badlock_ddb) - kdb_enter("lock violation"); + kdb_enter_why(KDB_WHY_VFSLOCK, "lock violation"); } void assert_vi_locked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && !mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is not locked but should be", str, vp); } void assert_vi_unlocked(struct vnode *vp, const char *str) { if (vfs_badlock_mutex && mtx_owned(VI_MTX(vp))) vfs_badlock("interlock is locked but should not be", str, vp); } void assert_vop_locked(struct vnode *vp, const char *str) { if (vp && !IGNORE_LOCK(vp) && VOP_ISLOCKED(vp, NULL) == 0) vfs_badlock("is not locked but should be", str, vp); } void assert_vop_unlocked(struct vnode *vp, const char *str) { if (vp && !IGNORE_LOCK(vp) && VOP_ISLOCKED(vp, curthread) == LK_EXCLUSIVE) vfs_badlock("is locked but should not be", str, vp); } void assert_vop_elocked(struct vnode *vp, const char *str) { if (vp && !IGNORE_LOCK(vp) && VOP_ISLOCKED(vp, curthread) != LK_EXCLUSIVE) vfs_badlock("is not exclusive locked but should be", str, vp); } #if 0 void assert_vop_elocked_other(struct vnode *vp, const char *str) { if (vp && !IGNORE_LOCK(vp) && VOP_ISLOCKED(vp, curthread) != LK_EXCLOTHER) vfs_badlock("is not exclusive locked by another thread", str, vp); } void assert_vop_slocked(struct vnode *vp, const char *str) { if (vp && !IGNORE_LOCK(vp) && VOP_ISLOCKED(vp, curthread) != LK_SHARED) vfs_badlock("is not locked shared but should be", str, vp); } #endif /* 0 */ #endif /* DEBUG_VFS_LOCKS */ void vop_rename_pre(void *ap) { struct vop_rename_args *a = ap; #ifdef DEBUG_VFS_LOCKS if (a->a_tvp) ASSERT_VI_UNLOCKED(a->a_tvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_tdvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fvp, "VOP_RENAME"); ASSERT_VI_UNLOCKED(a->a_fdvp, "VOP_RENAME"); /* Check the source (from). */ if (a->a_tdvp != a->a_fdvp && a->a_tvp != a->a_fdvp) ASSERT_VOP_UNLOCKED(a->a_fdvp, "vop_rename: fdvp locked"); if (a->a_tvp != a->a_fvp) ASSERT_VOP_UNLOCKED(a->a_fvp, "vop_rename: fvp locked"); /* Check the target. */ if (a->a_tvp) ASSERT_VOP_LOCKED(a->a_tvp, "vop_rename: tvp not locked"); ASSERT_VOP_LOCKED(a->a_tdvp, "vop_rename: tdvp not locked"); #endif if (a->a_tdvp != a->a_fdvp) vhold(a->a_fdvp); if (a->a_tvp != a->a_fvp) vhold(a->a_fvp); vhold(a->a_tdvp); if (a->a_tvp) vhold(a->a_tvp); } void vop_strategy_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_strategy_args *a; struct buf *bp; a = ap; bp = a->a_bp; /* * Cluster ops lock their component buffers but not the IO container. 
*/ if ((bp->b_flags & B_CLUSTER) != 0) return; if (BUF_REFCNT(bp) < 1) { if (vfs_badlock_print) printf( "VOP_STRATEGY: bp is not locked but should be\n"); if (vfs_badlock_ddb) - kdb_enter("lock violation"); + kdb_enter_why(KDB_WHY_VFSLOCK, "lock violation"); } #endif } void vop_lookup_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_lookup_args *a; struct vnode *dvp; a = ap; dvp = a->a_dvp; ASSERT_VI_UNLOCKED(dvp, "VOP_LOOKUP"); ASSERT_VOP_LOCKED(dvp, "VOP_LOOKUP"); #endif } void vop_lookup_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_lookup_args *a; struct vnode *dvp; struct vnode *vp; a = ap; dvp = a->a_dvp; vp = *(a->a_vpp); ASSERT_VI_UNLOCKED(dvp, "VOP_LOOKUP"); ASSERT_VOP_LOCKED(dvp, "VOP_LOOKUP"); if (!rc) ASSERT_VOP_LOCKED(vp, "VOP_LOOKUP (child)"); #endif } void vop_lock_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_lock1_args *a = ap; if ((a->a_flags & LK_INTERLOCK) == 0) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); else ASSERT_VI_LOCKED(a->a_vp, "VOP_LOCK"); #endif } void vop_lock_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_lock1_args *a = ap; ASSERT_VI_UNLOCKED(a->a_vp, "VOP_LOCK"); if (rc == 0) ASSERT_VOP_LOCKED(a->a_vp, "VOP_LOCK"); #endif } void vop_unlock_pre(void *ap) { #ifdef DEBUG_VFS_LOCKS struct vop_unlock_args *a = ap; if (a->a_flags & LK_INTERLOCK) ASSERT_VI_LOCKED(a->a_vp, "VOP_UNLOCK"); ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK"); #endif } void vop_unlock_post(void *ap, int rc) { #ifdef DEBUG_VFS_LOCKS struct vop_unlock_args *a = ap; if (a->a_flags & LK_INTERLOCK) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_UNLOCK"); #endif } void vop_create_post(void *ap, int rc) { struct vop_create_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_link_post(void *ap, int rc) { struct vop_link_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_vp, NOTE_LINK); VFS_KNOTE_LOCKED(a->a_tdvp, NOTE_WRITE); } } void vop_mkdir_post(void *ap, int rc) { struct vop_mkdir_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); } void vop_mknod_post(void *ap, int rc) { struct vop_mknod_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } void vop_remove_post(void *ap, int rc) { struct vop_remove_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_rename_post(void *ap, int rc) { struct vop_rename_args *a = ap; if (!rc) { VFS_KNOTE_UNLOCKED(a->a_fdvp, NOTE_WRITE); VFS_KNOTE_UNLOCKED(a->a_tdvp, NOTE_WRITE); VFS_KNOTE_UNLOCKED(a->a_fvp, NOTE_RENAME); if (a->a_tvp) VFS_KNOTE_UNLOCKED(a->a_tvp, NOTE_DELETE); } if (a->a_tdvp != a->a_fdvp) vdrop(a->a_fdvp); if (a->a_tvp != a->a_fvp) vdrop(a->a_fvp); vdrop(a->a_tdvp); if (a->a_tvp) vdrop(a->a_tvp); } void vop_rmdir_post(void *ap, int rc) { struct vop_rmdir_args *a = ap; if (!rc) { VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE | NOTE_LINK); VFS_KNOTE_LOCKED(a->a_vp, NOTE_DELETE); } } void vop_setattr_post(void *ap, int rc) { struct vop_setattr_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_vp, NOTE_ATTRIB); } void vop_symlink_post(void *ap, int rc) { struct vop_symlink_args *a = ap; if (!rc) VFS_KNOTE_LOCKED(a->a_dvp, NOTE_WRITE); } static struct knlist fs_knlist; static void vfs_event_init(void *arg) { knlist_init(&fs_knlist, NULL, NULL, NULL, NULL); } /* XXX - correct order? 
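/*
 * Illustrative userland consumer (not part of this file or commit): the
 * vop_*_post() hooks above turn successful VOPs into NOTE_WRITE, NOTE_LINK,
 * NOTE_DELETE, NOTE_ATTRIB and NOTE_RENAME activity on the vnode's knote
 * list; a process observes them through an EVFILT_VNODE kevent filter,
 * roughly like this (FreeBSD kqueue API).
 */
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <err.h>
#include <fcntl.h>
#include <stdio.h>

int
main(int argc, char **argv)
{
	struct kevent ev;
	int fd, kq;

	if (argc != 2)
		errx(1, "usage: %s file-or-directory", argv[0]);
	if ((fd = open(argv[1], O_RDONLY)) < 0)
		err(1, "open");
	if ((kq = kqueue()) < 0)
		err(1, "kqueue");

	/* Register for the notes the VOP post-hooks may post on this vnode. */
	EV_SET(&ev, fd, EVFILT_VNODE, EV_ADD | EV_CLEAR,
	    NOTE_WRITE | NOTE_LINK | NOTE_DELETE | NOTE_ATTRIB | NOTE_RENAME,
	    0, NULL);
	if (kevent(kq, &ev, 1, NULL, 0, NULL) < 0)
		err(1, "kevent register");

	for (;;) {
		if (kevent(kq, NULL, 0, &ev, 1, NULL) < 0)
			err(1, "kevent wait");
		printf("vnode event, fflags 0x%x\n", (unsigned)ev.fflags);
	}
}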
*/ SYSINIT(vfs_knlist, SI_SUB_VFS, SI_ORDER_ANY, vfs_event_init, NULL); void vfs_event_signal(fsid_t *fsid, u_int32_t event, intptr_t data __unused) { KNOTE_UNLOCKED(&fs_knlist, event); } static int filt_fsattach(struct knote *kn); static void filt_fsdetach(struct knote *kn); static int filt_fsevent(struct knote *kn, long hint); struct filterops fs_filtops = { 0, filt_fsattach, filt_fsdetach, filt_fsevent }; static int filt_fsattach(struct knote *kn) { kn->kn_flags |= EV_CLEAR; knlist_add(&fs_knlist, kn, 0); return (0); } static void filt_fsdetach(struct knote *kn) { knlist_remove(&fs_knlist, kn, 0); } static int filt_fsevent(struct knote *kn, long hint) { kn->kn_fflags |= hint; return (kn->kn_fflags != 0); } static int sysctl_vfs_ctl(SYSCTL_HANDLER_ARGS) { struct vfsidctl vc; int error; struct mount *mp; error = SYSCTL_IN(req, &vc, sizeof(vc)); if (error) return (error); if (vc.vc_vers != VFS_CTL_VERS1) return (EINVAL); mp = vfs_getvfs(&vc.vc_fsid); if (mp == NULL) return (ENOENT); /* ensure that a specific sysctl goes to the right filesystem. */ if (strcmp(vc.vc_fstypename, "*") != 0 && strcmp(vc.vc_fstypename, mp->mnt_vfc->vfc_name) != 0) { vfs_rel(mp); return (EINVAL); } VCTLTOREQ(&vc, req); error = VFS_SYSCTL(mp, vc.vc_op, req); vfs_rel(mp); return (error); } SYSCTL_PROC(_vfs, OID_AUTO, ctl, CTLFLAG_WR, NULL, 0, sysctl_vfs_ctl, "", "Sysctl by fsid"); /* * Function to initialize a va_filerev field sensibly. * XXX: Wouldn't a random number make a lot more sense ?? */ u_quad_t init_va_filerev(void) { struct bintime bt; getbinuptime(&bt); return (((u_quad_t)bt.sec << 32LL) | (bt.frac >> 32LL)); } static int filt_vfsread(struct knote *kn, long hint); static int filt_vfswrite(struct knote *kn, long hint); static int filt_vfsvnode(struct knote *kn, long hint); static void filt_vfsdetach(struct knote *kn); static struct filterops vfsread_filtops = { 1, NULL, filt_vfsdetach, filt_vfsread }; static struct filterops vfswrite_filtops = { 1, NULL, filt_vfsdetach, filt_vfswrite }; static struct filterops vfsvnode_filtops = { 1, NULL, filt_vfsdetach, filt_vfsvnode }; static void vfs_knllock(void *arg) { struct vnode *vp = arg; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread); } static void vfs_knlunlock(void *arg) { struct vnode *vp = arg; VOP_UNLOCK(vp, 0, curthread); } static int vfs_knllocked(void *arg) { struct vnode *vp = arg; return (VOP_ISLOCKED(vp, curthread) == LK_EXCLUSIVE); } int vfs_kqfilter(struct vop_kqfilter_args *ap) { struct vnode *vp = ap->a_vp; struct knote *kn = ap->a_kn; struct knlist *knl; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &vfsread_filtops; break; case EVFILT_WRITE: kn->kn_fop = &vfswrite_filtops; break; case EVFILT_VNODE: kn->kn_fop = &vfsvnode_filtops; break; default: return (EINVAL); } kn->kn_hook = (caddr_t)vp; if (vp->v_pollinfo == NULL) v_addpollinfo(vp); if (vp->v_pollinfo == NULL) return (ENOMEM); knl = &vp->v_pollinfo->vpi_selinfo.si_note; knlist_add(knl, kn, 0); return (0); } /* * Detach knote from vnode */ static void filt_vfsdetach(struct knote *kn) { struct vnode *vp = (struct vnode *)kn->kn_hook; KASSERT(vp->v_pollinfo != NULL, ("Missing v_pollinfo")); knlist_remove(&vp->v_pollinfo->vpi_selinfo.si_note, kn, 0); } /*ARGSUSED*/ static int filt_vfsread(struct knote *kn, long hint) { struct vnode *vp = (struct vnode *)kn->kn_hook; struct vattr va; /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. 
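/*
 * Illustrative userland sketch (not part of this file or commit) of the
 * packing done by init_va_filerev() above: the uptime's seconds go in the
 * high 32 bits and the upper half of the binary fraction in the low 32
 * bits.  getbinuptime() is kernel-only, so clock_gettime() stands in here
 * and the nanosecond-to-fraction conversion is only approximate.
 */
#include <stdint.h>
#include <stdio.h>
#include <time.h>

int
main(void)
{
	struct timespec ts;
	uint64_t sec, frac, filerev;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	sec = (uint64_t)ts.tv_sec;
	/* Scale nanoseconds into a 64-bit binary fraction of a second. */
	frac = (uint64_t)ts.tv_nsec * (UINT64_MAX / 1000000000ULL);

	filerev = (sec << 32) | (frac >> 32);	/* same shape as va_filerev */
	printf("va_filerev seed: 0x%016llx\n", (unsigned long long)filerev);
	return (0);
}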
*/ if (hint == NOTE_REVOKE) { kn->kn_flags |= (EV_EOF | EV_ONESHOT); return (1); } if (VOP_GETATTR(vp, &va, curthread->td_ucred, curthread)) return (0); kn->kn_data = va.va_size - kn->kn_fp->f_offset; return (kn->kn_data != 0); } /*ARGSUSED*/ static int filt_vfswrite(struct knote *kn, long hint) { /* * filesystem is gone, so set the EOF flag and schedule * the knote for deletion. */ if (hint == NOTE_REVOKE) kn->kn_flags |= (EV_EOF | EV_ONESHOT); kn->kn_data = 0; return (1); } static int filt_vfsvnode(struct knote *kn, long hint) { if (kn->kn_sfflags & hint) kn->kn_fflags |= hint; if (hint == NOTE_REVOKE) { kn->kn_flags |= EV_EOF; return (1); } return (kn->kn_fflags != 0); } int vfs_read_dirent(struct vop_readdir_args *ap, struct dirent *dp, off_t off) { int error; if (dp->d_reclen > ap->a_uio->uio_resid) return (ENAMETOOLONG); error = uiomove(dp, dp->d_reclen, ap->a_uio); if (error) { if (ap->a_ncookies != NULL) { if (ap->a_cookies != NULL) free(ap->a_cookies, M_TEMP); ap->a_cookies = NULL; *ap->a_ncookies = 0; } return (error); } if (ap->a_ncookies == NULL) return (0); KASSERT(ap->a_cookies, ("NULL ap->a_cookies value with non-NULL ap->a_ncookies!")); *ap->a_cookies = realloc(*ap->a_cookies, (*ap->a_ncookies + 1) * sizeof(u_long), M_TEMP, M_WAITOK | M_ZERO); (*ap->a_cookies)[*ap->a_ncookies] = off; return (0); } /* * Mark for update the access time of the file if the filesystem * supports VA_MARK_ATIME. This functionality is used by execve * and mmap, so we want to avoid the synchronous I/O implied by * directly setting va_atime for the sake of efficiency. */ void vfs_mark_atime(struct vnode *vp, struct thread *td) { struct vattr atimeattr; if ((vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0) { VATTR_NULL(&atimeattr); atimeattr.va_vaflags |= VA_MARK_ATIME; (void)VOP_SETATTR(vp, &atimeattr, td->td_ucred, td); } } Index: stable/7/sys/netgraph/ng_base.c =================================================================== --- stable/7/sys/netgraph/ng_base.c (revision 177733) +++ stable/7/sys/netgraph/ng_base.c (revision 177734) @@ -1,3870 +1,3870 @@ /* * ng_base.c */ /*- * Copyright (c) 1996-1999 Whistle Communications, Inc. * All rights reserved. * * Subject to the following obligations and disclaimer of warranty, use and * redistribution of this software, in source or object code forms, with or * without modifications are expressly permitted by Whistle Communications; * provided, however, that: * 1. Any and all reproductions of the source or object code must include the * copyright notice above and the following disclaimer of warranties; and * 2. No rights are granted, in any manner or form, to use Whistle * Communications, Inc. trademarks, including the mark "WHISTLE * COMMUNICATIONS" on advertising, endorsements, or otherwise except as * such appears in the above copyright notice or in the software. * * THIS SOFTWARE IS BEING PROVIDED BY WHISTLE COMMUNICATIONS "AS IS", AND * TO THE MAXIMUM EXTENT PERMITTED BY LAW, WHISTLE COMMUNICATIONS MAKES NO * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, REGARDING THIS SOFTWARE, * INCLUDING WITHOUT LIMITATION, ANY AND ALL IMPLIED WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT. * WHISTLE COMMUNICATIONS DOES NOT WARRANT, GUARANTEE, OR MAKE ANY * REPRESENTATIONS REGARDING THE USE OF, OR THE RESULTS OF THE USE OF THIS * SOFTWARE IN TERMS OF ITS CORRECTNESS, ACCURACY, RELIABILITY OR OTHERWISE. 
* IN NO EVENT SHALL WHISTLE COMMUNICATIONS BE LIABLE FOR ANY DAMAGES * RESULTING FROM OR ARISING OUT OF ANY USE OF THIS SOFTWARE, INCLUDING * WITHOUT LIMITATION, ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * PUNITIVE, OR CONSEQUENTIAL DAMAGES, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES, LOSS OF USE, DATA OR PROFITS, HOWEVER CAUSED AND UNDER ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF WHISTLE COMMUNICATIONS IS ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. * * Authors: Julian Elischer * Archie Cobbs * * $FreeBSD$ * $Whistle: ng_base.c,v 1.39 1999/01/28 23:54:53 julian Exp $ */ /* * This file implements the base netgraph code. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(netgraph, NG_ABI_VERSION); /* Mutex to protect topology events. */ static struct mtx ng_topo_mtx; #ifdef NETGRAPH_DEBUG static struct mtx ng_nodelist_mtx; /* protects global node/hook lists */ static struct mtx ngq_mtx; /* protects the queue item list */ static SLIST_HEAD(, ng_node) ng_allnodes; static LIST_HEAD(, ng_node) ng_freenodes; /* in debug, we never free() them */ static SLIST_HEAD(, ng_hook) ng_allhooks; static LIST_HEAD(, ng_hook) ng_freehooks; /* in debug, we never free() them */ static void ng_dumpitems(void); static void ng_dumpnodes(void); static void ng_dumphooks(void); #endif /* NETGRAPH_DEBUG */ /* * DEAD versions of the structures. * In order to avoid races, it is sometimes neccesary to point * at SOMETHING even though theoretically, the current entity is * INVALID. Use these to avoid these races. */ struct ng_type ng_deadtype = { NG_ABI_VERSION, "dead", NULL, /* modevent */ NULL, /* constructor */ NULL, /* rcvmsg */ NULL, /* shutdown */ NULL, /* newhook */ NULL, /* findhook */ NULL, /* connect */ NULL, /* rcvdata */ NULL, /* disconnect */ NULL, /* cmdlist */ }; struct ng_node ng_deadnode = { "dead", &ng_deadtype, NGF_INVALID, 1, /* refs */ 0, /* numhooks */ NULL, /* private */ 0, /* ID */ LIST_HEAD_INITIALIZER(ng_deadnode.hooks), {}, /* all_nodes list entry */ {}, /* id hashtable list entry */ {}, /* workqueue entry */ { 0, {}, /* should never use! (should hang) */ NULL, &ng_deadnode.nd_input_queue.queue, &ng_deadnode }, #ifdef NETGRAPH_DEBUG ND_MAGIC, __FILE__, __LINE__, {NULL} #endif /* NETGRAPH_DEBUG */ }; struct ng_hook ng_deadhook = { "dead", NULL, /* private */ HK_INVALID | HK_DEAD, 1, /* refs always >= 1 */ 0, /* undefined data link type */ &ng_deadhook, /* Peer is self */ &ng_deadnode, /* attached to deadnode */ {}, /* hooks list */ NULL, /* override rcvmsg() */ NULL, /* override rcvdata() */ #ifdef NETGRAPH_DEBUG HK_MAGIC, __FILE__, __LINE__, {NULL} #endif /* NETGRAPH_DEBUG */ }; /* * END DEAD STRUCTURES */ /* List nodes with unallocated work */ static TAILQ_HEAD(, ng_node) ng_worklist = TAILQ_HEAD_INITIALIZER(ng_worklist); static struct mtx ng_worklist_mtx; /* MUST LOCK NODE FIRST */ /* List of installed types */ static LIST_HEAD(, ng_type) ng_typelist; static struct mtx ng_typelist_mtx; /* Hash related definitions */ /* XXX Don't need to initialise them because it's a LIST */ #define NG_ID_HASH_SIZE 128 /* most systems wont need even this many */ static LIST_HEAD(, ng_node) ng_ID_hash[NG_ID_HASH_SIZE]; static struct mtx ng_idhash_mtx; /* Method to find a node.. 
used twice so do it here */ #define NG_IDHASH_FN(ID) ((ID) % (NG_ID_HASH_SIZE)) #define NG_IDHASH_FIND(ID, node) \ do { \ mtx_assert(&ng_idhash_mtx, MA_OWNED); \ LIST_FOREACH(node, &ng_ID_hash[NG_IDHASH_FN(ID)], \ nd_idnodes) { \ if (NG_NODE_IS_VALID(node) \ && (NG_NODE_ID(node) == ID)) { \ break; \ } \ } \ } while (0) #define NG_NAME_HASH_SIZE 128 /* most systems wont need even this many */ static LIST_HEAD(, ng_node) ng_name_hash[NG_NAME_HASH_SIZE]; static struct mtx ng_namehash_mtx; #define NG_NAMEHASH(NAME, HASH) \ do { \ u_char h = 0; \ const u_char *c; \ for (c = (const u_char*)(NAME); *c; c++)\ h += *c; \ (HASH) = h % (NG_NAME_HASH_SIZE); \ } while (0) /* Internal functions */ static int ng_add_hook(node_p node, const char *name, hook_p * hookp); static int ng_generic_msg(node_p here, item_p item, hook_p lasthook); static ng_ID_t ng_decodeidname(const char *name); static int ngb_mod_event(module_t mod, int event, void *data); static void ng_worklist_remove(node_p node); static void ngintr(void); static int ng_apply_item(node_p node, item_p item, int rw); static void ng_flush_input_queue(struct ng_queue * ngq); static void ng_setisr(node_p node); static node_p ng_ID2noderef(ng_ID_t ID); static int ng_con_nodes(item_p item, node_p node, const char *name, node_p node2, const char *name2); static int ng_con_part2(node_p node, item_p item, hook_p hook); static int ng_con_part3(node_p node, item_p item, hook_p hook); static int ng_mkpeer(node_p node, const char *name, const char *name2, char *type); /* Imported, these used to be externally visible, some may go back. */ void ng_destroy_hook(hook_p hook); node_p ng_name2noderef(node_p node, const char *name); int ng_path2noderef(node_p here, const char *path, node_p *dest, hook_p *lasthook); int ng_make_node(const char *type, node_p *nodepp); int ng_path_parse(char *addr, char **node, char **path, char **hook); void ng_rmnode(node_p node, hook_p dummy1, void *dummy2, int dummy3); void ng_unname(node_p node); /* Our own netgraph malloc type */ MALLOC_DEFINE(M_NETGRAPH, "netgraph", "netgraph structures and ctrl messages"); MALLOC_DEFINE(M_NETGRAPH_HOOK, "netgraph_hook", "netgraph hook structures"); MALLOC_DEFINE(M_NETGRAPH_NODE, "netgraph_node", "netgraph node structures"); MALLOC_DEFINE(M_NETGRAPH_ITEM, "netgraph_item", "netgraph item structures"); MALLOC_DEFINE(M_NETGRAPH_MSG, "netgraph_msg", "netgraph name storage"); /* Should not be visible outside this file */ #define _NG_ALLOC_HOOK(hook) \ MALLOC(hook, hook_p, sizeof(*hook), M_NETGRAPH_HOOK, M_NOWAIT | M_ZERO) #define _NG_ALLOC_NODE(node) \ MALLOC(node, node_p, sizeof(*node), M_NETGRAPH_NODE, M_NOWAIT | M_ZERO) #define NG_QUEUE_LOCK_INIT(n) \ mtx_init(&(n)->q_mtx, "ng_node", NULL, MTX_DEF) #define NG_QUEUE_LOCK(n) \ mtx_lock(&(n)->q_mtx) #define NG_QUEUE_UNLOCK(n) \ mtx_unlock(&(n)->q_mtx) #define NG_WORKLIST_LOCK_INIT() \ mtx_init(&ng_worklist_mtx, "ng_worklist", NULL, MTX_DEF) #define NG_WORKLIST_LOCK() \ mtx_lock(&ng_worklist_mtx) #define NG_WORKLIST_UNLOCK() \ mtx_unlock(&ng_worklist_mtx) #ifdef NETGRAPH_DEBUG /*----------------------------------------------*/ /* * In debug mode: * In an attempt to help track reference count screwups * we do not free objects back to the malloc system, but keep them * in a local cache where we can examine them and keep information safely * after they have been freed. * We use this scheme for nodes and hooks, and to some extent for items. 
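/*
 * Illustrative userland sketch (not part of this file or commit) of the two
 * hash functions defined above: NG_IDHASH_FN() is a plain modulus over the
 * 128 ID buckets, and NG_NAMEHASH() folds the byte sum of the node name
 * into the 128 name buckets.  The example inputs are made up.
 */
#include <stdio.h>

#define NG_ID_HASH_SIZE		128
#define NG_NAME_HASH_SIZE	128

static unsigned
idhash(unsigned id)
{
	return (id % NG_ID_HASH_SIZE);
}

static unsigned
namehash(const char *name)
{
	unsigned char h = 0;
	const unsigned char *c;

	for (c = (const unsigned char *)name; *c != '\0'; c++)
		h += *c;		/* same byte sum as NG_NAMEHASH() */
	return (h % NG_NAME_HASH_SIZE);
}

int
main(void)
{
	printf("node ID 0x2f1  -> ID bucket %u\n", idhash(0x2f1));
	printf("name \"ether0\" -> name bucket %u\n", namehash("ether0"));
	return (0);
}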
*/ static __inline hook_p ng_alloc_hook(void) { hook_p hook; SLIST_ENTRY(ng_hook) temp; mtx_lock(&ng_nodelist_mtx); hook = LIST_FIRST(&ng_freehooks); if (hook) { LIST_REMOVE(hook, hk_hooks); bcopy(&hook->hk_all, &temp, sizeof(temp)); bzero(hook, sizeof(struct ng_hook)); bcopy(&temp, &hook->hk_all, sizeof(temp)); mtx_unlock(&ng_nodelist_mtx); hook->hk_magic = HK_MAGIC; } else { mtx_unlock(&ng_nodelist_mtx); _NG_ALLOC_HOOK(hook); if (hook) { hook->hk_magic = HK_MAGIC; mtx_lock(&ng_nodelist_mtx); SLIST_INSERT_HEAD(&ng_allhooks, hook, hk_all); mtx_unlock(&ng_nodelist_mtx); } } return (hook); } static __inline node_p ng_alloc_node(void) { node_p node; SLIST_ENTRY(ng_node) temp; mtx_lock(&ng_nodelist_mtx); node = LIST_FIRST(&ng_freenodes); if (node) { LIST_REMOVE(node, nd_nodes); bcopy(&node->nd_all, &temp, sizeof(temp)); bzero(node, sizeof(struct ng_node)); bcopy(&temp, &node->nd_all, sizeof(temp)); mtx_unlock(&ng_nodelist_mtx); node->nd_magic = ND_MAGIC; } else { mtx_unlock(&ng_nodelist_mtx); _NG_ALLOC_NODE(node); if (node) { node->nd_magic = ND_MAGIC; mtx_lock(&ng_nodelist_mtx); SLIST_INSERT_HEAD(&ng_allnodes, node, nd_all); mtx_unlock(&ng_nodelist_mtx); } } return (node); } #define NG_ALLOC_HOOK(hook) do { (hook) = ng_alloc_hook(); } while (0) #define NG_ALLOC_NODE(node) do { (node) = ng_alloc_node(); } while (0) #define NG_FREE_HOOK(hook) \ do { \ mtx_lock(&ng_nodelist_mtx); \ LIST_INSERT_HEAD(&ng_freehooks, hook, hk_hooks); \ hook->hk_magic = 0; \ mtx_unlock(&ng_nodelist_mtx); \ } while (0) #define NG_FREE_NODE(node) \ do { \ mtx_lock(&ng_nodelist_mtx); \ LIST_INSERT_HEAD(&ng_freenodes, node, nd_nodes); \ node->nd_magic = 0; \ mtx_unlock(&ng_nodelist_mtx); \ } while (0) #else /* NETGRAPH_DEBUG */ /*----------------------------------------------*/ #define NG_ALLOC_HOOK(hook) _NG_ALLOC_HOOK(hook) #define NG_ALLOC_NODE(node) _NG_ALLOC_NODE(node) #define NG_FREE_HOOK(hook) do { FREE((hook), M_NETGRAPH_HOOK); } while (0) #define NG_FREE_NODE(node) do { FREE((node), M_NETGRAPH_NODE); } while (0) #endif /* NETGRAPH_DEBUG */ /*----------------------------------------------*/ /* Set this to kdb_enter("X") to catch all errors as they occur */ #ifndef TRAP_ERROR #define TRAP_ERROR() #endif static ng_ID_t nextID = 1; #ifdef INVARIANTS #define CHECK_DATA_MBUF(m) do { \ struct mbuf *n; \ int total; \ \ M_ASSERTPKTHDR(m); \ for (total = 0, n = (m); n != NULL; n = n->m_next) { \ total += n->m_len; \ if (n->m_nextpkt != NULL) \ panic("%s: m_nextpkt", __func__); \ } \ \ if ((m)->m_pkthdr.len != total) { \ panic("%s: %d != %d", \ __func__, (m)->m_pkthdr.len, total); \ } \ } while (0) #else #define CHECK_DATA_MBUF(m) #endif #define ERROUT(x) do { error = (x); goto done; } while (0) /************************************************************************ Parse type definitions for generic messages ************************************************************************/ /* Handy structure parse type defining macro */ #define DEFINE_PARSE_STRUCT_TYPE(lo, up, args) \ static const struct ng_parse_struct_field \ ng_ ## lo ## _type_fields[] = NG_GENERIC_ ## up ## _INFO args; \ static const struct ng_parse_type ng_generic_ ## lo ## _type = { \ &ng_parse_struct_type, \ &ng_ ## lo ## _type_fields \ } DEFINE_PARSE_STRUCT_TYPE(mkpeer, MKPEER, ()); DEFINE_PARSE_STRUCT_TYPE(connect, CONNECT, ()); DEFINE_PARSE_STRUCT_TYPE(name, NAME, ()); DEFINE_PARSE_STRUCT_TYPE(rmhook, RMHOOK, ()); DEFINE_PARSE_STRUCT_TYPE(nodeinfo, NODEINFO, ()); DEFINE_PARSE_STRUCT_TYPE(typeinfo, TYPEINFO, ()); DEFINE_PARSE_STRUCT_TYPE(linkinfo, 
LINKINFO, (&ng_generic_nodeinfo_type)); /* Get length of an array when the length is stored as a 32 bit value immediately preceding the array -- as with struct namelist and struct typelist. */ static int ng_generic_list_getLength(const struct ng_parse_type *type, const u_char *start, const u_char *buf) { return *((const u_int32_t *)(buf - 4)); } /* Get length of the array of struct linkinfo inside a struct hooklist */ static int ng_generic_linkinfo_getLength(const struct ng_parse_type *type, const u_char *start, const u_char *buf) { const struct hooklist *hl = (const struct hooklist *)start; return hl->nodeinfo.hooks; } /* Array type for a variable length array of struct namelist */ static const struct ng_parse_array_info ng_nodeinfoarray_type_info = { &ng_generic_nodeinfo_type, &ng_generic_list_getLength }; static const struct ng_parse_type ng_generic_nodeinfoarray_type = { &ng_parse_array_type, &ng_nodeinfoarray_type_info }; /* Array type for a variable length array of struct typelist */ static const struct ng_parse_array_info ng_typeinfoarray_type_info = { &ng_generic_typeinfo_type, &ng_generic_list_getLength }; static const struct ng_parse_type ng_generic_typeinfoarray_type = { &ng_parse_array_type, &ng_typeinfoarray_type_info }; /* Array type for array of struct linkinfo in struct hooklist */ static const struct ng_parse_array_info ng_generic_linkinfo_array_type_info = { &ng_generic_linkinfo_type, &ng_generic_linkinfo_getLength }; static const struct ng_parse_type ng_generic_linkinfo_array_type = { &ng_parse_array_type, &ng_generic_linkinfo_array_type_info }; DEFINE_PARSE_STRUCT_TYPE(typelist, TYPELIST, (&ng_generic_nodeinfoarray_type)); DEFINE_PARSE_STRUCT_TYPE(hooklist, HOOKLIST, (&ng_generic_nodeinfo_type, &ng_generic_linkinfo_array_type)); DEFINE_PARSE_STRUCT_TYPE(listnodes, LISTNODES, (&ng_generic_nodeinfoarray_type)); /* List of commands and how to convert arguments to/from ASCII */ static const struct ng_cmdlist ng_generic_cmds[] = { { NGM_GENERIC_COOKIE, NGM_SHUTDOWN, "shutdown", NULL, NULL }, { NGM_GENERIC_COOKIE, NGM_MKPEER, "mkpeer", &ng_generic_mkpeer_type, NULL }, { NGM_GENERIC_COOKIE, NGM_CONNECT, "connect", &ng_generic_connect_type, NULL }, { NGM_GENERIC_COOKIE, NGM_NAME, "name", &ng_generic_name_type, NULL }, { NGM_GENERIC_COOKIE, NGM_RMHOOK, "rmhook", &ng_generic_rmhook_type, NULL }, { NGM_GENERIC_COOKIE, NGM_NODEINFO, "nodeinfo", NULL, &ng_generic_nodeinfo_type }, { NGM_GENERIC_COOKIE, NGM_LISTHOOKS, "listhooks", NULL, &ng_generic_hooklist_type }, { NGM_GENERIC_COOKIE, NGM_LISTNAMES, "listnames", NULL, &ng_generic_listnodes_type /* same as NGM_LISTNODES */ }, { NGM_GENERIC_COOKIE, NGM_LISTNODES, "listnodes", NULL, &ng_generic_listnodes_type }, { NGM_GENERIC_COOKIE, NGM_LISTTYPES, "listtypes", NULL, &ng_generic_typeinfo_type }, { NGM_GENERIC_COOKIE, NGM_TEXT_CONFIG, "textconfig", NULL, &ng_parse_string_type }, { NGM_GENERIC_COOKIE, NGM_TEXT_STATUS, "textstatus", NULL, &ng_parse_string_type }, { NGM_GENERIC_COOKIE, NGM_ASCII2BINARY, "ascii2binary", &ng_parse_ng_mesg_type, &ng_parse_ng_mesg_type }, { NGM_GENERIC_COOKIE, NGM_BINARY2ASCII, "binary2ascii", &ng_parse_ng_mesg_type, &ng_parse_ng_mesg_type }, { 0 } }; /************************************************************************ Node routines ************************************************************************/ /* * Instantiate a node of the requested type */ int ng_make_node(const char *typename, node_p *nodepp) { struct ng_type *type; int error; /* Check that the type makes sense */ if (typename == NULL) { 
TRAP_ERROR(); return (EINVAL); } /* Locate the node type. If we fail we return. Do not try to load * module. */ if ((type = ng_findtype(typename)) == NULL) return (ENXIO); /* * If we have a constructor, then make the node and * call the constructor to do type specific initialisation. */ if (type->constructor != NULL) { if ((error = ng_make_node_common(type, nodepp)) == 0) { if ((error = ((*type->constructor)(*nodepp)) != 0)) { NG_NODE_UNREF(*nodepp); } } } else { /* * Node has no constructor. We cannot ask for one * to be made. It must be brought into existence by * some external agency. The external agency should * call ng_make_node_common() directly to get the * netgraph part initialised. */ TRAP_ERROR(); error = EINVAL; } return (error); } /* * Generic node creation. Called by node initialisation for externally * instantiated nodes (e.g. hardware, sockets, etc ). * The returned node has a reference count of 1. */ int ng_make_node_common(struct ng_type *type, node_p *nodepp) { node_p node; /* Require the node type to have been already installed */ if (ng_findtype(type->name) == NULL) { TRAP_ERROR(); return (EINVAL); } /* Make a node and try attach it to the type */ NG_ALLOC_NODE(node); if (node == NULL) { TRAP_ERROR(); return (ENOMEM); } node->nd_type = type; NG_NODE_REF(node); /* note reference */ type->refs++; NG_QUEUE_LOCK_INIT(&node->nd_input_queue); node->nd_input_queue.queue = NULL; node->nd_input_queue.last = &node->nd_input_queue.queue; node->nd_input_queue.q_flags = 0; node->nd_input_queue.q_node = node; /* Initialize hook list for new node */ LIST_INIT(&node->nd_hooks); /* Link us into the name hash. */ mtx_lock(&ng_namehash_mtx); LIST_INSERT_HEAD(&ng_name_hash[0], node, nd_nodes); mtx_unlock(&ng_namehash_mtx); /* get an ID and put us in the hash chain */ mtx_lock(&ng_idhash_mtx); for (;;) { /* wrap protection, even if silly */ node_p node2 = NULL; node->nd_ID = nextID++; /* 137/second for 1 year before wrap */ /* Is there a problem with the new number? */ NG_IDHASH_FIND(node->nd_ID, node2); /* already taken? */ if ((node->nd_ID != 0) && (node2 == NULL)) { break; } } LIST_INSERT_HEAD(&ng_ID_hash[NG_IDHASH_FN(node->nd_ID)], node, nd_idnodes); mtx_unlock(&ng_idhash_mtx); /* Done */ *nodepp = node; return (0); } /* * Forceably start the shutdown process on a node. Either call * its shutdown method, or do the default shutdown if there is * no type-specific method. * * We can only be called from a shutdown message, so we know we have * a writer lock, and therefore exclusive access. It also means * that we should not be on the work queue, but we check anyhow. * * Persistent node types must have a type-specific method which * allocates a new node in which case, this one is irretrievably going away, * or cleans up anything it needs, and just makes the node valid again, * in which case we allow the node to survive. * * XXX We need to think of how to tell a persistent node that we * REALLY need to go away because the hardware has gone or we * are rebooting.... etc. 
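/*
 * Illustrative userland sketch (not part of this file or commit) of the ID
 * assignment loop in ng_make_node_common() above: keep bumping a counter
 * until a non-zero ID that is not already hashed is found.  At the roughly
 * 137 allocations per second mentioned in the source, a 32-bit counter
 * takes about a year to wrap.  A flat array stands in for the ID hash
 * chains and their mutex.
 */
#include <stdbool.h>
#include <stdio.h>

#define TABSZ	8

static unsigned used[TABSZ];		/* toy stand-in for ng_ID_hash[] */
static unsigned nextID = 1;

static bool
id_taken(unsigned id)
{
	int i;

	for (i = 0; i < TABSZ; i++)
		if (used[i] == id)
			return (true);
	return (false);
}

static unsigned
id_alloc(void)
{
	unsigned id;

	do {
		id = nextID++;		/* wrap protection: skip 0 and dups */
	} while (id == 0 || id_taken(id));
	return (id);
}

int
main(void)
{
	unsigned a, b, c;

	used[0] = 2;			/* pretend ID 2 is already in use */
	a = id_alloc();
	b = id_alloc();
	c = id_alloc();
	printf("allocated IDs: %u %u %u (ID 2 was skipped)\n", a, b, c);
	return (0);
}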
*/ void ng_rmnode(node_p node, hook_p dummy1, void *dummy2, int dummy3) { hook_p hook; /* Check if it's already shutting down */ if ((node->nd_flags & NGF_CLOSING) != 0) return; if (node == &ng_deadnode) { printf ("shutdown called on deadnode\n"); return; } /* Add an extra reference so it doesn't go away during this */ NG_NODE_REF(node); /* * Mark it invalid so any newcomers know not to try use it * Also add our own mark so we can't recurse * note that NGF_INVALID does not do this as it's also set during * creation */ node->nd_flags |= NGF_INVALID|NGF_CLOSING; /* If node has its pre-shutdown method, then call it first*/ if (node->nd_type && node->nd_type->close) (*node->nd_type->close)(node); /* Notify all remaining connected nodes to disconnect */ while ((hook = LIST_FIRST(&node->nd_hooks)) != NULL) ng_destroy_hook(hook); /* * Drain the input queue forceably. * it has no hooks so what's it going to do, bleed on someone? * Theoretically we came here from a queue entry that was added * Just before the queue was closed, so it should be empty anyway. * Also removes us from worklist if needed. */ ng_flush_input_queue(&node->nd_input_queue); /* Ask the type if it has anything to do in this case */ if (node->nd_type && node->nd_type->shutdown) { (*node->nd_type->shutdown)(node); if (NG_NODE_IS_VALID(node)) { /* * Well, blow me down if the node code hasn't declared * that it doesn't want to die. * Presumably it is a persistant node. * If we REALLY want it to go away, * e.g. hardware going away, * Our caller should set NGF_REALLY_DIE in nd_flags. */ node->nd_flags &= ~(NGF_INVALID|NGF_CLOSING); NG_NODE_UNREF(node); /* Assume they still have theirs */ return; } } else { /* do the default thing */ NG_NODE_UNREF(node); } ng_unname(node); /* basically a NOP these days */ /* * Remove extra reference, possibly the last * Possible other holders of references may include * timeout callouts, but theoretically the node's supposed to * have cancelled them. Possibly hardware dependencies may * force a driver to 'linger' with a reference. */ NG_NODE_UNREF(node); } /* * Remove a reference to the node, possibly the last. * deadnode always acts as it it were the last. */ int ng_unref_node(node_p node) { int v; if (node == &ng_deadnode) { return (0); } do { v = node->nd_refs - 1; } while (! atomic_cmpset_int(&node->nd_refs, v + 1, v)); if (v == 0) { /* we were the last */ mtx_lock(&ng_namehash_mtx); node->nd_type->refs--; /* XXX maybe should get types lock? */ LIST_REMOVE(node, nd_nodes); mtx_unlock(&ng_namehash_mtx); mtx_lock(&ng_idhash_mtx); LIST_REMOVE(node, nd_idnodes); mtx_unlock(&ng_idhash_mtx); mtx_destroy(&node->nd_input_queue.q_mtx); NG_FREE_NODE(node); } return (v); } /************************************************************************ Node ID handling ************************************************************************/ static node_p ng_ID2noderef(ng_ID_t ID) { node_p node; mtx_lock(&ng_idhash_mtx); NG_IDHASH_FIND(ID, node); if(node) NG_NODE_REF(node); mtx_unlock(&ng_idhash_mtx); return(node); } ng_ID_t ng_node2ID(node_p node) { return (node ? NG_NODE_ID(node) : 0); } /************************************************************************ Node name handling ************************************************************************/ /* * Assign a node a name. Once assigned, the name cannot be changed. */ int ng_name_node(node_p node, const char *name) { int i, hash; node_p node2; /* Check the name is valid */ for (i = 0; i < NG_NODESIZ; i++) { if (name[i] == '\0' || name[i] == '.' 
|| name[i] == ':') break; } if (i == 0 || name[i] != '\0') { TRAP_ERROR(); return (EINVAL); } if (ng_decodeidname(name) != 0) { /* valid IDs not allowed here */ TRAP_ERROR(); return (EINVAL); } /* Check the name isn't already being used */ if ((node2 = ng_name2noderef(node, name)) != NULL) { NG_NODE_UNREF(node2); TRAP_ERROR(); return (EADDRINUSE); } /* copy it */ strlcpy(NG_NODE_NAME(node), name, NG_NODESIZ); /* Update name hash. */ NG_NAMEHASH(name, hash); mtx_lock(&ng_namehash_mtx); LIST_REMOVE(node, nd_nodes); LIST_INSERT_HEAD(&ng_name_hash[hash], node, nd_nodes); mtx_unlock(&ng_namehash_mtx); return (0); } /* * Find a node by absolute name. The name should NOT end with ':' * The name "." means "this node" and "[xxx]" means "the node * with ID (ie, at address) xxx". * * Returns the node if found, else NULL. * Eventually should add something faster than a sequential search. * Note it acquires a reference on the node so you can be sure it's still * there. */ node_p ng_name2noderef(node_p here, const char *name) { node_p node; ng_ID_t temp; int hash; /* "." means "this node" */ if (strcmp(name, ".") == 0) { NG_NODE_REF(here); return(here); } /* Check for name-by-ID */ if ((temp = ng_decodeidname(name)) != 0) { return (ng_ID2noderef(temp)); } /* Find node by name */ NG_NAMEHASH(name, hash); mtx_lock(&ng_namehash_mtx); LIST_FOREACH(node, &ng_name_hash[hash], nd_nodes) { if (NG_NODE_IS_VALID(node) && (strcmp(NG_NODE_NAME(node), name) == 0)) { break; } } if (node) NG_NODE_REF(node); mtx_unlock(&ng_namehash_mtx); return (node); } /* * Decode an ID name, eg. "[f03034de]". Returns 0 if the * string is not valid, otherwise returns the value. */ static ng_ID_t ng_decodeidname(const char *name) { const int len = strlen(name); char *eptr; u_long val; /* Check for proper length, brackets, no leading junk */ if ((len < 3) || (name[0] != '[') || (name[len - 1] != ']') || (!isxdigit(name[1]))) { return ((ng_ID_t)0); } /* Decode number */ val = strtoul(name + 1, &eptr, 16); if ((eptr - name != len - 1) || (val == ULONG_MAX) || (val == 0)) { return ((ng_ID_t)0); } return (ng_ID_t)val; } /* * Remove a name from a node. This should only be called * when shutting down and removing the node. * IF we allow name changing this may be more resurrected. */ void ng_unname(node_p node) { } /************************************************************************ Hook routines Names are not optional. Hooks are always connected, except for a brief moment within these routines. On invalidation or during creation they are connected to the 'dead' hook. ************************************************************************/ /* * Remove a hook reference */ void ng_unref_hook(hook_p hook) { int v; if (hook == &ng_deadhook) { return; } do { v = hook->hk_refs; } while (! atomic_cmpset_int(&hook->hk_refs, v, v - 1)); if (v == 1) { /* we were the last */ if (_NG_HOOK_NODE(hook)) { /* it'll probably be ng_deadnode */ _NG_NODE_UNREF((_NG_HOOK_NODE(hook))); hook->hk_node = NULL; } NG_FREE_HOOK(hook); } } /* * Add an unconnected hook to a node. Only used internally. * Assumes node is locked. 
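/*
 * Illustrative userland sketch (not part of this file or commit) of the
 * lock-free reference drop used by ng_unref_hook() above (and by
 * ng_unref_node()), with C11 atomics standing in for FreeBSD's
 * atomic_cmpset_int(): retry the compare-and-swap until the decrement
 * lands, and let the caller act when the count reaches zero.
 */
#include <stdatomic.h>
#include <stdio.h>

static int
refcount_release(atomic_int *refs)
{
	int v;

	do {
		v = atomic_load(refs);
	} while (!atomic_compare_exchange_weak(refs, &v, v - 1));
	return (v - 1);			/* references remaining */
}

int
main(void)
{
	atomic_int refs = 2;

	printf("after first release: %d left\n", refcount_release(&refs));
	if (refcount_release(&refs) == 0)
		printf("last reference dropped; the object may be freed\n");
	return (0);
}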
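/*
 * Illustrative userland sketch (not part of this file or commit) mirroring
 * ng_decodeidname() above: a node address of the form "[hex-id]" is decoded
 * with strtoul(), and anything that does not match the bracketed-hex shape
 * (or that decodes to 0 or ULONG_MAX) is treated as an ordinary name.
 */
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static unsigned long
decode_id(const char *name)
{
	size_t len = strlen(name);
	unsigned long val;
	char *eptr;

	if (len < 3 || name[0] != '[' || name[len - 1] != ']' ||
	    !isxdigit((unsigned char)name[1]))
		return (0);
	val = strtoul(name + 1, &eptr, 16);
	if (eptr != name + len - 1 || val == ULONG_MAX || val == 0)
		return (0);
	return (val);
}

int
main(void)
{
	printf("\"[f03034de]\" -> %#lx\n", decode_id("[f03034de]"));
	printf("\"fxp0\"       -> %#lx (not an ID)\n", decode_id("fxp0"));
	return (0);
}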
(XXX not yet true ) */ static int ng_add_hook(node_p node, const char *name, hook_p *hookp) { hook_p hook; int error = 0; /* Check that the given name is good */ if (name == NULL) { TRAP_ERROR(); return (EINVAL); } if (ng_findhook(node, name) != NULL) { TRAP_ERROR(); return (EEXIST); } /* Allocate the hook and link it up */ NG_ALLOC_HOOK(hook); if (hook == NULL) { TRAP_ERROR(); return (ENOMEM); } hook->hk_refs = 1; /* add a reference for us to return */ hook->hk_flags = HK_INVALID; hook->hk_peer = &ng_deadhook; /* start off this way */ hook->hk_node = node; NG_NODE_REF(node); /* each hook counts as a reference */ /* Set hook name */ strlcpy(NG_HOOK_NAME(hook), name, NG_HOOKSIZ); /* * Check if the node type code has something to say about it * If it fails, the unref of the hook will also unref the node. */ if (node->nd_type->newhook != NULL) { if ((error = (*node->nd_type->newhook)(node, hook, name))) { NG_HOOK_UNREF(hook); /* this frees the hook */ return (error); } } /* * The 'type' agrees so far, so go ahead and link it in. * We'll ask again later when we actually connect the hooks. */ LIST_INSERT_HEAD(&node->nd_hooks, hook, hk_hooks); node->nd_numhooks++; NG_HOOK_REF(hook); /* one for the node */ if (hookp) *hookp = hook; return (0); } /* * Find a hook * * Node types may supply their own optimized routines for finding * hooks. If none is supplied, we just do a linear search. * XXX Possibly we should add a reference to the hook? */ hook_p ng_findhook(node_p node, const char *name) { hook_p hook; if (node->nd_type->findhook != NULL) return (*node->nd_type->findhook)(node, name); LIST_FOREACH(hook, &node->nd_hooks, hk_hooks) { if (NG_HOOK_IS_VALID(hook) && (strcmp(NG_HOOK_NAME(hook), name) == 0)) return (hook); } return (NULL); } /* * Destroy a hook * * As hooks are always attached, this really destroys two hooks. * The one given, and the one attached to it. Disconnect the hooks * from each other first. We reconnect the peer hook to the 'dead' * hook so that it can still exist after we depart. We then * send the peer its own destroy message. This ensures that we only * interact with the peer's structures when it is locked processing that * message. We hold a reference to the peer hook so we are guaranteed that * the peer hook and node are still going to exist until * we are finished there as the hook holds a ref on the node. * We run this same code again on the peer hook, but that time it is already * attached to the 'dead' hook. * * This routine is called at all stages of hook creation * on error detection and must be able to handle any such stage. */ void ng_destroy_hook(hook_p hook) { hook_p peer; node_p node; if (hook == &ng_deadhook) { /* better safe than sorry */ printf("ng_destroy_hook called on deadhook\n"); return; } /* * Protect divorce process with mutex, to avoid races on * simultaneous disconnect. */ mtx_lock(&ng_topo_mtx); hook->hk_flags |= HK_INVALID; peer = NG_HOOK_PEER(hook); node = NG_HOOK_NODE(hook); if (peer && (peer != &ng_deadhook)) { /* * Set the peer to point to ng_deadhook * from this moment on we are effectively independent it. * send it an rmhook message of it's own. */ peer->hk_peer = &ng_deadhook; /* They no longer know us */ hook->hk_peer = &ng_deadhook; /* Nor us, them */ if (NG_HOOK_NODE(peer) == &ng_deadnode) { /* * If it's already divorced from a node, * just free it. 
*/ mtx_unlock(&ng_topo_mtx); } else { mtx_unlock(&ng_topo_mtx); ng_rmhook_self(peer); /* Send it a surprise */ } NG_HOOK_UNREF(peer); /* account for peer link */ NG_HOOK_UNREF(hook); /* account for peer link */ } else mtx_unlock(&ng_topo_mtx); mtx_assert(&ng_topo_mtx, MA_NOTOWNED); /* * Remove the hook from the node's list to avoid possible recursion * in case the disconnection results in node shutdown. */ if (node == &ng_deadnode) { /* happens if called from ng_con_nodes() */ return; } LIST_REMOVE(hook, hk_hooks); node->nd_numhooks--; if (node->nd_type->disconnect) { /* * The type handler may elect to destroy the node so don't * trust its existence after this point. (except * that we still hold a reference on it. (which we * inherrited from the hook we are destroying) */ (*node->nd_type->disconnect) (hook); } /* * Note that because we will point to ng_deadnode, the original node * is not decremented automatically so we do that manually. */ _NG_HOOK_NODE(hook) = &ng_deadnode; NG_NODE_UNREF(node); /* We no longer point to it so adjust count */ NG_HOOK_UNREF(hook); /* Account for linkage (in list) to node */ } /* * Take two hooks on a node and merge the connection so that the given node * is effectively bypassed. */ int ng_bypass(hook_p hook1, hook_p hook2) { if (hook1->hk_node != hook2->hk_node) { TRAP_ERROR(); return (EINVAL); } hook1->hk_peer->hk_peer = hook2->hk_peer; hook2->hk_peer->hk_peer = hook1->hk_peer; hook1->hk_peer = &ng_deadhook; hook2->hk_peer = &ng_deadhook; NG_HOOK_UNREF(hook1); NG_HOOK_UNREF(hook2); /* XXX If we ever cache methods on hooks update them as well */ ng_destroy_hook(hook1); ng_destroy_hook(hook2); return (0); } /* * Install a new netgraph type */ int ng_newtype(struct ng_type *tp) { const size_t namelen = strlen(tp->name); /* Check version and type name fields */ if ((tp->version != NG_ABI_VERSION) || (namelen == 0) || (namelen >= NG_TYPESIZ)) { TRAP_ERROR(); if (tp->version != NG_ABI_VERSION) { printf("Netgraph: Node type rejected. ABI mismatch. Suggest recompile\n"); } return (EINVAL); } /* Check for name collision */ if (ng_findtype(tp->name) != NULL) { TRAP_ERROR(); return (EEXIST); } /* Link in new type */ mtx_lock(&ng_typelist_mtx); LIST_INSERT_HEAD(&ng_typelist, tp, types); tp->refs = 1; /* first ref is linked list */ mtx_unlock(&ng_typelist_mtx); return (0); } /* * unlink a netgraph type * If no examples exist */ int ng_rmtype(struct ng_type *tp) { /* Check for name collision */ if (tp->refs != 1) { TRAP_ERROR(); return (EBUSY); } /* Unlink type */ mtx_lock(&ng_typelist_mtx); LIST_REMOVE(tp, types); mtx_unlock(&ng_typelist_mtx); return (0); } /* * Look for a type of the name given */ struct ng_type * ng_findtype(const char *typename) { struct ng_type *type; mtx_lock(&ng_typelist_mtx); LIST_FOREACH(type, &ng_typelist, types) { if (strcmp(type->name, typename) == 0) break; } mtx_unlock(&ng_typelist_mtx); return (type); } /************************************************************************ Composite routines ************************************************************************/ /* * Connect two nodes using the specified hooks, using queued functions. */ static int ng_con_part3(node_p node, item_p item, hook_p hook) { int error = 0; /* * When we run, we know that the node 'node' is locked for us. * Our caller has a reference on the hook. * Our caller has a reference on the node. * (In this case our caller is ng_apply_item() ). * The peer hook has a reference on the hook. 
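/*
 * Illustrative userland sketch (not part of this file or commit) of the
 * pointer splice at the heart of ng_bypass() above: the peers of two hooks
 * on the same node are pointed at each other, so the node in the middle
 * drops out of the data path.  The real routine then parks both local hooks
 * on ng_deadhook and destroys them; only the splice is shown here.
 */
#include <stdio.h>

struct hook {
	const char	*name;
	struct hook	*peer;
};

static void
bypass(struct hook *h1, struct hook *h2)
{
	h1->peer->peer = h2->peer;	/* left side now peers with right */
	h2->peer->peer = h1->peer;	/* and vice versa */
}

int
main(void)
{
	struct hook left, right, mid_l, mid_r;

	left  = (struct hook){ "left",  &mid_l };	/* left <-> middle */
	mid_l = (struct hook){ "mid_l", &left };
	mid_r = (struct hook){ "mid_r", &right };	/* middle <-> right */
	right = (struct hook){ "right", &mid_r };

	bypass(&mid_l, &mid_r);
	printf("left peers with \"%s\", right peers with \"%s\"\n",
	    left.peer->name, right.peer->name);
	return (0);
}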
* We are all set up except for the final call to the node, and * the clearing of the INVALID flag. */ if (NG_HOOK_NODE(hook) == &ng_deadnode) { /* * The node must have been freed again since we last visited * here. ng_destry_hook() has this effect but nothing else does. * We should just release our references and * free anything we can think of. * Since we know it's been destroyed, and it's our caller * that holds the references, just return. */ ERROUT(ENOENT); } if (hook->hk_node->nd_type->connect) { if ((error = (*hook->hk_node->nd_type->connect) (hook))) { ng_destroy_hook(hook); /* also zaps peer */ printf("failed in ng_con_part3()\n"); ERROUT(error); } } /* * XXX this is wrong for SMP. Possibly we need * to separate out 'create' and 'invalid' flags. * should only set flags on hooks we have locked under our node. */ hook->hk_flags &= ~HK_INVALID; done: NG_FREE_ITEM(item); return (error); } static int ng_con_part2(node_p node, item_p item, hook_p hook) { hook_p peer; int error = 0; /* * When we run, we know that the node 'node' is locked for us. * Our caller has a reference on the hook. * Our caller has a reference on the node. * (In this case our caller is ng_apply_item() ). * The peer hook has a reference on the hook. * our node pointer points to the 'dead' node. * First check the hook name is unique. * Should not happen because we checked before queueing this. */ if (ng_findhook(node, NG_HOOK_NAME(hook)) != NULL) { TRAP_ERROR(); ng_destroy_hook(hook); /* should destroy peer too */ printf("failed in ng_con_part2()\n"); ERROUT(EEXIST); } /* * Check if the node type code has something to say about it * If it fails, the unref of the hook will also unref the attached node, * however since that node is 'ng_deadnode' this will do nothing. * The peer hook will also be destroyed. */ if (node->nd_type->newhook != NULL) { if ((error = (*node->nd_type->newhook)(node, hook, hook->hk_name))) { ng_destroy_hook(hook); /* should destroy peer too */ printf("failed in ng_con_part2()\n"); ERROUT(error); } } /* * The 'type' agrees so far, so go ahead and link it in. * We'll ask again later when we actually connect the hooks. */ hook->hk_node = node; /* just overwrite ng_deadnode */ NG_NODE_REF(node); /* each hook counts as a reference */ LIST_INSERT_HEAD(&node->nd_hooks, hook, hk_hooks); node->nd_numhooks++; NG_HOOK_REF(hook); /* one for the node */ /* * We now have a symmetrical situation, where both hooks have been * linked to their nodes, the newhook methods have been called * And the references are all correct. The hooks are still marked * as invalid, as we have not called the 'connect' methods * yet. * We can call the local one immediately as we have the * node locked, but we need to queue the remote one. */ if (hook->hk_node->nd_type->connect) { if ((error = (*hook->hk_node->nd_type->connect) (hook))) { ng_destroy_hook(hook); /* also zaps peer */ printf("failed in ng_con_part2(A)\n"); ERROUT(error); } } /* * Acquire topo mutex to avoid race with ng_destroy_hook(). */ mtx_lock(&ng_topo_mtx); peer = hook->hk_peer; if (peer == &ng_deadhook) { mtx_unlock(&ng_topo_mtx); printf("failed in ng_con_part2(B)\n"); ng_destroy_hook(hook); ERROUT(ENOENT); } mtx_unlock(&ng_topo_mtx); if ((error = ng_send_fn2(peer->hk_node, peer, item, &ng_con_part3, NULL, 0, NG_REUSE_ITEM))) { printf("failed in ng_con_part2(C)\n"); ng_destroy_hook(hook); /* also zaps peer */ return (error); /* item was consumed. */ } hook->hk_flags &= ~HK_INVALID; /* need both to be able to work */ return (0); /* item was consumed. 
*/ done: NG_FREE_ITEM(item); return (error); } /* * Connect this node with another node. We assume that this node is * currently locked, as we are only called from an NGM_CONNECT message. */ static int ng_con_nodes(item_p item, node_p node, const char *name, node_p node2, const char *name2) { int error; hook_p hook; hook_p hook2; if (ng_findhook(node2, name2) != NULL) { return(EEXIST); } if ((error = ng_add_hook(node, name, &hook))) /* gives us a ref */ return (error); /* Allocate the other hook and link it up */ NG_ALLOC_HOOK(hook2); if (hook2 == NULL) { TRAP_ERROR(); ng_destroy_hook(hook); /* XXX check ref counts so far */ NG_HOOK_UNREF(hook); /* including our ref */ return (ENOMEM); } hook2->hk_refs = 1; /* start with a reference for us. */ hook2->hk_flags = HK_INVALID; hook2->hk_peer = hook; /* Link the two together */ hook->hk_peer = hook2; NG_HOOK_REF(hook); /* Add a ref for the peer to each*/ NG_HOOK_REF(hook2); hook2->hk_node = &ng_deadnode; strlcpy(NG_HOOK_NAME(hook2), name2, NG_HOOKSIZ); /* * Queue the function above. * Procesing continues in that function in the lock context of * the other node. */ if ((error = ng_send_fn2(node2, hook2, item, &ng_con_part2, NULL, 0, NG_NOFLAGS))) { printf("failed in ng_con_nodes(): %d\n", error); ng_destroy_hook(hook); /* also zaps peer */ } NG_HOOK_UNREF(hook); /* Let each hook go if it wants to */ NG_HOOK_UNREF(hook2); return (error); } /* * Make a peer and connect. * We assume that the local node is locked. * The new node probably doesn't need a lock until * it has a hook, because it cannot really have any work until then, * but we should think about it a bit more. * * The problem may come if the other node also fires up * some hardware or a timer or some other source of activation, * also it may already get a command msg via it's ID. * * We could use the same method as ng_con_nodes() but we'd have * to add ability to remove the node when failing. (Not hard, just * make arg1 point to the node to remove). * Unless of course we just ignore failure to connect and leave * an unconnected node? */ static int ng_mkpeer(node_p node, const char *name, const char *name2, char *type) { node_p node2; hook_p hook1, hook2; int error; if ((error = ng_make_node(type, &node2))) { return (error); } if ((error = ng_add_hook(node, name, &hook1))) { /* gives us a ref */ ng_rmnode(node2, NULL, NULL, 0); return (error); } if ((error = ng_add_hook(node2, name2, &hook2))) { ng_rmnode(node2, NULL, NULL, 0); ng_destroy_hook(hook1); NG_HOOK_UNREF(hook1); return (error); } /* * Actually link the two hooks together. */ hook1->hk_peer = hook2; hook2->hk_peer = hook1; /* Each hook is referenced by the other */ NG_HOOK_REF(hook1); NG_HOOK_REF(hook2); /* Give each node the opportunity to veto the pending connection */ if (hook1->hk_node->nd_type->connect) { error = (*hook1->hk_node->nd_type->connect) (hook1); } if ((error == 0) && hook2->hk_node->nd_type->connect) { error = (*hook2->hk_node->nd_type->connect) (hook2); } /* * drop the references we were holding on the two hooks. 
*/ if (error) { ng_destroy_hook(hook2); /* also zaps hook1 */ ng_rmnode(node2, NULL, NULL, 0); } else { /* As a last act, allow the hooks to be used */ hook1->hk_flags &= ~HK_INVALID; hook2->hk_flags &= ~HK_INVALID; } NG_HOOK_UNREF(hook1); NG_HOOK_UNREF(hook2); return (error); } /************************************************************************ Utility routines to send self messages ************************************************************************/ /* Shut this node down as soon as everyone is clear of it */ /* Should add arg "immediately" to jump the queue */ int ng_rmnode_self(node_p node) { int error; if (node == &ng_deadnode) return (0); node->nd_flags |= NGF_INVALID; if (node->nd_flags & NGF_CLOSING) return (0); error = ng_send_fn(node, NULL, &ng_rmnode, NULL, 0); return (error); } static void ng_rmhook_part2(node_p node, hook_p hook, void *arg1, int arg2) { ng_destroy_hook(hook); return ; } int ng_rmhook_self(hook_p hook) { int error; node_p node = NG_HOOK_NODE(hook); if (node == &ng_deadnode) return (0); error = ng_send_fn(node, hook, &ng_rmhook_part2, NULL, 0); return (error); } /*********************************************************************** * Parse and verify a string of the form: * * Such a string can refer to a specific node or a specific hook * on a specific node, depending on how you look at it. In the * latter case, the PATH component must not end in a dot. * * Both and are optional. The is a string * of hook names separated by dots. This breaks out the original * string, setting *nodep to "NODE" (or NULL if none) and *pathp * to "PATH" (or NULL if degenerate). Also, *hookp will point to * the final hook component of , if any, otherwise NULL. * * This returns -1 if the path is malformed. The char ** are optional. ***********************************************************************/ int ng_path_parse(char *addr, char **nodep, char **pathp, char **hookp) { char *node, *path, *hook; int k; /* * Extract absolute NODE, if any */ for (path = addr; *path && *path != ':'; path++); if (*path) { node = addr; /* Here's the NODE */ *path++ = '\0'; /* Here's the PATH */ /* Node name must not be empty */ if (!*node) return -1; /* A name of "." is OK; otherwise '.' not allowed */ if (strcmp(node, ".") != 0) { for (k = 0; node[k]; k++) if (node[k] == '.') return -1; } } else { node = NULL; /* No absolute NODE */ path = addr; /* Here's the PATH */ } /* Snoop for illegal characters in PATH */ for (k = 0; path[k]; k++) if (path[k] == ':') return -1; /* Check for no repeated dots in PATH */ for (k = 0; path[k]; k++) if (path[k] == '.' && path[k + 1] == '.') return -1; /* Remove extra (degenerate) dots from beginning or end of PATH */ if (path[0] == '.') path++; if (*path && path[strlen(path) - 1] == '.') path[strlen(path) - 1] = 0; /* If PATH has a dot, then we're not talking about a hook */ if (*path) { for (hook = path, k = 0; path[k]; k++) if (path[k] == '.') { hook = NULL; break; } } else path = hook = NULL; /* Done */ if (nodep) *nodep = node; if (pathp) *pathp = path; if (hookp) *hookp = hook; return (0); } /* * Given a path, which may be absolute or relative, and a starting node, * return the destination node. 
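/*
 * Illustrative userland sketch (not part of this file or commit): a
 * simplified split of a "node:hook1.hook2" address into its node and path
 * parts.  Unlike ng_path_parse() above, no validation of dots, empty
 * segments or illegal characters is done, and the example address is made
 * up.
 */
#include <stdio.h>
#include <string.h>

int
main(void)
{
	char addr[] = "myiface:upper.vlan1";
	char *colon = strchr(addr, ':');

	if (colon != NULL) {
		*colon = '\0';			/* cut NODE off the front */
		printf("node = \"%s\", path = \"%s\"\n", addr, colon + 1);
	} else
		printf("relative path = \"%s\"\n", addr);
	return (0);
}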
*/ int ng_path2noderef(node_p here, const char *address, node_p *destp, hook_p *lasthook) { char fullpath[NG_PATHSIZ]; char *nodename, *path, pbuf[2]; node_p node, oldnode; char *cp; hook_p hook = NULL; /* Initialize */ if (destp == NULL) { TRAP_ERROR(); return EINVAL; } *destp = NULL; /* Make a writable copy of address for ng_path_parse() */ strncpy(fullpath, address, sizeof(fullpath) - 1); fullpath[sizeof(fullpath) - 1] = '\0'; /* Parse out node and sequence of hooks */ if (ng_path_parse(fullpath, &nodename, &path, NULL) < 0) { TRAP_ERROR(); return EINVAL; } if (path == NULL) { pbuf[0] = '.'; /* Needs to be writable */ pbuf[1] = '\0'; path = pbuf; } /* * For an absolute address, jump to the starting node. * Note that this holds a reference on the node for us. * Don't forget to drop the reference if we don't need it. */ if (nodename) { node = ng_name2noderef(here, nodename); if (node == NULL) { TRAP_ERROR(); return (ENOENT); } } else { if (here == NULL) { TRAP_ERROR(); return (EINVAL); } node = here; NG_NODE_REF(node); } /* * Now follow the sequence of hooks * XXX * We actually cannot guarantee that the sequence * is not being demolished as we crawl along it * without extra-ordinary locking etc. * So this is a bit dodgy to say the least. * We can probably hold up some things by holding * the nodelist mutex for the time of this * crawl if we wanted.. At least that way we wouldn't have to * worry about the nodes disappearing, but the hooks would still * be a problem. */ for (cp = path; node != NULL && *cp != '\0'; ) { char *segment; /* * Break out the next path segment. Replace the dot we just * found with a NUL; "cp" points to the next segment (or the * NUL at the end). */ for (segment = cp; *cp != '\0'; cp++) { if (*cp == '.') { *cp++ = '\0'; break; } } /* Empty segment */ if (*segment == '\0') continue; /* We have a segment, so look for a hook by that name */ hook = ng_findhook(node, segment); /* Can't get there from here... */ if (hook == NULL || NG_HOOK_PEER(hook) == NULL || NG_HOOK_NOT_VALID(hook) || NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook))) { TRAP_ERROR(); NG_NODE_UNREF(node); #if 0 printf("hooknotvalid %s %s %d %d %d %d ", path, segment, hook == NULL, NG_HOOK_PEER(hook) == NULL, NG_HOOK_NOT_VALID(hook), NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook))); #endif return (ENOENT); } /* * Hop on over to the next node * XXX * Big race conditions here as hooks and nodes go away * *** Idea.. store an ng_ID_t in each hook and use that * instead of the direct hook in this crawl? */ oldnode = node; if ((node = NG_PEER_NODE(hook))) NG_NODE_REF(node); /* XXX RACE */ NG_NODE_UNREF(oldnode); /* XXX another race */ if (NG_NODE_NOT_VALID(node)) { NG_NODE_UNREF(node); /* XXX more races */ node = NULL; } } /* If node somehow missing, fail here (probably this is not needed) */ if (node == NULL) { TRAP_ERROR(); return (ENXIO); } /* Done */ *destp = node; if (lasthook != NULL) *lasthook = (hook ? NG_HOOK_PEER(hook) : NULL); return (0); } /***************************************************************\ * Input queue handling. * All activities are submitted to the node via the input queue * which implements a multiple-reader/single-writer gate. * Items which cannot be handled immediately are queued. 
* * read-write queue locking inline functions * \***************************************************************/ static __inline item_p ng_dequeue(struct ng_queue * ngq, int *rw); static __inline item_p ng_acquire_read(struct ng_queue * ngq, item_p item); static __inline item_p ng_acquire_write(struct ng_queue * ngq, item_p item); static __inline void ng_leave_read(struct ng_queue * ngq); static __inline void ng_leave_write(struct ng_queue * ngq); static __inline void ng_queue_rw(struct ng_queue * ngq, item_p item, int rw); /* * Definition of the bits fields in the ng_queue flag word. * Defined here rather than in netgraph.h because no-one should fiddle * with them. * * The ordering here may be important! don't shuffle these. */ /*- Safety Barrier--------+ (adjustable to suit taste) (not used yet) | V +-------+-------+-------+-------+-------+-------+-------+-------+ | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |A|c|t|i|v|e| |R|e|a|d|e|r| |C|o|u|n|t| | | | | | | | | |P|A| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |O|W| +-------+-------+-------+-------+-------+-------+-------+-------+ \___________________________ ____________________________/ | | V | | [active reader count] | | | | Operation Pending -------------------------------+ | | Active Writer ---------------------------------------+ */ #define WRITER_ACTIVE 0x00000001 #define OP_PENDING 0x00000002 #define READER_INCREMENT 0x00000004 #define READER_MASK 0xfffffffc /* Not valid if WRITER_ACTIVE is set */ #define SAFETY_BARRIER 0x00100000 /* 128K items queued should be enough */ /* Defines of more elaborate states on the queue */ /* Mask of bits a new read cares about */ #define NGQ_RMASK (WRITER_ACTIVE|OP_PENDING) /* Mask of bits a new write cares about */ #define NGQ_WMASK (NGQ_RMASK|READER_MASK) /* Test to decide if there is something on the queue. */ #define QUEUE_ACTIVE(QP) ((QP)->q_flags & OP_PENDING) /* How to decide what the next queued item is. */ #define HEAD_IS_READER(QP) NGI_QUEUED_READER((QP)->queue) #define HEAD_IS_WRITER(QP) NGI_QUEUED_WRITER((QP)->queue) /* notused */ /* Read the status to decide if the next item on the queue can now run. */ #define QUEUED_READER_CAN_PROCEED(QP) \ (((QP)->q_flags & (NGQ_RMASK & ~OP_PENDING)) == 0) #define QUEUED_WRITER_CAN_PROCEED(QP) \ (((QP)->q_flags & (NGQ_WMASK & ~OP_PENDING)) == 0) /* Is there a chance of getting ANY work off the queue? */ #define NEXT_QUEUED_ITEM_CAN_PROCEED(QP) \ (QUEUE_ACTIVE(QP) && \ ((HEAD_IS_READER(QP)) ? QUEUED_READER_CAN_PROCEED(QP) : \ QUEUED_WRITER_CAN_PROCEED(QP))) #define NGQRW_R 0 #define NGQRW_W 1 /* * Taking into account the current state of the queue and node, possibly take * the next entry off the queue and return it. Return NULL if there was * nothing we could return, either because there really was nothing there, or * because the node was in a state where it cannot yet process the next item * on the queue. * * This MUST MUST MUST be called with the mutex held. */ static __inline item_p ng_dequeue(struct ng_queue *ngq, int *rw) { item_p item; u_int add_arg; mtx_assert(&ngq->q_mtx, MA_OWNED); /* * If there is nothing queued, then just return. * No point in continuing. * XXXGL: assert this? */ if (!QUEUE_ACTIVE(ngq)) { CTR4(KTR_NET, "%20s: node [%x] (%p) queue empty; " "queue flags 0x%lx", __func__, ngq->q_node->nd_ID, ngq->q_node, ngq->q_flags); return (NULL); } /* * From here, we can assume there is a head item. * We need to find out what it is and if it can be dequeued, given * the current state of the node. 
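[editor's note] A small stand-alone sketch of the flag-word arithmetic described by the diagram above, using the same bit values; plain arithmetic stands in for atomic_add_long(), and the scenario is invented:

    #include <stdio.h>

    #define WRITER_ACTIVE       0x00000001
    #define OP_PENDING          0x00000002
    #define READER_INCREMENT    0x00000004
    #define READER_MASK         0xfffffffc
    #define NGQ_RMASK           (WRITER_ACTIVE | OP_PENDING)

    int
    main(void)
    {
        unsigned long q_flags = 0;

        q_flags += READER_INCREMENT;        /* first reader enters */
        q_flags += READER_INCREMENT;        /* second reader enters */
        printf("readers=%lu, new reader blocked: %s\n",
            (q_flags & READER_MASK) / READER_INCREMENT,
            (q_flags & NGQ_RMASK) != 0 ? "yes" : "no");

        q_flags |= OP_PENDING;              /* an item gets queued */
        printf("after OP_PENDING, new reader blocked: %s\n",
            (q_flags & NGQ_RMASK) != 0 ? "yes" : "no");
        return (0);
    }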
*/ if (HEAD_IS_READER(ngq)) { if (!QUEUED_READER_CAN_PROCEED(ngq)) { /* * It's a reader but we can't use it. * We are stalled so make sure we don't * get called again until something changes. */ ng_worklist_remove(ngq->q_node); CTR4(KTR_NET, "%20s: node [%x] (%p) queued reader " "can't proceed; queue flags 0x%lx", __func__, ngq->q_node->nd_ID, ngq->q_node, ngq->q_flags); return (NULL); } /* * Head of queue is a reader and we have no write active. * We don't care how many readers are already active. * Add the correct increment for the reader count. */ add_arg = READER_INCREMENT; *rw = NGQRW_R; } else if (QUEUED_WRITER_CAN_PROCEED(ngq)) { /* * There is a pending write, no readers and no active writer. * This means we can go ahead with the pending writer. Note * the fact that we now have a writer, ready for when we take * it off the queue. * * We don't need to worry about a possible collision with the * fasttrack reader. * * The fasttrack thread may take a long time to discover that we * are running so we would have an inconsistent state in the * flags for a while. Since we ignore the reader count * entirely when the WRITER_ACTIVE flag is set, this should * not matter (in fact it is defined that way). If it tests * the flag before this operation, the OP_PENDING flag * will make it fail, and if it tests it later, the * WRITER_ACTIVE flag will do the same. If it is SO slow that * we have actually completed the operation, and neither flag * is set by the time that it tests the flags, then it is * actually ok for it to continue. If it completes and we've * finished and the read pending is set it still fails. * * So we can just ignore it, as long as we can ensure that the * transition from WRITE_PENDING state to the WRITER_ACTIVE * state is atomic. * * After failing, first it will be held back by the mutex, then * when it can proceed, it will queue its request, then it * would arrive at this function. Usually it will have to * leave empty handed because the ACTIVE WRITER bit will be * set. * * Adjust the flags for the new active writer. */ add_arg = WRITER_ACTIVE; *rw = NGQRW_W; /* * We want to write "active writer, no readers " Now go make * it true. In fact there may be a number in the readers * count but we know it is not true and will be fixed soon. * We will fix the flags for the next pending entry in a * moment. */ } else { /* * We can't dequeue anything.. return and say so. Probably we * have a write pending and the readers count is non zero. If * we got here because a reader hit us just at the wrong * moment with the fasttrack code, and put us in a strange * state, then it will be coming through in just a moment, * (just as soon as we release the mutex) and keep things * moving. * Make sure we remove ourselves from the work queue. It * would be a waste of effort to do all this again. */ ng_worklist_remove(ngq->q_node); CTR4(KTR_NET, "%20s: node [%x] (%p) can't dequeue anything; " "queue flags 0x%lx", __func__, ngq->q_node->nd_ID, ngq->q_node, ngq->q_flags); return (NULL); } /* * Now we dequeue the request (whatever it may be) and correct the * pending flags and the next and last pointers. */ item = ngq->queue; ngq->queue = item->el_next; CTR6(KTR_NET, "%20s: node [%x] (%p) dequeued item %p with flags 0x%lx; " "queue flags 0x%lx", __func__, ngq->q_node->nd_ID,ngq->q_node, item, item->el_flags, ngq->q_flags); if (ngq->last == &(item->el_next)) { /* * that was the last entry in the queue so set the 'last * pointer up correctly and make sure the pending flag is * clear. 
*/ add_arg += -OP_PENDING; ngq->last = &(ngq->queue); /* * Whatever flag was set will be cleared and * the new acive field will be set by the add as well, * so we don't need to change add_arg. * But we know we don't need to be on the work list. */ atomic_add_long(&ngq->q_flags, add_arg); ng_worklist_remove(ngq->q_node); } else { /* * Since there is still something on the queue * we don't need to change the PENDING flag. */ atomic_add_long(&ngq->q_flags, add_arg); /* * If we see more doable work, make sure we are * on the work queue. */ if (NEXT_QUEUED_ITEM_CAN_PROCEED(ngq)) { ng_setisr(ngq->q_node); } } CTR6(KTR_NET, "%20s: node [%x] (%p) returning item %p as %s; " "queue flags 0x%lx", __func__, ngq->q_node->nd_ID, ngq->q_node, item, *rw ? "WRITER" : "READER" , ngq->q_flags); return (item); } /* * Queue a packet to be picked up by someone else. * We really don't care who, but we can't or don't want to hang around * to process it ourselves. We are probably an interrupt routine.. * If the queue could be run, flag the netisr handler to start. */ static __inline void ng_queue_rw(struct ng_queue * ngq, item_p item, int rw) { mtx_assert(&ngq->q_mtx, MA_OWNED); if (rw == NGQRW_W) NGI_SET_WRITER(item); else NGI_SET_READER(item); item->el_next = NULL; /* maybe not needed */ *ngq->last = item; CTR5(KTR_NET, "%20s: node [%x] (%p) queued item %p as %s", __func__, ngq->q_node->nd_ID, ngq->q_node, item, rw ? "WRITER" : "READER" ); /* * If it was the first item in the queue then we need to * set the last pointer and the type flags. */ if (ngq->last == &(ngq->queue)) { atomic_add_long(&ngq->q_flags, OP_PENDING); CTR3(KTR_NET, "%20s: node [%x] (%p) set OP_PENDING", __func__, ngq->q_node->nd_ID, ngq->q_node); } ngq->last = &(item->el_next); /* * We can take the worklist lock with the node locked * BUT NOT THE REVERSE! */ if (NEXT_QUEUED_ITEM_CAN_PROCEED(ngq)) ng_setisr(ngq->q_node); } /* * This function 'cheats' in that it first tries to 'grab' the use of the * node, without going through the mutex. We can do this becasue of the * semantics of the lock. The semantics include a clause that says that the * value of the readers count is invalid if the WRITER_ACTIVE flag is set. It * also says that the WRITER_ACTIVE flag cannot be set if the readers count * is not zero. Note that this talks about what is valid to SET the * WRITER_ACTIVE flag, because from the moment it is set, the value if the * reader count is immaterial, and not valid. The two 'pending' flags have a * similar effect, in that If they are orthogonal to the two active fields in * how they are set, but if either is set, the attempted 'grab' need to be * backed out because there is earlier work, and we maintain ordering in the * queue. The result of this is that the reader request can try obtain use of * the node with only a single atomic addition, and without any of the mutex * overhead. If this fails the operation degenerates to the same as for other * cases. 
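[editor's note] A hedged stand-alone model of the optimistic reader grab described in the comment above: bump the reader count first, re-check the blocking bits, and back the increment out on failure. C11 atomics are used here purely for illustration; the kernel code uses atomic_add_long()/atomic_subtract_long() under its own locking rules:

    #include <stdatomic.h>
    #include <stdio.h>

    #define WRITER_ACTIVE       0x1
    #define OP_PENDING          0x2
    #define READER_INCREMENT    0x4
    #define NGQ_RMASK           (WRITER_ACTIVE | OP_PENDING)

    static _Atomic unsigned long q_flags;

    /* Optimistically take a reader slot, then re-check; undo on failure. */
    static int
    try_fast_read(void)
    {
        atomic_fetch_add(&q_flags, READER_INCREMENT);
        if ((atomic_load(&q_flags) & NGQ_RMASK) == 0)
            return (1);                     /* got the node as a reader */
        atomic_fetch_sub(&q_flags, READER_INCREMENT);
        return (0);                         /* fall back to the queue/mutex path */
    }

    int
    main(void)
    {
        printf("idle node: fast grab %s\n", try_fast_read() ? "ok" : "failed");
        atomic_fetch_sub(&q_flags, READER_INCREMENT);   /* the reader leaves */

        atomic_fetch_or(&q_flags, WRITER_ACTIVE);       /* pretend a writer runs */
        printf("writer active: fast grab %s\n", try_fast_read() ? "ok" : "failed");
        return (0);
    }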
* */ static __inline item_p ng_acquire_read(struct ng_queue *ngq, item_p item) { KASSERT(ngq != &ng_deadnode.nd_input_queue, ("%s: working on deadnode", __func__)); /* ######### Hack alert ######### */ atomic_add_long(&ngq->q_flags, READER_INCREMENT); if ((ngq->q_flags & NGQ_RMASK) == 0) { /* Successfully grabbed node */ CTR4(KTR_NET, "%20s: node [%x] (%p) fast acquired item %p", __func__, ngq->q_node->nd_ID, ngq->q_node, item); return (item); } /* undo the damage if we didn't succeed */ atomic_subtract_long(&ngq->q_flags, READER_INCREMENT); /* ######### End Hack alert ######### */ NG_QUEUE_LOCK(ngq); /* * Try again. Another processor (or interrupt for that matter) may * have removed the last queued item that was stopping us from * running, between the previous test, and the moment that we took * the mutex. (Or maybe a writer completed.) * Even if another fast-track reader hits during this period * we don't care as multiple readers is OK. */ if ((ngq->q_flags & NGQ_RMASK) == 0) { atomic_add_long(&ngq->q_flags, READER_INCREMENT); NG_QUEUE_UNLOCK(ngq); CTR4(KTR_NET, "%20s: node [%x] (%p) slow acquired item %p", __func__, ngq->q_node->nd_ID, ngq->q_node, item); return (item); } /* * and queue the request for later. */ ng_queue_rw(ngq, item, NGQRW_R); NG_QUEUE_UNLOCK(ngq); return (NULL); } static __inline item_p ng_acquire_write(struct ng_queue *ngq, item_p item) { KASSERT(ngq != &ng_deadnode.nd_input_queue, ("%s: working on deadnode", __func__)); restart: NG_QUEUE_LOCK(ngq); /* * If there are no readers, no writer, and no pending packets, then * we can just go ahead. In all other situations we need to queue the * request */ if ((ngq->q_flags & NGQ_WMASK) == 0) { /* collision could happen *HERE* */ atomic_add_long(&ngq->q_flags, WRITER_ACTIVE); NG_QUEUE_UNLOCK(ngq); if (ngq->q_flags & READER_MASK) { /* Collision with fast-track reader */ atomic_subtract_long(&ngq->q_flags, WRITER_ACTIVE); goto restart; } CTR4(KTR_NET, "%20s: node [%x] (%p) acquired item %p", __func__, ngq->q_node->nd_ID, ngq->q_node, item); return (item); } /* * and queue the request for later. */ ng_queue_rw(ngq, item, NGQRW_W); NG_QUEUE_UNLOCK(ngq); return (NULL); } #if 0 static __inline item_p ng_upgrade_write(struct ng_queue *ngq, item_p item) { KASSERT(ngq != &ng_deadnode.nd_input_queue, ("%s: working on deadnode", __func__)); NGI_SET_WRITER(item); mtx_lock_spin(&(ngq->q_mtx)); /* * There will never be no readers as we are there ourselves. * Set the WRITER_ACTIVE flags ASAP to block out fast track readers. * The caller we are running from will call ng_leave_read() * soon, so we must account for that. We must leave again with the * READER lock. If we find other readers, then * queue the request for later. However "later" may be rignt now * if there are no readers. We don't really care if there are queued * items as we will bypass them anyhow. */ atomic_add_long(&ngq->q_flags, WRITER_ACTIVE - READER_INCREMENT); if (ngq->q_flags & (NGQ_WMASK & ~OP_PENDING) == WRITER_ACTIVE) { mtx_unlock_spin(&(ngq->q_mtx)); /* It's just us, act on the item. */ /* will NOT drop writer lock when done */ ng_apply_item(node, item, 0); /* * Having acted on the item, atomically * down grade back to READER and finish up */ atomic_add_long(&ngq->q_flags, READER_INCREMENT - WRITER_ACTIVE); /* Our caller will call ng_leave_read() */ return; } /* * It's not just us active, so queue us AT THE HEAD. * "Why?" I hear you ask. * Put us at the head of the queue as we've already been * through it once. 
If there is nothing else waiting, * set the correct flags. */ if ((item->el_next = ngq->queue) == NULL) { /* * Set up the "last" pointer. * We are the only (and thus last) item */ ngq->last = &(item->el_next); /* We've gone from, 0 to 1 item in the queue */ atomic_add_long(&ngq->q_flags, OP_PENDING); CTR3(KTR_NET, "%20s: node [%x] (%p) set OP_PENDING", __func__, ngq->q_node->nd_ID, ngq->q_node); }; ngq->queue = item; CTR5(KTR_NET, "%20s: node [%x] (%p) requeued item %p as WRITER", __func__, ngq->q_node->nd_ID, ngq->q_node, item ); /* Reverse what we did above. That downgrades us back to reader */ atomic_add_long(&ngq->q_flags, READER_INCREMENT - WRITER_ACTIVE); if (NEXT_QUEUED_ITEM_CAN_PROCEED(ngq)) ng_setisr(ngq->q_node); mtx_unlock_spin(&(ngq->q_mtx)); return; } #endif static __inline void ng_leave_read(struct ng_queue *ngq) { atomic_subtract_long(&ngq->q_flags, READER_INCREMENT); } static __inline void ng_leave_write(struct ng_queue *ngq) { atomic_subtract_long(&ngq->q_flags, WRITER_ACTIVE); } static void ng_flush_input_queue(struct ng_queue * ngq) { item_p item; NG_QUEUE_LOCK(ngq); while (ngq->queue) { item = ngq->queue; ngq->queue = item->el_next; if (ngq->last == &(item->el_next)) { ngq->last = &(ngq->queue); atomic_add_long(&ngq->q_flags, -OP_PENDING); } NG_QUEUE_UNLOCK(ngq); /* If the item is supplying a callback, call it with an error */ if (item->apply != NULL) { if (item->depth == 1) item->apply->error = ENOENT; if (refcount_release(&item->apply->refs)) { (*item->apply->apply)(item->apply->context, item->apply->error); } } NG_FREE_ITEM(item); NG_QUEUE_LOCK(ngq); } /* * Take us off the work queue if we are there. * We definately have no work to be done. */ ng_worklist_remove(ngq->q_node); NG_QUEUE_UNLOCK(ngq); } /*********************************************************************** * Externally visible method for sending or queueing messages or data. ***********************************************************************/ /* * The module code should have filled out the item correctly by this stage: * Common: * reference to destination node. * Reference to destination rcv hook if relevant. * apply pointer must be or NULL or reference valid struct ng_apply_info. * Data: * pointer to mbuf * Control_Message: * pointer to msg. * ID of original sender node. (return address) * Function: * Function pointer * void * argument * integer argument * * The nodes have several routines and macros to help with this task: */ int ng_snd_item(item_p item, int flags) { hook_p hook; node_p node; int queue, rw; struct ng_queue *ngq; int error = 0; /* We are sending item, so it must be present! */ KASSERT(item != NULL, ("ng_snd_item: item is NULL")); #ifdef NETGRAPH_DEBUG _ngi_check(item, __FILE__, __LINE__); #endif /* Item was sent once more, postpone apply() call. */ if (item->apply) refcount_acquire(&item->apply->refs); node = NGI_NODE(item); /* Node is never optional. */ KASSERT(node != NULL, ("ng_snd_item: node is NULL")); hook = NGI_HOOK(item); /* Valid hook and mbuf are mandatory for data. */ if ((item->el_flags & NGQF_TYPE) == NGQF_DATA) { KASSERT(hook != NULL, ("ng_snd_item: hook for data is NULL")); if (NGI_M(item) == NULL) ERROUT(EINVAL); CHECK_DATA_MBUF(NGI_M(item)); } /* * If the item or the node specifies single threading, force * writer semantics. Similarly, the node may say one hook always * produces writers. These are overrides. 
*/ if (((item->el_flags & NGQF_RW) == NGQF_WRITER) || (node->nd_flags & NGF_FORCE_WRITER) || (hook && (hook->hk_flags & HK_FORCE_WRITER))) { rw = NGQRW_W; } else { rw = NGQRW_R; } /* * If sender or receiver requests queued delivery or stack usage * level is dangerous - enqueue message. */ if ((flags & NG_QUEUE) || (hook && (hook->hk_flags & HK_QUEUE))) { queue = 1; } else { queue = 0; #ifdef GET_STACK_USAGE /* * Most of netgraph nodes have small stack consumption and * for them 25% of free stack space is more than enough. * Nodes/hooks with higher stack usage should be marked as * HI_STACK. For them 50% of stack will be guaranteed then. * XXX: Values 25% and 50% are completely empirical. */ size_t st, su, sl; GET_STACK_USAGE(st, su); sl = st - su; if ((sl * 4 < st) || ((sl * 2 < st) && ((node->nd_flags & NGF_HI_STACK) || (hook && (hook->hk_flags & HK_HI_STACK))))) { queue = 1; } #endif } ngq = &node->nd_input_queue; if (queue) { /* Put it on the queue for that node*/ #ifdef NETGRAPH_DEBUG _ngi_check(item, __FILE__, __LINE__); #endif item->depth = 1; NG_QUEUE_LOCK(ngq); ng_queue_rw(ngq, item, rw); NG_QUEUE_UNLOCK(ngq); return ((flags & NG_PROGRESS) ? EINPROGRESS : 0); } /* * We already decided how we will be queueud or treated. * Try get the appropriate operating permission. */ if (rw == NGQRW_R) item = ng_acquire_read(ngq, item); else item = ng_acquire_write(ngq, item); /* Item was queued while trying to get permission. */ if (item == NULL) return ((flags & NG_PROGRESS) ? EINPROGRESS : 0); #ifdef NETGRAPH_DEBUG _ngi_check(item, __FILE__, __LINE__); #endif NGI_GET_NODE(item, node); /* zaps stored node */ item->depth++; error = ng_apply_item(node, item, rw); /* drops r/w lock when done */ /* * If the node goes away when we remove the reference, * whatever we just did caused it.. whatever we do, DO NOT * access the node again! */ if (NG_NODE_UNREF(node) == 0) return (error); NG_QUEUE_LOCK(ngq); if (NEXT_QUEUED_ITEM_CAN_PROCEED(ngq)) ng_setisr(ngq->q_node); NG_QUEUE_UNLOCK(ngq); return (error); done: /* If was not sent, apply callback here. */ if (item->apply != NULL) { if (item->depth == 0 && error != 0) item->apply->error = error; if (refcount_release(&item->apply->refs)) { (*item->apply->apply)(item->apply->context, item->apply->error); } } NG_FREE_ITEM(item); return (error); } /* * We have an item that was possibly queued somewhere. * It should contain all the information needed * to run it on the appropriate node/hook. * If there is apply pointer and we own the last reference, call apply(). */ static int ng_apply_item(node_p node, item_p item, int rw) { hook_p hook; ng_rcvdata_t *rcvdata; ng_rcvmsg_t *rcvmsg; struct ng_apply_info *apply; int error = 0, depth; /* Node and item are never optional. */ KASSERT(node != NULL, ("ng_apply_item: node is NULL")); KASSERT(item != NULL, ("ng_apply_item: item is NULL")); NGI_GET_HOOK(item, hook); /* clears stored hook */ #ifdef NETGRAPH_DEBUG _ngi_check(item, __FILE__, __LINE__); #endif apply = item->apply; depth = item->depth; switch (item->el_flags & NGQF_TYPE) { case NGQF_DATA: /* * Check things are still ok as when we were queued. */ KASSERT(hook != NULL, ("ng_apply_item: hook for data is NULL")); if (NG_HOOK_NOT_VALID(hook) || NG_NODE_NOT_VALID(node)) { error = EIO; NG_FREE_ITEM(item); break; } /* * If no receive method, just silently drop it. 
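[editor's note] A worked example of the GET_STACK_USAGE heuristic in ng_snd_item() above, showing how "sl * 4 < st" enforces the 25% headroom rule and "sl * 2 < st" the 50% rule for HI_STACK nodes; the stack size and usage figures are made-up sample numbers:

    #include <stdio.h>

    int
    main(void)
    {
        size_t st = 16384;      /* assumed total kernel stack size */
        size_t su = 11000;      /* assumed bytes already in use */
        size_t sl = st - su;    /* free space left: 5384 bytes, about 33% */

        printf("queue for a normal node:   %s\n", sl * 4 < st ? "yes" : "no");
        printf("queue for a HI_STACK node: %s\n", sl * 2 < st ? "yes" : "no");
        return (0);
    }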
* Give preference to the hook over-ride method */ if ((!(rcvdata = hook->hk_rcvdata)) && (!(rcvdata = NG_HOOK_NODE(hook)->nd_type->rcvdata))) { error = 0; NG_FREE_ITEM(item); break; } error = (*rcvdata)(hook, item); break; case NGQF_MESG: if (hook && NG_HOOK_NOT_VALID(hook)) { /* * The hook has been zapped then we can't use it. * Immediately drop its reference. * The message may not need it. */ NG_HOOK_UNREF(hook); hook = NULL; } /* * Similarly, if the node is a zombie there is * nothing we can do with it, drop everything. */ if (NG_NODE_NOT_VALID(node)) { TRAP_ERROR(); error = EINVAL; NG_FREE_ITEM(item); break; } /* * Call the appropriate message handler for the object. * It is up to the message handler to free the message. * If it's a generic message, handle it generically, * otherwise call the type's message handler (if it exists). * XXX (race). Remember that a queued message may * reference a node or hook that has just been * invalidated. It will exist as the queue code * is holding a reference, but.. */ if ((NGI_MSG(item)->header.typecookie == NGM_GENERIC_COOKIE) && ((NGI_MSG(item)->header.flags & NGF_RESP) == 0)) { error = ng_generic_msg(node, item, hook); break; } if (((!hook) || (!(rcvmsg = hook->hk_rcvmsg))) && (!(rcvmsg = node->nd_type->rcvmsg))) { TRAP_ERROR(); error = 0; NG_FREE_ITEM(item); break; } error = (*rcvmsg)(node, item, hook); break; case NGQF_FN: case NGQF_FN2: /* * We have to implicitly trust the hook, * as some of these are used for system purposes * where the hook is invalid. In the case of * the shutdown message we allow it to hit * even if the node is invalid. */ if ((NG_NODE_NOT_VALID(node)) && (NGI_FN(item) != &ng_rmnode)) { TRAP_ERROR(); error = EINVAL; NG_FREE_ITEM(item); break; } if ((item->el_flags & NGQF_TYPE) == NGQF_FN) { (*NGI_FN(item))(node, hook, NGI_ARG1(item), NGI_ARG2(item)); NG_FREE_ITEM(item); } else /* it is NGQF_FN2 */ error = (*NGI_FN2(item))(node, item, hook); break; } /* * We held references on some of the resources * that we took from the item. Now that we have * finished doing everything, drop those references. */ if (hook) NG_HOOK_UNREF(hook); if (rw == NGQRW_R) ng_leave_read(&node->nd_input_queue); else ng_leave_write(&node->nd_input_queue); /* Apply callback. 
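[editor's note] A simplified stand-alone model of the apply-callback reference counting used here: every forwarding step holds a reference, and whichever path drops the last one invokes the caller's callback exactly once. The real code uses refcount_acquire()/refcount_release() and only records errors for the outermost item; names such as apply_info and release() are illustrative:

    #include <stdatomic.h>
    #include <stdio.h>

    struct apply_info {
        void        (*apply)(void *context, int error);
        void        *context;
        _Atomic int refs;
        int         error;
    };

    static void
    done(void *context, int error)
    {
        printf("%s finished, error=%d\n", (const char *)context, error);
    }

    /* Drop one reference; the last one to let go fires the callback. */
    static void
    release(struct apply_info *a, int error)
    {
        if (error != 0)
            a->error = error;
        if (atomic_fetch_sub(&a->refs, 1) == 1)
            (*a->apply)(a->context, a->error);
    }

    int
    main(void)
    {
        struct apply_info a = { done, (void *)"request", 2, 0 };

        release(&a, 0);     /* one leg of the delivery completes */
        release(&a, 0);     /* last reference dropped: callback runs once */
        return (0);
    }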
*/ if (apply != NULL) { if (depth == 1 && error != 0) apply->error = error; if (refcount_release(&apply->refs)) (*apply->apply)(apply->context, apply->error); } return (error); } /*********************************************************************** * Implement the 'generic' control messages ***********************************************************************/ static int ng_generic_msg(node_p here, item_p item, hook_p lasthook) { int error = 0; struct ng_mesg *msg; struct ng_mesg *resp = NULL; NGI_GET_MSG(item, msg); if (msg->header.typecookie != NGM_GENERIC_COOKIE) { TRAP_ERROR(); error = EINVAL; goto out; } switch (msg->header.cmd) { case NGM_SHUTDOWN: ng_rmnode(here, NULL, NULL, 0); break; case NGM_MKPEER: { struct ngm_mkpeer *const mkp = (struct ngm_mkpeer *) msg->data; if (msg->header.arglen != sizeof(*mkp)) { TRAP_ERROR(); error = EINVAL; break; } mkp->type[sizeof(mkp->type) - 1] = '\0'; mkp->ourhook[sizeof(mkp->ourhook) - 1] = '\0'; mkp->peerhook[sizeof(mkp->peerhook) - 1] = '\0'; error = ng_mkpeer(here, mkp->ourhook, mkp->peerhook, mkp->type); break; } case NGM_CONNECT: { struct ngm_connect *const con = (struct ngm_connect *) msg->data; node_p node2; if (msg->header.arglen != sizeof(*con)) { TRAP_ERROR(); error = EINVAL; break; } con->path[sizeof(con->path) - 1] = '\0'; con->ourhook[sizeof(con->ourhook) - 1] = '\0'; con->peerhook[sizeof(con->peerhook) - 1] = '\0'; /* Don't forget we get a reference.. */ error = ng_path2noderef(here, con->path, &node2, NULL); if (error) break; error = ng_con_nodes(item, here, con->ourhook, node2, con->peerhook); NG_NODE_UNREF(node2); break; } case NGM_NAME: { struct ngm_name *const nam = (struct ngm_name *) msg->data; if (msg->header.arglen != sizeof(*nam)) { TRAP_ERROR(); error = EINVAL; break; } nam->name[sizeof(nam->name) - 1] = '\0'; error = ng_name_node(here, nam->name); break; } case NGM_RMHOOK: { struct ngm_rmhook *const rmh = (struct ngm_rmhook *) msg->data; hook_p hook; if (msg->header.arglen != sizeof(*rmh)) { TRAP_ERROR(); error = EINVAL; break; } rmh->ourhook[sizeof(rmh->ourhook) - 1] = '\0'; if ((hook = ng_findhook(here, rmh->ourhook)) != NULL) ng_destroy_hook(hook); break; } case NGM_NODEINFO: { struct nodeinfo *ni; NG_MKRESPONSE(resp, msg, sizeof(*ni), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } /* Fill in node info */ ni = (struct nodeinfo *) resp->data; if (NG_NODE_HAS_NAME(here)) strcpy(ni->name, NG_NODE_NAME(here)); strcpy(ni->type, here->nd_type->name); ni->id = ng_node2ID(here); ni->hooks = here->nd_numhooks; break; } case NGM_LISTHOOKS: { const int nhooks = here->nd_numhooks; struct hooklist *hl; struct nodeinfo *ni; hook_p hook; /* Get response struct */ NG_MKRESPONSE(resp, msg, sizeof(*hl) + (nhooks * sizeof(struct linkinfo)), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } hl = (struct hooklist *) resp->data; ni = &hl->nodeinfo; /* Fill in node info */ if (NG_NODE_HAS_NAME(here)) strcpy(ni->name, NG_NODE_NAME(here)); strcpy(ni->type, here->nd_type->name); ni->id = ng_node2ID(here); /* Cycle through the linked list of hooks */ ni->hooks = 0; LIST_FOREACH(hook, &here->nd_hooks, hk_hooks) { struct linkinfo *const link = &hl->link[ni->hooks]; if (ni->hooks >= nhooks) { log(LOG_ERR, "%s: number of %s changed\n", __func__, "hooks"); break; } if (NG_HOOK_NOT_VALID(hook)) continue; strcpy(link->ourhook, NG_HOOK_NAME(hook)); strcpy(link->peerhook, NG_PEER_HOOK_NAME(hook)); if (NG_PEER_NODE_NAME(hook)[0] != '\0') strcpy(link->nodeinfo.name, NG_PEER_NODE_NAME(hook)); strcpy(link->nodeinfo.type, 
NG_PEER_NODE(hook)->nd_type->name); link->nodeinfo.id = ng_node2ID(NG_PEER_NODE(hook)); link->nodeinfo.hooks = NG_PEER_NODE(hook)->nd_numhooks; ni->hooks++; } break; } case NGM_LISTNAMES: case NGM_LISTNODES: { const int unnamed = (msg->header.cmd == NGM_LISTNODES); struct namelist *nl; node_p node; int num = 0, i; mtx_lock(&ng_namehash_mtx); /* Count number of nodes */ for (i = 0; i < NG_NAME_HASH_SIZE; i++) { LIST_FOREACH(node, &ng_name_hash[i], nd_nodes) { if (NG_NODE_IS_VALID(node) && (unnamed || NG_NODE_HAS_NAME(node))) { num++; } } } mtx_unlock(&ng_namehash_mtx); /* Get response struct */ NG_MKRESPONSE(resp, msg, sizeof(*nl) + (num * sizeof(struct nodeinfo)), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } nl = (struct namelist *) resp->data; /* Cycle through the linked list of nodes */ nl->numnames = 0; mtx_lock(&ng_namehash_mtx); for (i = 0; i < NG_NAME_HASH_SIZE; i++) { LIST_FOREACH(node, &ng_name_hash[i], nd_nodes) { struct nodeinfo *const np = &nl->nodeinfo[nl->numnames]; if (NG_NODE_NOT_VALID(node)) continue; if (!unnamed && (! NG_NODE_HAS_NAME(node))) continue; if (nl->numnames >= num) { log(LOG_ERR, "%s: number of nodes changed\n", __func__); break; } if (NG_NODE_HAS_NAME(node)) strcpy(np->name, NG_NODE_NAME(node)); strcpy(np->type, node->nd_type->name); np->id = ng_node2ID(node); np->hooks = node->nd_numhooks; nl->numnames++; } } mtx_unlock(&ng_namehash_mtx); break; } case NGM_LISTTYPES: { struct typelist *tl; struct ng_type *type; int num = 0; mtx_lock(&ng_typelist_mtx); /* Count number of types */ LIST_FOREACH(type, &ng_typelist, types) { num++; } mtx_unlock(&ng_typelist_mtx); /* Get response struct */ NG_MKRESPONSE(resp, msg, sizeof(*tl) + (num * sizeof(struct typeinfo)), M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } tl = (struct typelist *) resp->data; /* Cycle through the linked list of types */ tl->numtypes = 0; mtx_lock(&ng_typelist_mtx); LIST_FOREACH(type, &ng_typelist, types) { struct typeinfo *const tp = &tl->typeinfo[tl->numtypes]; if (tl->numtypes >= num) { log(LOG_ERR, "%s: number of %s changed\n", __func__, "types"); break; } strcpy(tp->type_name, type->name); tp->numnodes = type->refs - 1; /* don't count list */ tl->numtypes++; } mtx_unlock(&ng_typelist_mtx); break; } case NGM_BINARY2ASCII: { int bufSize = 20 * 1024; /* XXX hard coded constant */ const struct ng_parse_type *argstype; const struct ng_cmdlist *c; struct ng_mesg *binary, *ascii; /* Data area must contain a valid netgraph message */ binary = (struct ng_mesg *)msg->data; if (msg->header.arglen < sizeof(struct ng_mesg) || (msg->header.arglen - sizeof(struct ng_mesg) < binary->header.arglen)) { TRAP_ERROR(); error = EINVAL; break; } /* Get a response message with lots of room */ NG_MKRESPONSE(resp, msg, sizeof(*ascii) + bufSize, M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } ascii = (struct ng_mesg *)resp->data; /* Copy binary message header to response message payload */ bcopy(binary, ascii, sizeof(*binary)); /* Find command by matching typecookie and command number */ for (c = here->nd_type->cmdlist; c != NULL && c->name != NULL; c++) { if (binary->header.typecookie == c->cookie && binary->header.cmd == c->cmd) break; } if (c == NULL || c->name == NULL) { for (c = ng_generic_cmds; c->name != NULL; c++) { if (binary->header.typecookie == c->cookie && binary->header.cmd == c->cmd) break; } if (c->name == NULL) { NG_FREE_MSG(resp); error = ENOSYS; break; } } /* Convert command name to ASCII */ snprintf(ascii->header.cmdstr, sizeof(ascii->header.cmdstr), "%s", c->name); /* 
Convert command arguments to ASCII */ argstype = (binary->header.flags & NGF_RESP) ? c->respType : c->mesgType; if (argstype == NULL) { *ascii->data = '\0'; } else { if ((error = ng_unparse(argstype, (u_char *)binary->data, ascii->data, bufSize)) != 0) { NG_FREE_MSG(resp); break; } } /* Return the result as struct ng_mesg plus ASCII string */ bufSize = strlen(ascii->data) + 1; ascii->header.arglen = bufSize; resp->header.arglen = sizeof(*ascii) + bufSize; break; } case NGM_ASCII2BINARY: { int bufSize = 2000; /* XXX hard coded constant */ const struct ng_cmdlist *c; const struct ng_parse_type *argstype; struct ng_mesg *ascii, *binary; int off = 0; /* Data area must contain at least a struct ng_mesg + '\0' */ ascii = (struct ng_mesg *)msg->data; if ((msg->header.arglen < sizeof(*ascii) + 1) || (ascii->header.arglen < 1) || (msg->header.arglen < sizeof(*ascii) + ascii->header.arglen)) { TRAP_ERROR(); error = EINVAL; break; } ascii->data[ascii->header.arglen - 1] = '\0'; /* Get a response message with lots of room */ NG_MKRESPONSE(resp, msg, sizeof(*binary) + bufSize, M_NOWAIT); if (resp == NULL) { error = ENOMEM; break; } binary = (struct ng_mesg *)resp->data; /* Copy ASCII message header to response message payload */ bcopy(ascii, binary, sizeof(*ascii)); /* Find command by matching ASCII command string */ for (c = here->nd_type->cmdlist; c != NULL && c->name != NULL; c++) { if (strcmp(ascii->header.cmdstr, c->name) == 0) break; } if (c == NULL || c->name == NULL) { for (c = ng_generic_cmds; c->name != NULL; c++) { if (strcmp(ascii->header.cmdstr, c->name) == 0) break; } if (c->name == NULL) { NG_FREE_MSG(resp); error = ENOSYS; break; } } /* Convert command name to binary */ binary->header.cmd = c->cmd; binary->header.typecookie = c->cookie; /* Convert command arguments to binary */ argstype = (binary->header.flags & NGF_RESP) ? c->respType : c->mesgType; if (argstype == NULL) { bufSize = 0; } else { if ((error = ng_parse(argstype, ascii->data, &off, (u_char *)binary->data, &bufSize)) != 0) { NG_FREE_MSG(resp); break; } } /* Return the result */ binary->header.arglen = bufSize; resp->header.arglen = sizeof(*binary) + bufSize; break; } case NGM_TEXT_CONFIG: case NGM_TEXT_STATUS: /* * This one is tricky as it passes the command down to the * actual node, even though it is a generic type command. * This means we must assume that the item/msg is already freed * when control passes back to us. */ if (here->nd_type->rcvmsg != NULL) { NGI_MSG(item) = msg; /* put it back as we found it */ return((*here->nd_type->rcvmsg)(here, item, lasthook)); } /* Fall through if rcvmsg not supported */ default: TRAP_ERROR(); error = EINVAL; } /* * Sometimes a generic message may be statically allocated * to avoid problems with allocating when in tight memeory situations. * Don't free it if it is so. * I break them appart here, because erros may cause a free if the item * in which case we'd be doing it twice. * they are kept together above, to simplify freeing. 
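[editor's note] The generic commands handled above (NGM_MKPEER, NGM_CONNECT, NGM_LISTTYPES, the ASCII conversions, and so on) are normally issued from user space through libnetgraph. The following hedged sketch sends an NGM_MKPEER to the local socket node; it assumes the NgMkSockNode()/NgSendMsg() prototypes as documented in netgraph(3), linking with -lnetgraph, and the node type "hub" and hook names are arbitrary examples:

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <netgraph.h>
    #include <netgraph/ng_message.h>
    #include <err.h>
    #include <string.h>

    int
    main(void)
    {
        struct ngm_mkpeer mkp;
        int csock, dsock;

        /* Create a transient ng_socket node to speak to the graph from. */
        if (NgMkSockNode(NULL, &csock, &dsock) < 0)
            err(1, "NgMkSockNode");

        /* Ask the local node (".") to create and attach an ng_hub peer. */
        memset(&mkp, 0, sizeof(mkp));
        strlcpy(mkp.type, "hub", sizeof(mkp.type));
        strlcpy(mkp.ourhook, "a", sizeof(mkp.ourhook));
        strlcpy(mkp.peerhook, "b", sizeof(mkp.peerhook));
        if (NgSendMsg(csock, ".", NGM_GENERIC_COOKIE, NGM_MKPEER,
            &mkp, sizeof(mkp)) < 0)
            err(1, "NgSendMsg(NGM_MKPEER)");
        return (0);
    }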
*/ out: NG_RESPOND_MSG(error, here, item, resp); if (msg) NG_FREE_MSG(msg); return (error); } /************************************************************************ Queue element get/free routines ************************************************************************/ uma_zone_t ng_qzone; static int maxalloc = 4096;/* limit the damage of a leak */ static int maxdata = 512; /* limit the damage of a DoS */ static int useddata = 0; TUNABLE_INT("net.graph.maxalloc", &maxalloc); SYSCTL_INT(_net_graph, OID_AUTO, maxalloc, CTLFLAG_RDTUN, &maxalloc, 0, "Maximum number of queue items to allocate"); TUNABLE_INT("net.graph.maxdata", &maxdata); SYSCTL_INT(_net_graph, OID_AUTO, maxdata, CTLFLAG_RW | CTLFLAG_TUN, &maxdata, 0, "Maximum number of queue data items to allocate"); #ifdef NETGRAPH_DEBUG static TAILQ_HEAD(, ng_item) ng_itemlist = TAILQ_HEAD_INITIALIZER(ng_itemlist); static int allocated; /* number of items malloc'd */ #endif /* * Get a queue entry. * This is usually called when a packet first enters netgraph. * By definition, this is usually from an interrupt, or from a user. * Users are not so important, but try be quick for the times that it's * an interrupt. */ static __inline item_p ng_getqblk(int flags) { item_p item = NULL; int wait; wait = (flags & NG_WAITOK) ? M_WAITOK : M_NOWAIT; item = uma_zalloc(ng_qzone, wait | M_ZERO); #ifdef NETGRAPH_DEBUG if (item) { mtx_lock(&ngq_mtx); TAILQ_INSERT_TAIL(&ng_itemlist, item, all); allocated++; mtx_unlock(&ngq_mtx); } #endif return (item); } /* * Release a queue entry */ void ng_free_item(item_p item) { /* * The item may hold resources on it's own. We need to free * these before we can free the item. What they are depends upon * what kind of item it is. it is important that nodes zero * out pointers to resources that they remove from the item * or we release them again here. */ switch (item->el_flags & NGQF_TYPE) { case NGQF_DATA: atomic_subtract_int(&useddata, 1); /* If we have an mbuf still attached.. */ NG_FREE_M(_NGI_M(item)); break; case NGQF_MESG: _NGI_RETADDR(item) = 0; NG_FREE_MSG(_NGI_MSG(item)); break; case NGQF_FN: case NGQF_FN2: /* nothing to free really, */ _NGI_FN(item) = NULL; _NGI_ARG1(item) = NULL; _NGI_ARG2(item) = 0; break; } /* If we still have a node or hook referenced... */ _NGI_CLR_NODE(item); _NGI_CLR_HOOK(item); #ifdef NETGRAPH_DEBUG mtx_lock(&ngq_mtx); TAILQ_REMOVE(&ng_itemlist, item, all); allocated--; mtx_unlock(&ngq_mtx); #endif uma_zfree(ng_qzone, item); } /************************************************************************ Module routines ************************************************************************/ /* * Handle the loading/unloading of a netgraph node type module */ int ng_mod_event(module_t mod, int event, void *data) { struct ng_type *const type = data; int s, error = 0; switch (event) { case MOD_LOAD: /* Register new netgraph node type */ s = splnet(); if ((error = ng_newtype(type)) != 0) { splx(s); break; } /* Call type specific code */ if (type->mod_event != NULL) if ((error = (*type->mod_event)(mod, event, data))) { mtx_lock(&ng_typelist_mtx); type->refs--; /* undo it */ LIST_REMOVE(type, types); mtx_unlock(&ng_typelist_mtx); } splx(s); break; case MOD_UNLOAD: s = splnet(); if (type->refs > 1) { /* make sure no nodes exist! */ error = EBUSY; } else { if (type->refs == 0) { /* failed load, nothing to undo */ splx(s); break; } if (type->mod_event != NULL) { /* check with type */ error = (*type->mod_event)(mod, event, data); if (error != 0) { /* type refuses.. 
*/ splx(s); break; } } mtx_lock(&ng_typelist_mtx); LIST_REMOVE(type, types); mtx_unlock(&ng_typelist_mtx); } splx(s); break; default: if (type->mod_event != NULL) error = (*type->mod_event)(mod, event, data); else error = EOPNOTSUPP; /* XXX ? */ break; } return (error); } /* * Handle loading and unloading for this code. * The only thing we need to link into is the NETISR strucure. */ static int ngb_mod_event(module_t mod, int event, void *data) { int error = 0; switch (event) { case MOD_LOAD: /* Initialize everything. */ NG_WORKLIST_LOCK_INIT(); mtx_init(&ng_typelist_mtx, "netgraph types mutex", NULL, MTX_DEF); mtx_init(&ng_idhash_mtx, "netgraph idhash mutex", NULL, MTX_DEF); mtx_init(&ng_namehash_mtx, "netgraph namehash mutex", NULL, MTX_DEF); mtx_init(&ng_topo_mtx, "netgraph topology mutex", NULL, MTX_DEF); #ifdef NETGRAPH_DEBUG mtx_init(&ng_nodelist_mtx, "netgraph nodelist mutex", NULL, MTX_DEF); mtx_init(&ngq_mtx, "netgraph item list mutex", NULL, MTX_DEF); #endif ng_qzone = uma_zcreate("NetGraph items", sizeof(struct ng_item), NULL, NULL, NULL, NULL, UMA_ALIGN_CACHE, 0); uma_zone_set_max(ng_qzone, maxalloc); netisr_register(NETISR_NETGRAPH, (netisr_t *)ngintr, NULL, NETISR_MPSAFE); break; case MOD_UNLOAD: /* You can't unload it because an interface may be using it. */ error = EBUSY; break; default: error = EOPNOTSUPP; break; } return (error); } static moduledata_t netgraph_mod = { "netgraph", ngb_mod_event, (NULL) }; DECLARE_MODULE(netgraph, netgraph_mod, SI_SUB_NETGRAPH, SI_ORDER_MIDDLE); SYSCTL_NODE(_net, OID_AUTO, graph, CTLFLAG_RW, 0, "netgraph Family"); SYSCTL_INT(_net_graph, OID_AUTO, abi_version, CTLFLAG_RD, 0, NG_ABI_VERSION,""); SYSCTL_INT(_net_graph, OID_AUTO, msg_version, CTLFLAG_RD, 0, NG_VERSION, ""); #ifdef NETGRAPH_DEBUG void dumphook (hook_p hook, char *file, int line) { printf("hook: name %s, %d refs, Last touched:\n", _NG_HOOK_NAME(hook), hook->hk_refs); printf(" Last active @ %s, line %d\n", hook->lastfile, hook->lastline); if (line) { printf(" problem discovered at file %s, line %d\n", file, line); } } void dumpnode(node_p node, char *file, int line) { printf("node: ID [%x]: type '%s', %d hooks, flags 0x%x, %d refs, %s:\n", _NG_NODE_ID(node), node->nd_type->name, node->nd_numhooks, node->nd_flags, node->nd_refs, node->nd_name); printf(" Last active @ %s, line %d\n", node->lastfile, node->lastline); if (line) { printf(" problem discovered at file %s, line %d\n", file, line); } } void dumpitem(item_p item, char *file, int line) { printf(" ACTIVE item, last used at %s, line %d", item->lastfile, item->lastline); switch(item->el_flags & NGQF_TYPE) { case NGQF_DATA: printf(" - [data]\n"); break; case NGQF_MESG: printf(" - retaddr[%d]:\n", _NGI_RETADDR(item)); break; case NGQF_FN: printf(" - fn@%p (%p, %p, %p, %d (%x))\n", _NGI_FN(item), _NGI_NODE(item), _NGI_HOOK(item), item->body.fn.fn_arg1, item->body.fn.fn_arg2, item->body.fn.fn_arg2); break; case NGQF_FN2: printf(" - fn2@%p (%p, %p, %p, %d (%x))\n", _NGI_FN2(item), _NGI_NODE(item), _NGI_HOOK(item), item->body.fn.fn_arg1, item->body.fn.fn_arg2, item->body.fn.fn_arg2); break; } if (line) { printf(" problem discovered at file %s, line %d\n", file, line); if (_NGI_NODE(item)) { printf("node %p ([%x])\n", _NGI_NODE(item), ng_node2ID(_NGI_NODE(item))); } } } static void ng_dumpitems(void) { item_p item; int i = 1; TAILQ_FOREACH(item, &ng_itemlist, all) { printf("[%d] ", i++); dumpitem(item, NULL, 0); } } static void ng_dumpnodes(void) { node_p node; int i = 1; mtx_lock(&ng_nodelist_mtx); SLIST_FOREACH(node, &ng_allnodes, 
nd_all) { printf("[%d] ", i++); dumpnode(node, NULL, 0); } mtx_unlock(&ng_nodelist_mtx); } static void ng_dumphooks(void) { hook_p hook; int i = 1; mtx_lock(&ng_nodelist_mtx); SLIST_FOREACH(hook, &ng_allhooks, hk_all) { printf("[%d] ", i++); dumphook(hook, NULL, 0); } mtx_unlock(&ng_nodelist_mtx); } static int sysctl_debug_ng_dump_items(SYSCTL_HANDLER_ARGS) { int error; int val; int i; val = allocated; i = 1; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val == 42) { ng_dumpitems(); ng_dumpnodes(); ng_dumphooks(); } return (0); } SYSCTL_PROC(_debug, OID_AUTO, ng_dump_items, CTLTYPE_INT | CTLFLAG_RW, 0, sizeof(int), sysctl_debug_ng_dump_items, "I", "Number of allocated items"); #endif /* NETGRAPH_DEBUG */ /*********************************************************************** * Worklist routines **********************************************************************/ /* NETISR thread enters here */ /* * Pick a node off the list of nodes with work, * try get an item to process off it. * If there are no more, remove the node from the list. */ static void ngintr(void) { item_p item; node_p node = NULL; for (;;) { NG_WORKLIST_LOCK(); node = TAILQ_FIRST(&ng_worklist); if (!node) { NG_WORKLIST_UNLOCK(); break; } node->nd_flags &= ~NGF_WORKQ; TAILQ_REMOVE(&ng_worklist, node, nd_work); NG_WORKLIST_UNLOCK(); CTR3(KTR_NET, "%20s: node [%x] (%p) taken off worklist", __func__, node->nd_ID, node); /* * We have the node. We also take over the reference * that the list had on it. * Now process as much as you can, until it won't * let you have another item off the queue. * All this time, keep the reference * that lets us be sure that the node still exists. * Let the reference go at the last minute. * ng_dequeue will put us back on the worklist * if there is more too do. This may be of use if there * are Multiple Processors and multiple Net threads in the * future. */ for (;;) { int rw; NG_QUEUE_LOCK(&node->nd_input_queue); item = ng_dequeue(&node->nd_input_queue, &rw); if (item == NULL) { NG_QUEUE_UNLOCK(&node->nd_input_queue); break; /* go look for another node */ } else { NG_QUEUE_UNLOCK(&node->nd_input_queue); NGI_GET_NODE(item, node); /* zaps stored node */ ng_apply_item(node, item, rw); NG_NODE_UNREF(node); } } NG_NODE_UNREF(node); } } static void ng_worklist_remove(node_p node) { mtx_assert(&node->nd_input_queue.q_mtx, MA_OWNED); NG_WORKLIST_LOCK(); if (node->nd_flags & NGF_WORKQ) { node->nd_flags &= ~NGF_WORKQ; TAILQ_REMOVE(&ng_worklist, node, nd_work); NG_WORKLIST_UNLOCK(); NG_NODE_UNREF(node); CTR3(KTR_NET, "%20s: node [%x] (%p) removed from worklist", __func__, node->nd_ID, node); } else { NG_WORKLIST_UNLOCK(); } } /* * XXX * It's posible that a debugging NG_NODE_REF may need * to be outside the mutex zone */ static void ng_setisr(node_p node) { mtx_assert(&node->nd_input_queue.q_mtx, MA_OWNED); if ((node->nd_flags & NGF_WORKQ) == 0) { /* * If we are not already on the work queue, * then put us on. */ node->nd_flags |= NGF_WORKQ; NG_WORKLIST_LOCK(); TAILQ_INSERT_TAIL(&ng_worklist, node, nd_work); NG_WORKLIST_UNLOCK(); NG_NODE_REF(node); /* XXX fafe in mutex? 
*/ CTR3(KTR_NET, "%20s: node [%x] (%p) put on worklist", __func__, node->nd_ID, node); } else CTR3(KTR_NET, "%20s: node [%x] (%p) already on worklist", __func__, node->nd_ID, node); schednetisr(NETISR_NETGRAPH); } /*********************************************************************** * Externally useable functions to set up a queue item ready for sending ***********************************************************************/ #ifdef NETGRAPH_DEBUG #define ITEM_DEBUG_CHECKS \ do { \ if (NGI_NODE(item) ) { \ printf("item already has node"); \ - kdb_enter("has node"); \ + kdb_enter_why(KDB_WHY_NETGRAPH, "has node"); \ NGI_CLR_NODE(item); \ } \ if (NGI_HOOK(item) ) { \ printf("item already has hook"); \ - kdb_enter("has hook"); \ + kdb_enter_why(KDB_WHY_NETGRAPH, "has hook"); \ NGI_CLR_HOOK(item); \ } \ } while (0) #else #define ITEM_DEBUG_CHECKS #endif /* * Put mbuf into the item. * Hook and node references will be removed when the item is dequeued. * (or equivalent) * (XXX) Unsafe because no reference held by peer on remote node. * remote node might go away in this timescale. * We know the hooks can't go away because that would require getting * a writer item on both nodes and we must have at least a reader * here to be able to do this. * Note that the hook loaded is the REMOTE hook. * * This is possibly in the critical path for new data. */ item_p ng_package_data(struct mbuf *m, int flags) { item_p item; if (atomic_fetchadd_int(&useddata, 1) >= maxdata) { atomic_subtract_int(&useddata, 1); NG_FREE_M(m); return (NULL); } if ((item = ng_getqblk(flags)) == NULL) { NG_FREE_M(m); return (NULL); } ITEM_DEBUG_CHECKS; item->el_flags = NGQF_DATA | NGQF_READER; NGI_M(item) = m; return (item); } /* * Allocate a queue item and put items into it.. * Evaluate the address as this will be needed to queue it and * to work out what some of the fields should be. * Hook and node references will be removed when the item is dequeued. * (or equivalent) */ item_p ng_package_msg(struct ng_mesg *msg, int flags) { item_p item; if ((item = ng_getqblk(flags)) == NULL) { NG_FREE_MSG(msg); return (NULL); } ITEM_DEBUG_CHECKS; /* Messages items count as writers unless explicitly exempted. */ if (msg->header.cmd & NGM_READONLY) item->el_flags = NGQF_MESG | NGQF_READER; else item->el_flags = NGQF_MESG | NGQF_WRITER; /* * Set the current lasthook into the queue item */ NGI_MSG(item) = msg; NGI_RETADDR(item) = 0; return (item); } #define SET_RETADDR(item, here, retaddr) \ do { /* Data or fn items don't have retaddrs */ \ if ((item->el_flags & NGQF_TYPE) == NGQF_MESG) { \ if (retaddr) { \ NGI_RETADDR(item) = retaddr; \ } else { \ /* \ * The old return address should be ok. \ * If there isn't one, use the address \ * here. \ */ \ if (NGI_RETADDR(item) == 0) { \ NGI_RETADDR(item) \ = ng_node2ID(here); \ } \ } \ } \ } while (0) int ng_address_hook(node_p here, item_p item, hook_p hook, ng_ID_t retaddr) { hook_p peer; node_p peernode; ITEM_DEBUG_CHECKS; /* * Quick sanity check.. * Since a hook holds a reference on it's node, once we know * that the peer is still connected (even if invalid,) we know * that the peer node is present, though maybe invalid. */ if ((hook == NULL) || NG_HOOK_NOT_VALID(hook) || (NG_HOOK_PEER(hook) == NULL) || NG_HOOK_NOT_VALID(NG_HOOK_PEER(hook)) || NG_NODE_NOT_VALID(NG_PEER_NODE(hook))) { NG_FREE_ITEM(item); TRAP_ERROR(); return (ENETDOWN); } /* * Transfer our interest to the other (peer) end. 
*/ peer = NG_HOOK_PEER(hook); NG_HOOK_REF(peer); NGI_SET_HOOK(item, peer); peernode = NG_PEER_NODE(hook); NG_NODE_REF(peernode); NGI_SET_NODE(item, peernode); SET_RETADDR(item, here, retaddr); return (0); } int ng_address_path(node_p here, item_p item, char *address, ng_ID_t retaddr) { node_p dest = NULL; hook_p hook = NULL; int error; ITEM_DEBUG_CHECKS; /* * Note that ng_path2noderef increments the reference count * on the node for us if it finds one. So we don't have to. */ error = ng_path2noderef(here, address, &dest, &hook); if (error) { NG_FREE_ITEM(item); return (error); } NGI_SET_NODE(item, dest); if ( hook) { NG_HOOK_REF(hook); /* don't let it go while on the queue */ NGI_SET_HOOK(item, hook); } SET_RETADDR(item, here, retaddr); return (0); } int ng_address_ID(node_p here, item_p item, ng_ID_t ID, ng_ID_t retaddr) { node_p dest; ITEM_DEBUG_CHECKS; /* * Find the target node. */ dest = ng_ID2noderef(ID); /* GETS REFERENCE! */ if (dest == NULL) { NG_FREE_ITEM(item); TRAP_ERROR(); return(EINVAL); } /* Fill out the contents */ NGI_SET_NODE(item, dest); NGI_CLR_HOOK(item); SET_RETADDR(item, here, retaddr); return (0); } /* * special case to send a message to self (e.g. destroy node) * Possibly indicate an arrival hook too. * Useful for removing that hook :-) */ item_p ng_package_msg_self(node_p here, hook_p hook, struct ng_mesg *msg) { item_p item; /* * Find the target node. * If there is a HOOK argument, then use that in preference * to the address. */ if ((item = ng_getqblk(NG_NOFLAGS)) == NULL) { NG_FREE_MSG(msg); return (NULL); } /* Fill out the contents */ item->el_flags = NGQF_MESG | NGQF_WRITER; NG_NODE_REF(here); NGI_SET_NODE(item, here); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_MSG(item) = msg; NGI_RETADDR(item) = ng_node2ID(here); return (item); } /* * Send ng_item_fn function call to the specified node. */ int ng_send_fn(node_p node, hook_p hook, ng_item_fn *fn, void * arg1, int arg2) { return ng_send_fn1(node, hook, fn, arg1, arg2, NG_NOFLAGS); } int ng_send_fn1(node_p node, hook_p hook, ng_item_fn *fn, void * arg1, int arg2, int flags) { item_p item; if ((item = ng_getqblk(flags)) == NULL) { return (ENOMEM); } item->el_flags = NGQF_FN | NGQF_WRITER; NG_NODE_REF(node); /* and one for the item */ NGI_SET_NODE(item, node); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_FN(item) = fn; NGI_ARG1(item) = arg1; NGI_ARG2(item) = arg2; return(ng_snd_item(item, flags)); } /* * Send ng_item_fn2 function call to the specified node. * * If an optional pitem parameter is supplied, its apply * callback will be copied to the new item. If also NG_REUSE_ITEM * flag is set, no new item will be allocated, but pitem will * be used. */ int ng_send_fn2(node_p node, hook_p hook, item_p pitem, ng_item_fn2 *fn, void *arg1, int arg2, int flags) { item_p item; KASSERT((pitem != NULL || (flags & NG_REUSE_ITEM) == 0), ("%s: NG_REUSE_ITEM but no pitem", __func__)); /* * Allocate a new item if no supplied or * if we can't use supplied one. 
*/ if (pitem == NULL || (flags & NG_REUSE_ITEM) == 0) { if ((item = ng_getqblk(flags)) == NULL) return (ENOMEM); } else { if ((pitem->el_flags & NGQF_TYPE) == NGQF_DATA) atomic_subtract_int(&useddata, 1); item = pitem; } item->el_flags = NGQF_FN2 | NGQF_WRITER; NG_NODE_REF(node); /* and one for the item */ NGI_SET_NODE(item, node); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_FN2(item) = fn; NGI_ARG1(item) = arg1; NGI_ARG2(item) = arg2; if (pitem != NULL && (flags & NG_REUSE_ITEM) == 0) item->apply = pitem->apply; return(ng_snd_item(item, flags)); } /* * Official timeout routines for Netgraph nodes. */ static void ng_callout_trampoline(void *arg) { item_p item = arg; ng_snd_item(item, 0); } int ng_callout(struct callout *c, node_p node, hook_p hook, int ticks, ng_item_fn *fn, void * arg1, int arg2) { item_p item, oitem; if ((item = ng_getqblk(NG_NOFLAGS)) == NULL) return (ENOMEM); item->el_flags = NGQF_FN | NGQF_WRITER; NG_NODE_REF(node); /* and one for the item */ NGI_SET_NODE(item, node); if (hook) { NG_HOOK_REF(hook); NGI_SET_HOOK(item, hook); } NGI_FN(item) = fn; NGI_ARG1(item) = arg1; NGI_ARG2(item) = arg2; oitem = c->c_arg; if (callout_reset(c, ticks, &ng_callout_trampoline, item) == 1 && oitem != NULL) NG_FREE_ITEM(oitem); return (0); } /* A special modified version of untimeout() */ int ng_uncallout(struct callout *c, node_p node) { item_p item; int rval; KASSERT(c != NULL, ("ng_uncallout: NULL callout")); KASSERT(node != NULL, ("ng_uncallout: NULL node")); rval = callout_stop(c); item = c->c_arg; /* Do an extra check */ if ((rval > 0) && (c->c_func == &ng_callout_trampoline) && (NGI_NODE(item) == node)) { /* * We successfully removed it from the queue before it ran * So now we need to unreference everything that was * given extra references. (NG_FREE_ITEM does this). */ NG_FREE_ITEM(item); } c->c_arg = NULL; return (rval); } /* * Set the address, if none given, give the node here. */ void ng_replace_retaddr(node_p here, item_p item, ng_ID_t retaddr) { if (retaddr) { NGI_RETADDR(item) = retaddr; } else { /* * The old return address should be ok. * If there isn't one, use the address here. */ NGI_RETADDR(item) = ng_node2ID(here); } } #define TESTING #ifdef TESTING /* just test all the macros */ void ng_macro_test(item_p item); void ng_macro_test(item_p item) { node_p node = NULL; hook_p hook = NULL; struct mbuf *m; struct ng_mesg *msg; ng_ID_t retaddr; int error; NGI_GET_M(item, m); NGI_GET_MSG(item, msg); retaddr = NGI_RETADDR(item); NG_SEND_DATA(error, hook, m, NULL); NG_SEND_DATA_ONLY(error, hook, m); NG_FWD_NEW_DATA(error, item, hook, m); NG_FWD_ITEM_HOOK(error, item, hook); NG_SEND_MSG_HOOK(error, node, msg, hook, retaddr); NG_SEND_MSG_ID(error, node, msg, retaddr, retaddr); NG_SEND_MSG_PATH(error, node, msg, ".:", retaddr); NG_FWD_MSG_HOOK(error, node, item, hook, retaddr); } #endif /* TESTING */ Index: stable/7/sys/pc98/cbus/sio.c =================================================================== --- stable/7/sys/pc98/cbus/sio.c (revision 177733) +++ stable/7/sys/pc98/cbus/sio.c (revision 177734) @@ -1,4376 +1,4378 @@ /*- * Copyright (c) 1991 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * from: @(#)com.c 7.5 (Berkeley) 5/16/91 * from: i386/isa sio.c,v 1.234 */ #include "opt_comconsole.h" #include "opt_compat.h" #include "opt_gdb.h" #include "opt_kdb.h" #include "opt_sio.h" /* * Serial driver, based on 386BSD-0.1 com driver. * Mostly rewritten to use pseudo-DMA. * Works for National Semiconductor NS8250-NS16550AF UARTs. * COM driver, based on HP dca driver. * * Changes for PC Card integration: * - Added PC Card driver table and handlers */ /*=============================================================== * 386BSD(98),FreeBSD-1.1x(98) com driver. * ----- * modified for PC9801 by M.Ishii * Kyoto University Microcomputer Club (KMC) * Chou "TEFUTEFU" Hirotomi * Kyoto Univ. the faculty of medicine *=============================================================== * FreeBSD-2.0.1(98) sio driver. * ----- * modified for pc98 Internal i8251 and MICRO CORE MC16550II * T.Koike(hfc01340@niftyserve.or.jp) * implement kernel device configuration * aizu@orient.center.nitech.ac.jp * * Notes. * ----- * PC98 localization based on 386BSD(98) com driver. Using its PC98 local * functions. * This driver is under debugging,has bugs. */ /* * modified for AIWA B98-01 * by T.Hatanou last update: 15 Sep.1995 */ /* * Modified by Y.Takahashi of Kogakuin University. 
*/ /* * modified for 8251(FIFO) by Seigo TANIMURA */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PC98 #include #include #endif #ifdef COM_ESP #include #endif #include #ifdef PC98 #include #include #endif #define LOTS_OF_EVENTS 64 /* helps separate urgent events from input */ /* * Meaning of flags: * * 0x00000001 shared IRQs * 0x00000002 disable FIFO * 0x00000008 recover sooner from lost output interrupts * 0x00000010 device is potential system console * 0x00000020 device is forced to become system console * 0x00000040 device is reserved for low-level IO * 0x00000080 use this port for remote kernel debugging * 0x0000??00 minor number of master port * 0x00010000 PPS timestamping on CTS instead of DCD * 0x00080000 IIR_TXRDY bug * 0x00400000 If no comconsole found then mark as a comconsole * 0x1?000000 interface type */ #ifdef COM_MULTIPORT /* checks in flags for multiport and which is multiport "master chip" * for a given card */ #define COM_ISMULTIPORT(flags) ((flags) & 0x01) #define COM_MPMASTER(flags) (((flags) >> 8) & 0x0ff) #ifndef PC98 #define COM_NOTAST4(flags) ((flags) & 0x04) #endif #else #define COM_ISMULTIPORT(flags) (0) #endif /* COM_MULTIPORT */ #define COM_C_IIR_TXRDYBUG 0x80000 #define COM_CONSOLE(flags) ((flags) & 0x10) #define COM_DEBUGGER(flags) ((flags) & 0x80) #ifndef PC98 #define COM_FIFOSIZE(flags) (((flags) & 0xff000000) >> 24) #endif #define COM_FORCECONSOLE(flags) ((flags) & 0x20) #define COM_IIR_TXRDYBUG(flags) ((flags) & COM_C_IIR_TXRDYBUG) #define COM_LLCONSOLE(flags) ((flags) & 0x40) #define COM_LOSESOUTINTS(flags) ((flags) & 0x08) #define COM_NOFIFO(flags) ((flags) & 0x02) #ifndef PC98 #define COM_NOSCR(flags) ((flags) & 0x100000) #endif #define COM_PPSCTS(flags) ((flags) & 0x10000) #ifndef PC98 #define COM_ST16650A(flags) ((flags) & 0x20000) #define COM_TI16754(flags) ((flags) & 0x200000) #endif #define sio_getreg(com, off) \ (bus_space_read_1((com)->bst, (com)->bsh, (off))) #define sio_setreg(com, off, value) \ (bus_space_write_1((com)->bst, (com)->bsh, (off), (value))) /* * com state bits. * (CS_BUSY | CS_TTGO) and (CS_BUSY | CS_TTGO | CS_ODEVREADY) must be higher * than the other bits so that they can be tested as a group without masking * off the low bits. * * The following com and tty flags correspond closely: * CS_BUSY = TS_BUSY (maintained by comstart(), siopoll() and * comstop()) * CS_TTGO = ~TS_TTSTOP (maintained by comparam() and comstart()) * CS_CTS_OFLOW = CCTS_OFLOW (maintained by comparam()) * CS_RTS_IFLOW = CRTS_IFLOW (maintained by comparam()) * TS_FLUSH is not used. * XXX I think TIOCSETA doesn't clear TS_TTSTOP when it clears IXON. * XXX CS_*FLOW should be CF_*FLOW in com->flags (control flags not state). 
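[editor's note] As a rough illustration of the sio flag bits listed above, the following decodes a made-up hints value using the same macros; 0x10080010 is not taken from any real configuration, and the top byte is simply printed as the interface-type field the table describes:

    #include <stdio.h>

    #define COM_CONSOLE(flags)      ((flags) & 0x10)
    #define COM_NOFIFO(flags)       ((flags) & 0x02)
    #define COM_C_IIR_TXRDYBUG      0x80000
    #define COM_IIR_TXRDYBUG(flags) ((flags) & COM_C_IIR_TXRDYBUG)

    int
    main(void)
    {
        unsigned int flags = 0x10080010;    /* made-up hint.sio.X.flags value */

        printf("console candidate:   %s\n", COM_CONSOLE(flags) ? "yes" : "no");
        printf("FIFO disabled:       %s\n", COM_NOFIFO(flags) ? "yes" : "no");
        printf("IIR_TXRDY bug flag:  %s\n", COM_IIR_TXRDYBUG(flags) ? "yes" : "no");
        printf("interface type byte: 0x%x\n", (flags >> 24) & 0xff);
        return (0);
    }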
*/ #define CS_BUSY 0x80 /* output in progress */ #define CS_TTGO 0x40 /* output not stopped by XOFF */ #define CS_ODEVREADY 0x20 /* external device h/w ready (CTS) */ #define CS_CHECKMSR 1 /* check of MSR scheduled */ #define CS_CTS_OFLOW 2 /* use CTS output flow control */ #define CS_ODONE 4 /* output completed */ #define CS_RTS_IFLOW 8 /* use RTS input flow control */ #define CSE_BUSYCHECK 1 /* siobusycheck() scheduled */ static char const * const error_desc[] = { #define CE_OVERRUN 0 "silo overflow", #define CE_INTERRUPT_BUF_OVERFLOW 1 "interrupt-level buffer overflow", #define CE_TTY_BUF_OVERFLOW 2 "tty-level buffer overflow", }; #define CE_NTYPES 3 #define CE_RECORD(com, errnum) (++(com)->delta_error_counts[errnum]) /* types. XXX - should be elsewhere */ typedef u_int Port_t; /* hardware port */ typedef u_char bool_t; /* boolean */ /* queue of linear buffers */ struct lbq { u_char *l_head; /* next char to process */ u_char *l_tail; /* one past the last char to process */ struct lbq *l_next; /* next in queue */ bool_t l_queued; /* nonzero if queued */ }; /* com device structure */ struct com_s { u_char state; /* miscellaneous flag bits */ u_char cfcr_image; /* copy of value written to CFCR */ #ifdef COM_ESP bool_t esp; /* is this unit a hayes esp board? */ #endif u_char extra_state; /* more flag bits, separate for order trick */ u_char fifo_image; /* copy of value written to FIFO */ bool_t hasfifo; /* nonzero for 16550 UARTs */ bool_t loses_outints; /* nonzero if device loses output interrupts */ u_char mcr_image; /* copy of value written to MCR */ #ifdef COM_MULTIPORT bool_t multiport; /* is this unit part of a multiport device? */ #endif /* COM_MULTIPORT */ bool_t no_irq; /* nonzero if irq is not attached */ bool_t gone; /* hardware disappeared */ bool_t poll; /* nonzero if polling is required */ bool_t poll_output; /* nonzero if polling for output is required */ bool_t st16650a; /* nonzero if Startech 16650A compatible */ int unit; /* unit number */ u_int flags; /* copy of device flags */ u_int tx_fifo_size; /* * The high level of the driver never reads status registers directly * because there would be too many side effects to handle conveniently. * Instead, it reads copies of the registers stored here by the * interrupt handler. */ u_char last_modem_status; /* last MSR read by intr handler */ u_char prev_modem_status; /* last MSR handled by high level */ u_char *ibuf; /* start of input buffer */ u_char *ibufend; /* end of input buffer */ u_char *ibufold; /* old input buffer, to be freed */ u_char *ihighwater; /* threshold in input buffer */ u_char *iptr; /* next free spot in input buffer */ int ibufsize; /* size of ibuf (not include error bytes) */ int ierroff; /* offset of error bytes in ibuf */ struct lbq obufq; /* head of queue of output buffers */ struct lbq obufs[2]; /* output buffers */ bus_space_tag_t bst; bus_space_handle_t bsh; #ifdef PC98 Port_t cmd_port; Port_t sts_port; Port_t in_modem_port; Port_t intr_ctrl_port; Port_t rsabase; /* Iobase address of an I/O-DATA RSA board. 
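The obufq/obufs fields above form a small chain of "linear buffer" descriptors: output drains byte by byte from the front descriptor until l_head catches up with l_tail, then the next descriptor in the chain takes over. A simplified, userland-only model of that walk (the drain target and names are invented; the real driver writes to the UART data port from siointr1()):

#include <stdio.h>
#include <string.h>

struct lbq {
	unsigned char	*l_head;	/* next byte to send */
	unsigned char	*l_tail;	/* one past the last byte */
	struct lbq	*l_next;	/* next buffer in the chain */
	int		l_queued;	/* nonzero while on the chain */
};

/* Drain a chain of linear buffers, dropping each descriptor as it empties. */
static void
drain(struct lbq *q)
{
	while (q != NULL) {
		while (q->l_head < q->l_tail)
			putchar(*q->l_head++);
		q->l_queued = 0;
		q = q->l_next;
	}
}

int
main(void)
{
	unsigned char a[] = "hello, ";
	unsigned char b[] = "world\n";
	struct lbq q1 = { b, b + strlen((char *)b), NULL, 1 };
	struct lbq q0 = { a, a + strlen((char *)a), &q1, 1 };

	drain(&q0);
	return (0);
}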
*/ int intr_enable; int pc98_prev_modem_status; int pc98_modem_delta; int modem_car_chg_timer; int pc98_prev_siocmd; int pc98_prev_siomod; int modem_checking; int pc98_if_type; bool_t pc98_8251fifo; bool_t pc98_8251fifo_enable; #endif /* PC98 */ Port_t data_port; /* i/o ports */ #ifdef COM_ESP Port_t esp_port; #endif Port_t int_ctl_port; Port_t int_id_port; Port_t modem_ctl_port; Port_t line_status_port; Port_t modem_status_port; struct tty *tp; /* cross reference */ struct pps_state pps; int pps_bit; #ifdef ALT_BREAK_TO_DEBUGGER int alt_brk_state; #endif u_long bytes_in; /* statistics */ u_long bytes_out; u_int delta_error_counts[CE_NTYPES]; u_long error_counts[CE_NTYPES]; u_long rclk; struct resource *irqres; struct resource *ioportres; int ioportrid; void *cookie; /* * Data area for output buffers. Someday we should build the output * buffer queue without copying data. */ #ifdef PC98 int obufsize; u_char *obuf1; u_char *obuf2; #else u_char obuf1[256]; u_char obuf2[256]; #endif }; #ifdef COM_ESP static int espattach(struct com_s *com, Port_t esp_port); #endif static void combreak(struct tty *tp, int sig); static timeout_t siobusycheck; static u_int siodivisor(u_long rclk, speed_t speed); static void comclose(struct tty *tp); static int comopen(struct tty *tp, struct cdev *dev); static void sioinput(struct com_s *com); static void siointr1(struct com_s *com); static int siointr(void *arg); static int commodem(struct tty *tp, int sigon, int sigoff); static int comparam(struct tty *tp, struct termios *t); static void siopoll(void *); static void siosettimeout(void); static int siosetwater(struct com_s *com, speed_t speed); static void comstart(struct tty *tp); static void comstop(struct tty *tp, int rw); static timeout_t comwakeup; char sio_driver_name[] = "sio"; static struct mtx sio_lock; static int sio_inited; /* table and macro for fast conversion from a unit number to its com struct */ devclass_t sio_devclass; #define com_addr(unit) ((struct com_s *) \ devclass_get_softc(sio_devclass, unit)) /* XXX */ int comconsole = -1; static volatile speed_t comdefaultrate = CONSPEED; static u_long comdefaultrclk = DEFAULT_RCLK; SYSCTL_ULONG(_machdep, OID_AUTO, conrclk, CTLFLAG_RW, &comdefaultrclk, 0, ""); static speed_t gdbdefaultrate = GDBSPEED; SYSCTL_UINT(_machdep, OID_AUTO, gdbspeed, CTLFLAG_RW, &gdbdefaultrate, GDBSPEED, ""); static u_int com_events; /* input chars + weighted output completions */ static Port_t siocniobase; static int siocnunit = -1; static void *sio_slow_ih; static void *sio_fast_ih; static int sio_timeout; static int sio_timeouts_until_log; static struct callout_handle sio_timeout_handle = CALLOUT_HANDLE_INITIALIZER(&sio_timeout_handle); static int sio_numunits; #ifdef PC98 struct siodev { short if_type; short irq; Port_t cmd, sts, ctrl, mod; }; static int sysclock; #define COM_INT_DISABLE {int previpri; previpri=spltty(); #define COM_INT_ENABLE splx(previpri);} #define IEN_TxFLAG IEN_Tx #define COM_CARRIER_DETECT_EMULATE 0 #define PC98_CHECK_MODEM_INTERVAL (hz/10) #define DCD_OFF_TOLERANCE 2 #define DCD_ON_RECOGNITION 2 #define IS_8251(if_type) (!(if_type & 0x10)) #define COM1_EXT_CLOCK 0x40000 static void commint(struct cdev *dev); static void com_tiocm_bis(struct com_s *com, int msr); static void com_tiocm_bic(struct com_s *com, int msr); static int com_tiocm_get(struct com_s *com); static int com_tiocm_get_delta(struct com_s *com); static void pc98_msrint_start(struct cdev *dev); static void com_cflag_and_speed_set(struct com_s *com, int cflag, int speed); static int 
pc98_ttspeedtab(struct com_s *com, int speed, u_int *divisor); static int pc98_get_modem_status(struct com_s *com); static timeout_t pc98_check_msr; static void pc98_set_baud_rate(struct com_s *com, u_int count); static void pc98_i8251_reset(struct com_s *com, int mode, int command); static void pc98_disable_i8251_interrupt(struct com_s *com, int mod); static void pc98_enable_i8251_interrupt(struct com_s *com, int mod); static int pc98_check_i8251_interrupt(struct com_s *com); static int pc98_i8251_get_cmd(struct com_s *com); static int pc98_i8251_get_mod(struct com_s *com); static void pc98_i8251_set_cmd(struct com_s *com, int x); static void pc98_i8251_or_cmd(struct com_s *com, int x); static void pc98_i8251_clear_cmd(struct com_s *com, int x); static void pc98_i8251_clear_or_cmd(struct com_s *com, int clr, int x); static int pc98_check_if_type(device_t dev, struct siodev *iod); static int pc98_check_8251vfast(void); static int pc98_check_8251fifo(void); static void pc98_check_sysclock(void); static void pc98_set_ioport(struct com_s *com); #define com_int_Tx_disable(com) \ pc98_disable_i8251_interrupt(com,IEN_Tx|IEN_TxEMP) #define com_int_Tx_enable(com) \ pc98_enable_i8251_interrupt(com,IEN_TxFLAG) #define com_int_Rx_disable(com) \ pc98_disable_i8251_interrupt(com,IEN_Rx) #define com_int_Rx_enable(com) \ pc98_enable_i8251_interrupt(com,IEN_Rx) #define com_int_TxRx_disable(com) \ pc98_disable_i8251_interrupt(com,IEN_Tx|IEN_TxEMP|IEN_Rx) #define com_int_TxRx_enable(com) \ pc98_enable_i8251_interrupt(com,IEN_TxFLAG|IEN_Rx) #define com_send_break_on(com) \ (IS_8251((com)->pc98_if_type) ? \ pc98_i8251_or_cmd((com), CMD8251_SBRK) : \ sio_setreg((com), com_cfcr, (com)->cfcr_image |= CFCR_SBREAK)) #define com_send_break_off(com) \ (IS_8251((com)->pc98_if_type) ? 
\ pc98_i8251_clear_cmd((com), CMD8251_SBRK) : \ sio_setreg((com), com_cfcr, (com)->cfcr_image &= ~CFCR_SBREAK)) static struct speedtab pc98speedtab[] = { /* internal RS232C interface */ { 0, 0, }, { 50, 50, }, { 75, 75, }, { 150, 150, }, { 200, 200, }, { 300, 300, }, { 600, 600, }, { 1200, 1200, }, { 2400, 2400, }, { 4800, 4800, }, { 9600, 9600, }, { 19200, 19200, }, { 38400, 38400, }, { 51200, 51200, }, { 76800, 76800, }, { 20800, 20800, }, { 31200, 31200, }, { 41600, 41600, }, { 62400, 62400, }, { -1, -1 } }; static struct speedtab pc98fast_speedtab[] = { { 9600, 0x80 | (DEFAULT_RCLK / (16 * (9600))), }, { 19200, 0x80 | (DEFAULT_RCLK / (16 * (19200))), }, { 38400, 0x80 | (DEFAULT_RCLK / (16 * (38400))), }, { 57600, 0x80 | (DEFAULT_RCLK / (16 * (57600))), }, { 115200, 0x80 | (DEFAULT_RCLK / (16 * (115200))), }, { -1, -1 } }; static struct speedtab comspeedtab_pio9032b[] = { { 300, 6, }, { 600, 5, }, { 1200, 4, }, { 2400, 3, }, { 4800, 2, }, { 9600, 1, }, { 19200, 0, }, { 38400, 7, }, { -1, -1 } }; static struct speedtab comspeedtab_b98_01[] = { { 75, 11, }, { 150, 10, }, { 300, 9, }, { 600, 8, }, { 1200, 7, }, { 2400, 6, }, { 4800, 5, }, { 9600, 4, }, { 19200, 3, }, { 38400, 2, }, { 76800, 1, }, { 153600, 0, }, { -1, -1 } }; static struct speedtab comspeedtab_ind[] = { { 300, 1536, }, { 600, 768, }, { 1200, 384, }, { 2400, 192, }, { 4800, 96, }, { 9600, 48, }, { 19200, 24, }, { 38400, 12, }, { 57600, 8, }, { 115200, 4, }, { 153600, 3, }, { 230400, 2, }, { 460800, 1, }, { -1, -1 } }; struct { char *name; short port_table[7]; short irr_mask; struct speedtab *speedtab; short check_irq; } if_8251_type[] = { /* COM_IF_INTERNAL */ { " (internal)", {0x30, 0x32, 0x32, 0x33, 0x35, -1, -1}, -1, pc98speedtab, 1 }, /* COM_IF_PC9861K_1 */ { " (PC9861K)", {0xb1, 0xb3, 0xb3, 0xb0, 0xb0, -1, -1}, 3, NULL, 1 }, /* COM_IF_PC9861K_2 */ { " (PC9861K)", {0xb9, 0xbb, 0xbb, 0xb2, 0xb2, -1, -1}, 3, NULL, 1 }, /* COM_IF_IND_SS_1 */ { " (IND-SS)", {0xb1, 0xb3, 0xb3, 0xb0, 0xb0, 0xb3, -1}, 3, comspeedtab_ind, 1 }, /* COM_IF_IND_SS_2 */ { " (IND-SS)", {0xb9, 0xbb, 0xbb, 0xb2, 0xb2, 0xbb, -1}, 3, comspeedtab_ind, 1 }, /* COM_IF_PIO9032B_1 */ { " (PIO9032B)", {0xb1, 0xb3, 0xb3, 0xb0, 0xb0, 0xb8, -1}, 7, comspeedtab_pio9032b, 1 }, /* COM_IF_PIO9032B_2 */ { " (PIO9032B)", {0xb9, 0xbb, 0xbb, 0xb2, 0xb2, 0xba, -1}, 7, comspeedtab_pio9032b, 1 }, /* COM_IF_B98_01_1 */ { " (B98-01)", {0xb1, 0xb3, 0xb3, 0xb0, 0xb0, 0xd1, 0xd3}, 7, comspeedtab_b98_01, 0 }, /* COM_IF_B98_01_2 */ { " (B98-01)", {0xb9, 0xbb, 0xbb, 0xb2, 0xb2, 0xd5, 0xd7}, 7, comspeedtab_b98_01, 0 }, }; #define PC98SIO_data_port(type) (if_8251_type[type].port_table[0]) #define PC98SIO_cmd_port(type) (if_8251_type[type].port_table[1]) #define PC98SIO_sts_port(type) (if_8251_type[type].port_table[2]) #define PC98SIO_in_modem_port(type) (if_8251_type[type].port_table[3]) #define PC98SIO_intr_ctrl_port(type) (if_8251_type[type].port_table[4]) #define PC98SIO_baud_rate_port(type) (if_8251_type[type].port_table[5]) #define PC98SIO_func_port(type) (if_8251_type[type].port_table[6]) #define I8251F_data 0x130 #define I8251F_lsr 0x132 #define I8251F_msr 0x134 #define I8251F_iir 0x136 #define I8251F_fcr 0x138 #define I8251F_div 0x13a static bus_addr_t port_table_0[] = {0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007}; static bus_addr_t port_table_1[] = {0x000, 0x002, 0x004, 0x006, 0x008, 0x00a, 0x00c, 0x00e}; static bus_addr_t port_table_8[] = {0x000, 0x100, 0x200, 0x300, 0x400, 0x500, 0x600, 0x700}; static bus_addr_t port_table_rsa[] = { 0x008, 0x009, 0x00a, 0x00b, 
0x00c, 0x00d, 0x00e, 0x00f, 0x000, 0x001, 0x002, 0x003, 0x004, 0x005, 0x006, 0x007 }; struct { char *name; short irr_read; short irr_write; bus_addr_t *iat; bus_size_t iatsz; u_long rclk; } if_16550a_type[] = { /* COM_IF_RSA98 */ {" (RSA-98)", -1, -1, port_table_0, IO_COMSIZE, DEFAULT_RCLK}, /* COM_IF_NS16550 */ {"", -1, -1, port_table_0, IO_COMSIZE, DEFAULT_RCLK}, /* COM_IF_SECOND_CCU */ {"", -1, -1, port_table_0, IO_COMSIZE, DEFAULT_RCLK}, /* COM_IF_MC16550II */ {" (MC16550II)", -1, 0x1000, port_table_8, IO_COMSIZE, DEFAULT_RCLK * 4}, /* COM_IF_MCRS98 */ {" (MC-RS98)", -1, 0x1000, port_table_8, IO_COMSIZE, DEFAULT_RCLK * 4}, /* COM_IF_RSB3000 */ {" (RSB-3000)", 0xbf, -1, port_table_1, IO_COMSIZE, DEFAULT_RCLK * 10}, /* COM_IF_RSB384 */ {" (RSB-384)", 0xbf, -1, port_table_1, IO_COMSIZE, DEFAULT_RCLK * 10}, /* COM_IF_MODEM_CARD */ {"", -1, -1, port_table_0, IO_COMSIZE, DEFAULT_RCLK}, /* COM_IF_RSA98III */ {" (RSA-98III)", -1, -1, port_table_rsa, 16, DEFAULT_RCLK * 8}, /* COM_IF_ESP98 */ {" (ESP98)", -1, -1, port_table_1, IO_COMSIZE, DEFAULT_RCLK * 4}, }; #endif /* PC98 */ #ifdef GDB static Port_t siogdbiobase = 0; #endif #ifdef COM_ESP #ifdef PC98 /* XXX configure this properly. */ /* XXX quite broken for new-bus. */ static Port_t likely_com_ports[] = { 0, 0xb0, 0xb1, 0 }; static Port_t likely_esp_ports[] = { 0xc0d0, 0 }; #define ESP98_CMD1 (ESP_CMD1 * 0x100) #define ESP98_CMD2 (ESP_CMD2 * 0x100) #define ESP98_STATUS1 (ESP_STATUS1 * 0x100) #define ESP98_STATUS2 (ESP_STATUS2 * 0x100) #else /* PC98 */ /* XXX configure this properly. */ static Port_t likely_com_ports[] = { 0x3f8, 0x2f8, 0x3e8, 0x2e8, }; static Port_t likely_esp_ports[] = { 0x140, 0x180, 0x280, 0 }; #endif /* PC98 */ #endif /* * handle sysctl read/write requests for console speed * * In addition to setting comdefaultrate for I/O through /dev/console, * also set the initial and lock values for the /dev/ttyXX device * if there is one associated with the console. Finally, if the /dev/tty * device has already been open, change the speed on the open running port * itself. */ static int sysctl_machdep_comdefaultrate(SYSCTL_HANDLER_ARGS) { int error, s; speed_t newspeed; struct com_s *com; struct tty *tp; newspeed = comdefaultrate; error = sysctl_handle_opaque(oidp, &newspeed, sizeof newspeed, req); if (error || !req->newptr) return (error); comdefaultrate = newspeed; if (comconsole < 0) /* serial console not selected? */ return (0); com = com_addr(comconsole); if (com == NULL) return (ENXIO); tp = com->tp; if (tp == NULL) return (ENXIO); /* * set the initial and lock rates for /dev/ttydXX and /dev/cuaXX * (note, the lock rates really are boolean -- if non-zero, disallow * speed changes) */ tp->t_init_in.c_ispeed = tp->t_init_in.c_ospeed = tp->t_lock_in.c_ispeed = tp->t_lock_in.c_ospeed = tp->t_init_out.c_ispeed = tp->t_init_out.c_ospeed = tp->t_lock_out.c_ispeed = tp->t_lock_out.c_ospeed = comdefaultrate; if (tp->t_state & TS_ISOPEN) { tp->t_termios.c_ispeed = tp->t_termios.c_ospeed = comdefaultrate; s = spltty(); error = comparam(tp, &tp->t_termios); splx(s); } return error; } SYSCTL_PROC(_machdep, OID_AUTO, conspeed, CTLTYPE_INT | CTLFLAG_RW, 0, 0, sysctl_machdep_comdefaultrate, "I", ""); TUNABLE_INT("machdep.conspeed", __DEVOLATILE(int *, &comdefaultrate)); /* * Unload the driver and clear the table. * XXX this is mostly wrong. * XXX TODO: * This is usually called when the card is ejected, but * can be caused by a kldunload of a controller driver. 
* The idea is to reset the driver's view of the device * and ensure that any driver entry points such as * read and write do not hang. */ int siodetach(device_t dev) { struct com_s *com; com = (struct com_s *) device_get_softc(dev); if (com == NULL) { device_printf(dev, "NULL com in siounload\n"); return (0); } com->gone = TRUE; if (com->tp) ttyfree(com->tp); if (com->irqres) { bus_teardown_intr(dev, com->irqres, com->cookie); bus_release_resource(dev, SYS_RES_IRQ, 0, com->irqres); } if (com->ioportres) bus_release_resource(dev, SYS_RES_IOPORT, com->ioportrid, com->ioportres); if (com->ibuf != NULL) free(com->ibuf, M_DEVBUF); #ifdef PC98 if (com->obuf1 != NULL) free(com->obuf1, M_DEVBUF); #endif device_set_softc(dev, NULL); free(com, M_DEVBUF); return (0); } int sioprobe(dev, xrid, rclk, noprobe) device_t dev; int xrid; u_long rclk; int noprobe; { #if 0 static bool_t already_init; device_t xdev; #endif struct com_s *com; u_int divisor; bool_t failures[10]; int fn; device_t idev; Port_t iobase; intrmask_t irqmap[4]; intrmask_t irqs; u_char mcr_image; int result; u_long xirq; u_int flags = device_get_flags(dev); int rid; struct resource *port; #ifdef PC98 int tmp; struct siodev iod; #endif #ifdef PC98 iod.if_type = GET_IFTYPE(flags); if ((iod.if_type < 0 || iod.if_type > COM_IF_END1) && (iod.if_type < 0x10 || iod.if_type > COM_IF_END2)) return ENXIO; #endif rid = xrid; #ifdef PC98 if (IS_8251(iod.if_type)) { port = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); } else if (iod.if_type == COM_IF_MODEM_CARD || iod.if_type == COM_IF_RSA98III || isa_get_vendorid(dev)) { port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, if_16550a_type[iod.if_type & 0x0f].iatsz, RF_ACTIVE); } else { port = isa_alloc_resourcev(dev, SYS_RES_IOPORT, &rid, if_16550a_type[iod.if_type & 0x0f].iat, if_16550a_type[iod.if_type & 0x0f].iatsz, RF_ACTIVE); } #else port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, IO_COMSIZE, RF_ACTIVE); #endif if (!port) return (ENXIO); #ifdef PC98 if (!IS_8251(iod.if_type)) { if (isa_load_resourcev(port, if_16550a_type[iod.if_type & 0x0f].iat, if_16550a_type[iod.if_type & 0x0f].iatsz) != 0) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return ENXIO; } } #endif com = malloc(sizeof(*com), M_DEVBUF, M_NOWAIT | M_ZERO); if (com == NULL) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENOMEM); } device_set_softc(dev, com); com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); #ifdef PC98 if (!IS_8251(iod.if_type) && rclk == 0) rclk = if_16550a_type[iod.if_type & 0x0f].rclk; #else if (rclk == 0) rclk = DEFAULT_RCLK; #endif com->rclk = rclk; while (sio_inited != 2) if (atomic_cmpset_int(&sio_inited, 0, 1)) { mtx_init(&sio_lock, sio_driver_name, NULL, (comconsole != -1) ? MTX_SPIN | MTX_QUIET : MTX_SPIN); atomic_store_rel_int(&sio_inited, 2); } #if 0 /* * XXX this is broken - when we are first called, there are no * previously configured IO ports. We could hard code * 0x3f8, 0x2f8, 0x3e8, 0x2e8 etc but that's probably worse. * This code has been doing nothing since the conversion since * "count" is zero the first time around. */ if (!already_init) { /* * Turn off MCR_IENABLE for all likely serial ports. An unused * port with its MCR_IENABLE gate open will inhibit interrupts * from any used port that shares the interrupt vector. * XXX the gate enable is elsewhere for some multiports. 
*/ device_t *devs; int count, i, xioport; #ifdef PC98 int xiftype; #endif devclass_get_devices(sio_devclass, &devs, &count); #ifdef PC98 for (i = 0; i < count; i++) { xdev = devs[i]; xioport = bus_get_resource_start(xdev, SYS_RES_IOPORT, 0); xiftype = GET_IFTYPE(device_get_flags(xdev)); if (device_is_enabled(xdev) && xioport > 0) { if (IS_8251(xiftype)) outb((xioport & 0xff00) | PC98SIO_cmd_port(xiftype & 0x0f), 0xf2); else outb(xioport + if_16550a_type[xiftype & 0x0f].iat[com_mcr], 0); } } #else for (i = 0; i < count; i++) { xdev = devs[i]; if (device_is_enabled(xdev) && bus_get_resource(xdev, SYS_RES_IOPORT, 0, &xioport, NULL) == 0) outb(xioport + com_mcr, 0); } #endif free(devs, M_TEMP); already_init = TRUE; } #endif if (COM_LLCONSOLE(flags)) { printf("sio%d: reserved for low-level i/o\n", device_get_unit(dev)); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); device_set_softc(dev, NULL); free(com, M_DEVBUF); return (ENXIO); } #ifdef PC98 DELAY(10); /* * If the port is i8251 UART (internal, B98_01) */ if (pc98_check_if_type(dev, &iod) == -1) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); device_set_softc(dev, NULL); free(com, M_DEVBUF); return (ENXIO); } if (iod.irq > 0) bus_set_resource(dev, SYS_RES_IRQ, 0, iod.irq, 1); if (IS_8251(iod.if_type)) { outb(iod.cmd, 0); DELAY(10); outb(iod.cmd, 0); DELAY(10); outb(iod.cmd, 0); DELAY(10); outb(iod.cmd, CMD8251_RESET); DELAY(1000); /* for a while...*/ outb(iod.cmd, 0xf2); /* MODE (dummy) */ DELAY(10); outb(iod.cmd, 0x01); /* CMD (dummy) */ DELAY(1000); /* for a while...*/ if (( inb(iod.sts) & STS8251_TxEMP ) == 0 ) { result = (ENXIO); } if (if_8251_type[iod.if_type & 0x0f].check_irq) { COM_INT_DISABLE tmp = ( inb( iod.ctrl ) & ~(IEN_Rx|IEN_TxEMP|IEN_Tx)); outb( iod.ctrl, tmp|IEN_TxEMP ); DELAY(10); result = isa_irq_pending() ? 0 : ENXIO; outb( iod.ctrl, tmp ); COM_INT_ENABLE } else { /* * B98_01 doesn't activate TxEMP interrupt line * when being reset, so we can't check irq pending. */ result = 0; } bus_release_resource(dev, SYS_RES_IOPORT, rid, port); if (result) { device_set_softc(dev, NULL); free(com, M_DEVBUF); } return result; } #endif /* PC98 */ /* * If the device is on a multiport card and has an AST/4 * compatible interrupt control register, initialize this * register and prepare to leave MCR_IENABLE clear in the mcr. * Otherwise, prepare to set MCR_IENABLE in the mcr. * Point idev to the device struct giving the correct id_irq. * This is the struct for the master device if there is one. 
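Finding that master device, as the COM_MULTIPORT code just below does, is plain bit surgery on the hint-flags layout documented near the top of the file: bit 0 marks a multiport member and bits 8-15 carry the master port's minor number. A small sketch with a made-up flags value:

#include <stdio.h>

#define COM_ISMULTIPORT(flags)	((flags) & 0x01)	/* mirrors the driver */
#define COM_MPMASTER(flags)	(((flags) >> 8) & 0x0ff)

int
main(void)
{
	unsigned int flags = 0x00000401;	/* hypothetical: slave of unit 4 */

	if (COM_ISMULTIPORT(flags))
		printf("multiport member, master is unit %u\n",
		    COM_MPMASTER(flags));
	return (0);
}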
*/ idev = dev; mcr_image = MCR_IENABLE; #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { #ifndef PC98 Port_t xiobase; u_long io; #endif idev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); if (idev == NULL) { printf("sio%d: master device %d not configured\n", device_get_unit(dev), COM_MPMASTER(flags)); idev = dev; } #ifndef PC98 if (!COM_NOTAST4(flags)) { if (bus_get_resource(idev, SYS_RES_IOPORT, 0, &io, NULL) == 0) { xiobase = io; if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) == 0) outb(xiobase + com_scr, 0x80); else outb(xiobase + com_scr, 0); } mcr_image = 0; } #endif } #endif /* COM_MULTIPORT */ if (bus_get_resource(idev, SYS_RES_IRQ, 0, NULL, NULL) != 0) mcr_image = 0; bzero(failures, sizeof failures); iobase = rman_get_start(port); #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) { mcr_image = 0; outb(iobase + rsa_msr, 0x04); outb(iobase + rsa_frr, 0x00); if ((inb(iobase + rsa_srr) & 0x36) != 0x36) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); device_set_softc(dev, NULL); free(com, M_DEVBUF); return (ENXIO); } outb(iobase + rsa_ier, 0x00); outb(iobase + rsa_frr, 0x00); outb(iobase + rsa_tivsr, 0x00); outb(iobase + rsa_tcr, 0x00); } tmp = if_16550a_type[iod.if_type & 0x0f].irr_write; if (tmp != -1) { /* MC16550II */ int irqout; switch (isa_get_irq(idev)) { case 3: irqout = 4; break; case 5: irqout = 5; break; case 6: irqout = 6; break; case 12: irqout = 7; break; default: printf("sio%d: irq configuration error\n", device_get_unit(dev)); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); device_set_softc(dev, NULL); free(com, M_DEVBUF); return (ENXIO); } outb((iobase & 0x00ff) | tmp, irqout); } #endif /* * We don't want to get actual interrupts, just masked ones. * Interrupts from this line should already be masked in the ICU, * but mask them in the processor as well in case there are some * (misconfigured) shared interrupts. */ mtx_lock_spin(&sio_lock); /* EXTRA DELAY? */ /* * Initialize the speed and the word size and wait long enough to * drain the maximum of 16 bytes of junk in device output queues. * The speed is undefined after a master reset and must be set * before relying on anything related to output. There may be * junk after a (very fast) soft reboot and (apparently) after * master reset. * XXX what about the UART bug avoided by waiting in comparam()? * We don't want to to wait long enough to drain at 2 bps. */ if (iobase == siocniobase) DELAY((16 + 1) * 1000000 / (comdefaultrate / 10)); else { sio_setreg(com, com_cfcr, CFCR_DLAB | CFCR_8BITS); divisor = siodivisor(rclk, SIO_TEST_SPEED); sio_setreg(com, com_dlbl, divisor & 0xff); sio_setreg(com, com_dlbh, divisor >> 8); sio_setreg(com, com_cfcr, CFCR_8BITS); DELAY((16 + 1) * 1000000 / (SIO_TEST_SPEED / 10)); } /* * Enable the interrupt gate and disable device interupts. This * should leave the device driving the interrupt line low and * guarantee an edge trigger if an interrupt can be generated. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); sio_setreg(com, com_ier, 0); DELAY(1000); /* XXX */ irqmap[0] = isa_irq_pending(); /* * Attempt to set loopback mode so that we can send a null byte * without annoying any external device. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image | MCR_LOOPBACK); /* * Attempt to generate an output interrupt. On 8250's, setting * IER_ETXRDY generates an interrupt independent of the current * setting and independent of whether the THR is empty. On 16450's, * setting IER_ETXRDY generates an interrupt independent of the * current setting. 
On 16550A's, setting IER_ETXRDY only * generates an interrupt when IER_ETXRDY is not already set. */ sio_setreg(com, com_ier, IER_ETXRDY); #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) outb(iobase + rsa_ier, 0x04); #endif /* * On some 16x50 incompatibles, setting IER_ETXRDY doesn't generate * an interrupt. They'd better generate one for actually doing * output. Loopback may be broken on the same incompatibles but * it's unlikely to do more than allow the null byte out. */ sio_setreg(com, com_data, 0); if (iobase == siocniobase) DELAY((1 + 2) * 1000000 / (comdefaultrate / 10)); else DELAY((1 + 2) * 1000000 / (SIO_TEST_SPEED / 10)); /* * Turn off loopback mode so that the interrupt gate works again * (MCR_IENABLE was hidden). This should leave the device driving * an interrupt line high. It doesn't matter if the interrupt * line oscillates while we are not looking at it, since interrupts * are disabled. */ /* EXTRA DELAY? */ sio_setreg(com, com_mcr, mcr_image); /* * It seems my Xircom CBEM56G Cardbus modem wants to be reset * to 8 bits *again*, or else probe test 0 will fail. * gwk@sgi.com, 4/19/2001 */ sio_setreg(com, com_cfcr, CFCR_8BITS); /* * Some PCMCIA cards (Palido 321s, DC-1S, ...) have the "TXRDY bug", * so we probe for a buggy IIR_TXRDY implementation even in the * noprobe case. We don't probe for it in the !noprobe case because * noprobe is always set for PCMCIA cards and the problem is not * known to affect any other cards. */ if (noprobe) { /* Read IIR a few times. */ for (fn = 0; fn < 2; fn ++) { DELAY(10000); failures[6] = sio_getreg(com, com_iir); } /* IIR_TXRDY should be clear. Is it? */ result = 0; if (failures[6] & IIR_TXRDY) { /* * No. We seem to have the bug. Does our fix for * it work? */ sio_setreg(com, com_ier, 0); if (sio_getreg(com, com_iir) & IIR_NOPEND) { /* Yes. We discovered the TXRDY bug! */ SET_FLAG(dev, COM_C_IIR_TXRDYBUG); } else { /* No. Just fail. XXX */ result = ENXIO; sio_setreg(com, com_mcr, 0); } } else { /* Yes. No bug. */ CLR_FLAG(dev, COM_C_IIR_TXRDYBUG); } sio_setreg(com, com_ier, 0); sio_setreg(com, com_cfcr, CFCR_8BITS); mtx_unlock_spin(&sio_lock); bus_release_resource(dev, SYS_RES_IOPORT, rid, port); if (iobase == siocniobase) result = 0; if (result != 0) { device_set_softc(dev, NULL); free(com, M_DEVBUF); } return (result); } /* * Check that * o the CFCR, IER and MCR in UART hold the values written to them * (the values happen to be all distinct - this is good for * avoiding false positive tests from bus echoes). * o an output interrupt is generated and its vector is correct. * o the interrupt goes away when the IIR in the UART is read. */ /* EXTRA DELAY? */ failures[0] = sio_getreg(com, com_cfcr) - CFCR_8BITS; failures[1] = sio_getreg(com, com_ier) - IER_ETXRDY; failures[2] = sio_getreg(com, com_mcr) - mcr_image; DELAY(10000); /* Some internal modems need this time */ irqmap[1] = isa_irq_pending(); failures[4] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_TXRDY; #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) inb(iobase + rsa_srr); #endif DELAY(1000); /* XXX */ irqmap[2] = isa_irq_pending(); failures[6] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) inb(iobase + rsa_srr); #endif /* * Turn off all device interrupts and check that they go off properly. * Leave MCR_IENABLE alone. For ports without a master port, it gates * the OUT2 output of the UART to * the ICU input. Closing the gate would give a floating ICU input * (unless there is another device driving it) and spurious interrupts. 
* (On the system that this was first tested on, the input floats high * and gives a (masked) interrupt as soon as the gate is closed.) */ sio_setreg(com, com_ier, 0); sio_setreg(com, com_cfcr, CFCR_8BITS); /* dummy to avoid bus echo */ failures[7] = sio_getreg(com, com_ier); #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) outb(iobase + rsa_ier, 0x00); #endif DELAY(1000); /* XXX */ irqmap[3] = isa_irq_pending(); failures[9] = (sio_getreg(com, com_iir) & IIR_IMASK) - IIR_NOPEND; #ifdef PC98 if (iod.if_type == COM_IF_RSA98III) { inb(iobase + rsa_srr); outb(iobase + rsa_frr, 0x00); } #endif mtx_unlock_spin(&sio_lock); irqs = irqmap[1] & ~irqmap[0]; if (bus_get_resource(idev, SYS_RES_IRQ, 0, &xirq, NULL) == 0 && ((1 << xirq) & irqs) == 0) { printf( "sio%d: configured irq %ld not in bitmap of probed irqs %#x\n", device_get_unit(dev), xirq, irqs); printf( "sio%d: port may not be enabled\n", device_get_unit(dev)); } if (bootverbose) printf("sio%d: irq maps: %#x %#x %#x %#x\n", device_get_unit(dev), irqmap[0], irqmap[1], irqmap[2], irqmap[3]); result = 0; for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) { sio_setreg(com, com_mcr, 0); result = ENXIO; if (bootverbose) { printf("sio%d: probe failed test(s):", device_get_unit(dev)); for (fn = 0; fn < sizeof failures; ++fn) if (failures[fn]) printf(" %d", fn); printf("\n"); } break; } bus_release_resource(dev, SYS_RES_IOPORT, rid, port); if (iobase == siocniobase) result = 0; if (result != 0) { device_set_softc(dev, NULL); free(com, M_DEVBUF); } return (result); } #ifdef COM_ESP static int espattach(com, esp_port) struct com_s *com; Port_t esp_port; { u_char dips; u_char val; /* * Check the ESP-specific I/O port to see if we're an ESP * card. If not, return failure immediately. */ if ((inb(esp_port) & 0xf3) == 0) { printf(" port 0x%x is not an ESP board?\n", esp_port); return (0); } /* * We've got something that claims to be a Hayes ESP card. * Let's hope so. */ /* Get the dip-switch configuration */ #ifdef PC98 outb(esp_port + ESP98_CMD1, ESP_GETDIPS); dips = inb(esp_port + ESP98_STATUS1); #else outb(esp_port + ESP_CMD1, ESP_GETDIPS); dips = inb(esp_port + ESP_STATUS1); #endif /* * Bits 0,1 of dips say which COM port we are. */ #ifdef PC98 if ((rman_get_start(com->ioportres) & 0xff) == likely_com_ports[dips & 0x03]) #else if (rman_get_start(com->ioportres) == likely_com_ports[dips & 0x03]) #endif printf(" : ESP"); else { printf(" esp_port has com %d\n", dips & 0x03); return (0); } /* * Check for ESP version 2.0 or later: bits 4,5,6 = 010. */ #ifdef PC98 outb(esp_port + ESP98_CMD1, ESP_GETTEST); val = inb(esp_port + ESP98_STATUS1); /* clear reg 1 */ val = inb(esp_port + ESP98_STATUS2); #else outb(esp_port + ESP_CMD1, ESP_GETTEST); val = inb(esp_port + ESP_STATUS1); /* clear reg 1 */ val = inb(esp_port + ESP_STATUS2); #endif if ((val & 0x70) < 0x20) { printf("-old (%o)", val & 0x70); return (0); } /* * Check for ability to emulate 16550: bit 7 == 1 */ if ((dips & 0x80) == 0) { printf(" slave"); return (0); } /* * Okay, we seem to be a Hayes ESP card. Whee. 
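The dip-switch and test bytes that espattach() reads are all the probe has to go on: bits 0-1 of the switches give the COM port the board is strapped to, bits 4-6 of the second status byte must read at least 010 for a 2.0-or-newer ESP, and bit 7 of the switches says whether the port can emulate a 16550. A standalone decoder using the same masks (sample bytes are made up; no I/O is performed):

#include <stdio.h>

static int
esp_usable(unsigned char dips, unsigned char testval)
{
	printf("strapped as COM index %d\n", dips & 0x03);
	if ((testval & 0x70) < 0x20) {
		printf("pre-2.0 ESP (%o), not usable\n", testval & 0x70);
		return (0);
	}
	if ((dips & 0x80) == 0) {
		printf("slave port, no 16550 emulation\n");
		return (0);
	}
	return (1);
}

int
main(void)
{
	return (esp_usable(0x81, 0x20) ? 0 : 1);	/* made-up sample bytes */
}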
*/ com->esp = TRUE; com->esp_port = esp_port; return (1); } #endif /* COM_ESP */ int sioattach(dev, xrid, rclk) device_t dev; int xrid; u_long rclk; { struct com_s *com; #ifdef COM_ESP Port_t *espp; #endif Port_t iobase; int unit; u_int flags; int rid; struct resource *port; int ret; int error; struct tty *tp; #ifdef PC98 u_char *obuf; u_long obufsize; int if_type = GET_IFTYPE(device_get_flags(dev)); #endif rid = xrid; #ifdef PC98 if (IS_8251(if_type)) { port = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid, RF_ACTIVE); } else if (if_type == COM_IF_MODEM_CARD || if_type == COM_IF_RSA98III || isa_get_vendorid(dev)) { port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, if_16550a_type[if_type & 0x0f].iatsz, RF_ACTIVE); } else { port = isa_alloc_resourcev(dev, SYS_RES_IOPORT, &rid, if_16550a_type[if_type & 0x0f].iat, if_16550a_type[if_type & 0x0f].iatsz, RF_ACTIVE); } #else port = bus_alloc_resource(dev, SYS_RES_IOPORT, &rid, 0, ~0, IO_COMSIZE, RF_ACTIVE); #endif if (!port) return (ENXIO); #ifdef PC98 if (!IS_8251(if_type)) { if (isa_load_resourcev(port, if_16550a_type[if_type & 0x0f].iat, if_16550a_type[if_type & 0x0f].iatsz) != 0) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return ENXIO; } } #endif iobase = rman_get_start(port); unit = device_get_unit(dev); com = device_get_softc(dev); flags = device_get_flags(dev); if (unit >= sio_numunits) sio_numunits = unit + 1; #ifdef PC98 obufsize = 256; if (if_type == COM_IF_RSA98III) obufsize = 2048; if ((obuf = malloc(obufsize * 2, M_DEVBUF, M_NOWAIT)) == NULL) { bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return ENXIO; } bzero(obuf, obufsize * 2); #endif /* * sioprobe() has initialized the device registers as follows: * o cfcr = CFCR_8BITS. * It is most important that CFCR_DLAB is off, so that the * data port is not hidden when we enable interrupts. * o ier = 0. * Interrupts are only enabled when the line is open. * o mcr = MCR_IENABLE, or 0 if the port has AST/4 compatible * interrupt control register or the config specifies no irq. * Keeping MCR_DTR and MCR_RTS off might stop the external * device from sending before we are ready. 
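The note above about keeping CFCR_DLAB off matters because the divisor latch shares I/O offsets 0 and 1 with the data and interrupt-enable registers: while DLAB is set, "writing the data port" actually reprograms the baud divisor. A sketch of the usual latch dance, with a placeholder write_reg() callback standing in for sio_setreg()/outb() and the standard 8250 offsets assumed:

#include <stdio.h>

#define COM_DATA	0	/* data, or divisor low byte while DLAB is set */
#define COM_IER		1	/* interrupt enable, or divisor high byte */
#define COM_CFCR	3	/* line control */
#define CFCR_DLAB	0x80
#define CFCR_8BITS	0x03

static void
set_divisor(void (*write_reg)(int, unsigned char), unsigned int divisor)
{
	write_reg(COM_CFCR, CFCR_DLAB | CFCR_8BITS);	/* expose the latch */
	write_reg(COM_DATA, divisor & 0xff);
	write_reg(COM_IER, divisor >> 8);
	write_reg(COM_CFCR, CFCR_8BITS);	/* hide it before enabling ints */
}

static void
log_write(int off, unsigned char val)
{
	printf("outb(reg %d, 0x%02x)\n", off, val);
}

int
main(void)
{
	set_divisor(log_write, 12);	/* 9600 bps from a 1.8432 MHz / 16 clock */
	return (0);
}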
*/ bzero(com, sizeof *com); com->unit = unit; com->ioportres = port; com->ioportrid = rid; com->bst = rman_get_bustag(port); com->bsh = rman_get_bushandle(port); com->cfcr_image = CFCR_8BITS; com->loses_outints = COM_LOSESOUTINTS(flags) != 0; com->no_irq = bus_get_resource(dev, SYS_RES_IRQ, 0, NULL, NULL) != 0; com->tx_fifo_size = 1; #ifdef PC98 com->obufsize = obufsize; com->obuf1 = obuf; com->obuf2 = obuf + obufsize; #endif com->obufs[0].l_head = com->obuf1; com->obufs[1].l_head = com->obuf2; #ifdef PC98 com->pc98_if_type = if_type; if (IS_8251(if_type)) { pc98_set_ioport(com); if (if_type == COM_IF_INTERNAL && pc98_check_8251fifo()) { com->pc98_8251fifo = 1; com->pc98_8251fifo_enable = 0; } } else { bus_addr_t *iat = if_16550a_type[if_type & 0x0f].iat; com->data_port = iobase + iat[com_data]; com->int_ctl_port = iobase + iat[com_ier]; com->int_id_port = iobase + iat[com_iir]; com->modem_ctl_port = iobase + iat[com_mcr]; com->mcr_image = inb(com->modem_ctl_port); com->line_status_port = iobase + iat[com_lsr]; com->modem_status_port = iobase + iat[com_msr]; } #else /* not PC98 */ com->data_port = iobase + com_data; com->int_ctl_port = iobase + com_ier; com->int_id_port = iobase + com_iir; com->modem_ctl_port = iobase + com_mcr; com->mcr_image = inb(com->modem_ctl_port); com->line_status_port = iobase + com_lsr; com->modem_status_port = iobase + com_msr; #endif tp = com->tp = ttyalloc(); tp->t_oproc = comstart; tp->t_param = comparam; tp->t_stop = comstop; tp->t_modem = commodem; tp->t_break = combreak; tp->t_close = comclose; tp->t_open = comopen; tp->t_sc = com; #ifdef PC98 if (!IS_8251(if_type) && rclk == 0) rclk = if_16550a_type[if_type & 0x0f].rclk; #else if (rclk == 0) rclk = DEFAULT_RCLK; #endif com->rclk = rclk; if (unit == comconsole) ttyconsolemode(tp, comdefaultrate); error = siosetwater(com, tp->t_init_in.c_ispeed); mtx_unlock_spin(&sio_lock); if (error) { /* * Leave i/o resources allocated if this is a `cn'-level * console, so that other devices can't snarf them. */ if (iobase != siocniobase) bus_release_resource(dev, SYS_RES_IOPORT, rid, port); return (ENOMEM); } /* attempt to determine UART type */ printf("sio%d: type", unit); #ifndef PC98 if (!COM_ISMULTIPORT(flags) && !COM_IIR_TXRDYBUG(flags) && !COM_NOSCR(flags)) { u_char scr; u_char scr1; u_char scr2; scr = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0xa5); scr1 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, 0x5a); scr2 = sio_getreg(com, com_scr); sio_setreg(com, com_scr, scr); if (scr1 != 0xa5 || scr2 != 0x5a) { printf(" 8250 or not responding"); goto determined_type; } } #endif /* !PC98 */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (com->pc98_8251fifo && !COM_NOFIFO(flags)) com->tx_fifo_size = 16; com_int_TxRx_disable( com ); com_cflag_and_speed_set( com, tp->t_init_in.c_cflag, comdefaultrate ); com_tiocm_bic( com, TIOCM_DTR|TIOCM_RTS|TIOCM_LE ); com_send_break_off( com ); if (com->pc98_if_type == COM_IF_INTERNAL) { printf(" (internal%s%s)", com->pc98_8251fifo ? " fifo" : "", PC98SIO_baud_rate_port(com->pc98_if_type) != -1 ? 
" v-fast" : ""); } else { printf(" 8251%s", if_8251_type[com->pc98_if_type & 0x0f].name); } } else { #endif /* PC98 */ sio_setreg(com, com_fifo, FIFO_ENABLE | FIFO_RX_HIGH); DELAY(100); switch (inb(com->int_id_port) & IIR_FIFO_MASK) { case FIFO_RX_LOW: printf(" 16450"); break; case FIFO_RX_MEDL: printf(" 16450?"); break; case FIFO_RX_MEDH: printf(" 16550?"); break; case FIFO_RX_HIGH: if (COM_NOFIFO(flags)) { printf(" 16550A fifo disabled"); break; } com->hasfifo = TRUE; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { com->tx_fifo_size = 2048; com->rsabase = iobase; outb(com->rsabase + rsa_ier, 0x00); outb(com->rsabase + rsa_frr, 0x00); } #else if (COM_ST16650A(flags)) { printf(" ST16650A"); com->st16650a = TRUE; com->tx_fifo_size = 32; break; } if (COM_TI16754(flags)) { printf(" TI16754"); com->tx_fifo_size = 64; break; } #endif printf(" 16550A"); #ifdef COM_ESP #ifdef PC98 if (com->pc98_if_type == COM_IF_ESP98) #endif for (espp = likely_esp_ports; *espp != 0; espp++) if (espattach(com, *espp)) { com->tx_fifo_size = 1024; break; } if (com->esp) break; #endif #ifdef PC98 com->tx_fifo_size = 16; #else com->tx_fifo_size = COM_FIFOSIZE(flags); if (com->tx_fifo_size == 0) com->tx_fifo_size = 16; else printf(" lookalike with %u bytes FIFO", com->tx_fifo_size); #endif break; } #ifdef PC98 if (com->pc98_if_type == COM_IF_RSB3000) { /* Set RSB-2000/3000 Extended Buffer mode. */ u_char lcr; lcr = sio_getreg(com, com_cfcr); sio_setreg(com, com_cfcr, lcr | CFCR_DLAB); sio_setreg(com, com_emr, EMR_EXBUFF | EMR_EFMODE); sio_setreg(com, com_cfcr, lcr); } #endif #ifdef COM_ESP if (com->esp) { /* * Set 16550 compatibility mode. * We don't use the ESP_MODE_SCALE bit to increase the * fifo trigger levels because we can't handle large * bursts of input. * XXX flow control should be set in comparam(), not here. */ #ifdef PC98 outb(com->esp_port + ESP98_CMD1, ESP_SETMODE); outb(com->esp_port + ESP98_CMD2, ESP_MODE_RTS | ESP_MODE_FIFO); #else outb(com->esp_port + ESP_CMD1, ESP_SETMODE); outb(com->esp_port + ESP_CMD2, ESP_MODE_RTS | ESP_MODE_FIFO); #endif /* Set RTS/CTS flow control. */ #ifdef PC98 outb(com->esp_port + ESP98_CMD1, ESP_SETFLOWTYPE); outb(com->esp_port + ESP98_CMD2, ESP_FLOW_RTS); outb(com->esp_port + ESP98_CMD2, ESP_FLOW_CTS); #else outb(com->esp_port + ESP_CMD1, ESP_SETFLOWTYPE); outb(com->esp_port + ESP_CMD2, ESP_FLOW_RTS); outb(com->esp_port + ESP_CMD2, ESP_FLOW_CTS); #endif /* Set flow-control levels. */ #ifdef PC98 outb(com->esp_port + ESP98_CMD1, ESP_SETRXFLOW); outb(com->esp_port + ESP98_CMD2, HIBYTE(768)); outb(com->esp_port + ESP98_CMD2, LOBYTE(768)); outb(com->esp_port + ESP98_CMD2, HIBYTE(512)); outb(com->esp_port + ESP98_CMD2, LOBYTE(512)); #else outb(com->esp_port + ESP_CMD1, ESP_SETRXFLOW); outb(com->esp_port + ESP_CMD2, HIBYTE(768)); outb(com->esp_port + ESP_CMD2, LOBYTE(768)); outb(com->esp_port + ESP_CMD2, HIBYTE(512)); outb(com->esp_port + ESP_CMD2, LOBYTE(512)); #endif #ifdef PC98 /* Set UART clock prescaler. 
*/ outb(com->esp_port + ESP98_CMD1, ESP_SETCLOCK); outb(com->esp_port + ESP98_CMD2, 2); /* 4 times */ #endif } #endif /* COM_ESP */ sio_setreg(com, com_fifo, 0); #ifdef PC98 printf("%s", if_16550a_type[com->pc98_if_type & 0x0f].name); #else determined_type: ; #endif #ifdef COM_MULTIPORT if (COM_ISMULTIPORT(flags)) { device_t masterdev; com->multiport = TRUE; printf(" (multiport"); if (unit == COM_MPMASTER(flags)) printf(" master"); printf(")"); masterdev = devclass_get_device(sio_devclass, COM_MPMASTER(flags)); com->no_irq = (masterdev == NULL || bus_get_resource(masterdev, SYS_RES_IRQ, 0, NULL, NULL) != 0); } #endif /* COM_MULTIPORT */ #ifdef PC98 } #endif if (unit == comconsole) printf(", console"); if (COM_IIR_TXRDYBUG(flags)) printf(" with a buggy IIR_TXRDY implementation"); printf("\n"); if (sio_fast_ih == NULL) { swi_add(&tty_intr_event, "sio", siopoll, NULL, SWI_TTY, 0, &sio_fast_ih); swi_add(&clk_intr_event, "sio", siopoll, NULL, SWI_CLOCK, 0, &sio_slow_ih); } com->flags = flags; com->pps.ppscap = PPS_CAPTUREASSERT | PPS_CAPTURECLEAR; tp->t_pps = &com->pps; if (COM_PPSCTS(flags)) com->pps_bit = MSR_CTS; else com->pps_bit = MSR_DCD; pps_init(&com->pps); rid = 0; com->irqres = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (com->irqres) { ret = bus_setup_intr(dev, com->irqres, INTR_TYPE_TTY, siointr, NULL, com, &com->cookie); if (ret) { ret = bus_setup_intr(dev, com->irqres, INTR_TYPE_TTY, NULL, (driver_intr_t *)siointr, com, &com->cookie); if (ret == 0) device_printf(dev, "unable to activate interrupt in fast mode - using normal mode\n"); } if (ret) device_printf(dev, "could not activate interrupt\n"); #if defined(KDB) && (defined(BREAK_TO_DEBUGGER) || \ defined(ALT_BREAK_TO_DEBUGGER)) /* * Enable interrupts for early break-to-debugger support * on the console. */ if (ret == 0 && unit == comconsole) outb(siocniobase + com_ier, IER_ERXRDY | IER_ERLS | IER_EMSC); #endif } /* We're ready, open the doors... */ ttycreate(tp, TS_CALLOUT, "d%r", unit); return (0); } static int comopen(struct tty *tp, struct cdev *dev) { struct com_s *com; int i; com = tp->t_sc; com->poll = com->no_irq; com->poll_output = com->loses_outints; #ifdef PC98 if (IS_8251(com->pc98_if_type)) { com_tiocm_bis(com, TIOCM_DTR|TIOCM_RTS); pc98_msrint_start(dev); if (com->pc98_8251fifo) { com->pc98_8251fifo_enable = 1; outb(I8251F_fcr, CTRL8251F_ENABLE | CTRL8251F_XMT_RST | CTRL8251F_RCV_RST); } } #endif if (com->hasfifo) { /* * (Re)enable and drain fifos. * * Certain SMC chips cause problems if the fifos * are enabled while input is ready. Turn off the * fifo if necessary to clear the input. We test * the input ready bit after enabling the fifos * since we've already enabled them in comparam() * and to handle races between enabling and fresh * input. */ for (i = 0; i < 500; i++) { sio_setreg(com, com_fifo, FIFO_RCV_RST | FIFO_XMT_RST | com->fifo_image); #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) outb(com->rsabase + rsa_frr , 0x00); #endif /* * XXX the delays are for superstitious * historical reasons. It must be less than * the character time at the maximum * supported speed (87 usec at 115200 bps * 8N1). Otherwise we might loop endlessly * if data is streaming in. We used to use * delays of 100. That usually worked * because DELAY(100) used to usually delay * for about 85 usec instead of 100. */ DELAY(50); #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III ? 
!(inb(com->rsabase + rsa_srr) & 0x08) : !(inb(com->line_status_port) & LSR_RXRDY)) break; #else if (!(inb(com->line_status_port) & LSR_RXRDY)) break; #endif sio_setreg(com, com_fifo, 0); DELAY(50); (void) inb(com->data_port); } if (i == 500) return (EIO); } mtx_lock_spin(&sio_lock); #ifdef PC98 if (IS_8251(com->pc98_if_type)) { com_tiocm_bis(com, TIOCM_LE); com->pc98_prev_modem_status = pc98_get_modem_status(com); com_int_Rx_enable(com); } else { #endif (void) inb(com->line_status_port); (void) inb(com->data_port); com->prev_modem_status = com->last_modem_status = inb(com->modem_status_port); outb(com->int_ctl_port, IER_ERXRDY | IER_ERLS | IER_EMSC | (COM_IIR_TXRDYBUG(com->flags) ? 0 : IER_ETXRDY)); #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { outb(com->rsabase + rsa_ier, 0x1d); outb(com->int_ctl_port, IER_ERLS | IER_EMSC); } #endif #ifdef PC98 } #endif mtx_unlock_spin(&sio_lock); siosettimeout(); /* XXX: should be generic ? */ #ifdef PC98 if ((IS_8251(com->pc98_if_type) && (pc98_get_modem_status(com) & TIOCM_CAR)) || (!IS_8251(com->pc98_if_type) && (com->prev_modem_status & MSR_DCD)) || ISCALLOUT(dev)) ttyld_modem(tp, 1); #else if (com->prev_modem_status & MSR_DCD || ISCALLOUT(dev)) ttyld_modem(tp, 1); #endif return (0); } static void comclose(tp) struct tty *tp; { int s; struct com_s *com; s = spltty(); com = tp->t_sc; com->poll = FALSE; com->poll_output = FALSE; #ifdef PC98 com_send_break_off(com); #else sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); #endif #if defined(KDB) && (defined(BREAK_TO_DEBUGGER) || \ defined(ALT_BREAK_TO_DEBUGGER)) /* * Leave interrupts enabled and don't clear DTR if this is the * console. This allows us to detect break-to-debugger events * while the console device is closed. */ if (com->unit != comconsole) #endif { #ifdef PC98 int tmp; if (IS_8251(com->pc98_if_type)) com_int_TxRx_disable(com); else sio_setreg(com, com_ier, 0); if (com->pc98_if_type == COM_IF_RSA98III) outb(com->rsabase + rsa_ier, 0x00); if (IS_8251(com->pc98_if_type)) tmp = pc98_get_modem_status(com) & TIOCM_CAR; else tmp = com->prev_modem_status & MSR_DCD; #else sio_setreg(com, com_ier, 0); #endif if (tp->t_cflag & HUPCL /* * XXX we will miss any carrier drop between here and the * next open. Perhaps we should watch DCD even when the * port is closed; it is not sufficient to check it at * the next open because it might go up and down while * we're not watching. */ || (!tp->t_actout #ifdef PC98 && !(tmp) #else && !(com->prev_modem_status & MSR_DCD) #endif && !(tp->t_init_in.c_cflag & CLOCAL)) || !(tp->t_state & TS_ISOPEN)) { #ifdef PC98 if (IS_8251(com->pc98_if_type)) com_tiocm_bic(com, TIOCM_DTR|TIOCM_RTS|TIOCM_LE); else #endif (void)commodem(tp, 0, SER_DTR); ttydtrwaitstart(tp); } #ifdef PC98 else { if (IS_8251(com->pc98_if_type)) com_tiocm_bic(com, TIOCM_LE); } #endif } #ifdef PC98 if (com->pc98_8251fifo) { if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_XMT_RST | CTRL8251F_RCV_RST); com->pc98_8251fifo_enable = 0; } #endif if (com->hasfifo) { /* * Disable fifos so that they are off after controlled * reboots. Some BIOSes fail to detect 16550s when the * fifos are enabled. */ sio_setreg(com, com_fifo, 0); } tp->t_actout = FALSE; wakeup(&tp->t_actout); wakeup(TSA_CARR_ON(tp)); /* restart any wopeners */ siosettimeout(); splx(s); } static void siobusycheck(chan) void *chan; { struct com_s *com; int s; com = (struct com_s *)chan; /* * Clear TS_BUSY if low-level output is complete. * spl locking is sufficient because siointr1() does not set CS_BUSY. 
* If siointr1() clears CS_BUSY after we look at it, then we'll get * called again. Reading the line status port outside of siointr1() * is safe because CS_BUSY is clear so there are no output interrupts * to lose. */ s = spltty(); if (com->state & CS_BUSY) com->extra_state &= ~CSE_BUSYCHECK; /* False alarm. */ #ifdef PC98 else if ((IS_8251(com->pc98_if_type) && ((com->pc98_8251fifo_enable && (inb(I8251F_lsr) & (STS8251F_TxRDY | STS8251F_TxEMP)) == (STS8251F_TxRDY | STS8251F_TxEMP)) || (!com->pc98_8251fifo_enable && (inb(com->sts_port) & (STS8251_TxRDY | STS8251_TxEMP)) == (STS8251_TxRDY | STS8251_TxEMP)))) || ((inb(com->line_status_port) & (LSR_TSRE | LSR_TXRDY)) == (LSR_TSRE | LSR_TXRDY))) { #else else if ((inb(com->line_status_port) & (LSR_TSRE | LSR_TXRDY)) == (LSR_TSRE | LSR_TXRDY)) { #endif com->tp->t_state &= ~TS_BUSY; ttwwakeup(com->tp); com->extra_state &= ~CSE_BUSYCHECK; } else timeout(siobusycheck, com, hz / 100); splx(s); } static u_int siodivisor(rclk, speed) u_long rclk; speed_t speed; { long actual_speed; u_int divisor; int error; if (speed == 0) return (0); #if UINT_MAX > (ULONG_MAX - 1) / 8 if (speed > (ULONG_MAX - 1) / 8) return (0); #endif divisor = (rclk / (8UL * speed) + 1) / 2; if (divisor == 0 || divisor >= 65536) return (0); actual_speed = rclk / (16UL * divisor); /* 10 times error in percent: */ error = ((actual_speed - (long)speed) * 2000 / (long)speed + 1) / 2; /* 3.0% maximum error tolerance: */ if (error < -30 || error > 30) return (0); return (divisor); } /* * Call this function with the sio_lock mutex held. It will return with the * lock still held. */ static void sioinput(com) struct com_s *com; { u_char *buf; int incc; u_char line_status; int recv_data; struct tty *tp; buf = com->ibuf; tp = com->tp; if (!(tp->t_state & TS_ISOPEN) || !(tp->t_cflag & CREAD)) { com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; return; } if (tp->t_state & TS_CAN_BYPASS_L_RINT) { /* * Avoid the grotesquely inefficient lineswitch routine * (ttyinput) in "raw" mode. It usually takes about 450 * instructions (that's without canonical processing or echo!). * slinput is reasonably fast (usually 40 instructions plus * call overhead). */ do { /* * This may look odd, but it is using save-and-enable * semantics instead of the save-and-disable semantics * that are used everywhere else. */ mtx_unlock_spin(&sio_lock); incc = com->iptr - buf; if (tp->t_rawq.c_cc + incc > tp->t_ihiwat && (com->state & CS_RTS_IFLOW || tp->t_iflag & IXOFF) && !(tp->t_state & TS_TBLOCK)) ttyblock(tp); com->delta_error_counts[CE_TTY_BUF_OVERFLOW] += b_to_q((char *)buf, incc, &tp->t_rawq); buf += incc; tk_nin += incc; tk_rawcc += incc; tp->t_rawcc += incc; ttwakeup(tp); if (tp->t_state & TS_TTSTOP && (tp->t_iflag & IXANY || tp->t_cc[VSTART] == tp->t_cc[VSTOP])) { tp->t_state &= ~TS_TTSTOP; tp->t_lflag &= ~FLUSHO; comstart(tp); } mtx_lock_spin(&sio_lock); } while (buf < com->iptr); } else { do { /* * This may look odd, but it is using save-and-enable * semantics instead of the save-and-disable semantics * that are used everywhere else. 
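Plugging concrete numbers into siodivisor() above shows why the rounding and the 3.0% tolerance are there: with the stock 1.8432 MHz clock the standard rates land exactly, while a rate the divisor cannot approximate closely enough is refused. A standalone copy of the same arithmetic (simplified overflow guard, no kernel types):

#include <stdio.h>

static unsigned int
divisor_for(unsigned long rclk, unsigned long speed)
{
	unsigned int divisor;
	long actual, error;

	if (speed == 0 || speed > (~0UL - 1) / 8)
		return (0);
	divisor = (rclk / (8UL * speed) + 1) / 2;	/* round to nearest */
	if (divisor == 0 || divisor >= 65536)
		return (0);
	actual = rclk / (16UL * divisor);
	/* ten times the error in percent, rounded */
	error = ((actual - (long)speed) * 2000 / (long)speed + 1) / 2;
	if (error < -30 || error > 30)	/* more than 3.0% off */
		return (0);
	return (divisor);
}

int
main(void)
{
	printf("%u\n", divisor_for(1843200UL, 9600));	/* 12, exact */
	printf("%u\n", divisor_for(1843200UL, 115200));	/* 1, exact */
	printf("%u\n", divisor_for(1843200UL, 128000));	/* 0: best fit is 10% slow */
	return (0);
}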
*/ mtx_unlock_spin(&sio_lock); line_status = buf[com->ierroff]; recv_data = *buf++; if (line_status & (LSR_BI | LSR_FE | LSR_OE | LSR_PE)) { if (line_status & LSR_BI) recv_data |= TTY_BI; if (line_status & LSR_FE) recv_data |= TTY_FE; if (line_status & LSR_OE) recv_data |= TTY_OE; if (line_status & LSR_PE) recv_data |= TTY_PE; } ttyld_rint(tp, recv_data); mtx_lock_spin(&sio_lock); } while (buf < com->iptr); } com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; /* * There is now room for another low-level buffer full of input, * so enable RTS if it is now disabled and there is room in the * high-level buffer. */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if ((com->state & CS_RTS_IFLOW) && !(com_tiocm_get(com) & TIOCM_RTS) && !(tp->t_state & TS_TBLOCK)) com_tiocm_bis(com, TIOCM_RTS); } else { if ((com->state & CS_RTS_IFLOW) && !(com->mcr_image & MCR_RTS) && !(tp->t_state & TS_TBLOCK)) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } #else if ((com->state & CS_RTS_IFLOW) && !(com->mcr_image & MCR_RTS) && !(tp->t_state & TS_TBLOCK)) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); #endif } static int siointr(arg) void *arg; { struct com_s *com; #if defined(PC98) && defined(COM_MULTIPORT) u_char rsa_buf_status; #endif #ifndef COM_MULTIPORT com = (struct com_s *)arg; mtx_lock_spin(&sio_lock); siointr1(com); mtx_unlock_spin(&sio_lock); #else /* COM_MULTIPORT */ bool_t possibly_more_intrs; int unit; /* * Loop until there is no activity on any port. This is necessary * to get an interrupt edge more than to avoid another interrupt. * If the IRQ signal is just an OR of the IRQ signals from several * devices, then the edge from one may be lost because another is * on. */ mtx_lock_spin(&sio_lock); do { possibly_more_intrs = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); /* * XXX COM_LOCK(); * would it work here, or be counter-productive? 
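The loop that follows is the standard cure for several UARTs wire-ORed onto one interrupt line: the line only produces a fresh edge after every device has gone quiet, so the handler keeps sweeping all units until a complete pass finds nothing pending. The shape of that pattern, reduced to a toy in which fake per-unit counters stand in for the IIR check and siointr1():

#include <stdio.h>

static int pending[4] = { 2, 0, 1, 0 };	/* fake per-unit work counters */

static int
unit_has_work(int unit)
{
	return (pending[unit] > 0);
}

static void
service_unit(int unit)
{
	pending[unit]--;
	printf("serviced unit %d\n", unit);
}

static void
sweep_all(int nunits)
{
	int again, unit;

	do {
		again = 0;
		for (unit = 0; unit < nunits; unit++)
			if (unit_has_work(unit)) {
				service_unit(unit);
				again = 1;	/* an edge may have been hidden */
			}
	} while (again);
}

int
main(void)
{
	sweep_all(4);
	return (0);
}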
*/ #ifdef PC98 if (com != NULL && !com->gone && IS_8251(com->pc98_if_type)) { siointr1(com); } else if (com != NULL && !com->gone && com->pc98_if_type == COM_IF_RSA98III) { rsa_buf_status = inb(com->rsabase + rsa_srr) & 0xc9; if ((rsa_buf_status & 0xc8) || !(rsa_buf_status & 0x01)) { siointr1(com); if (rsa_buf_status != (inb(com->rsabase + rsa_srr) & 0xc9)) possibly_more_intrs = TRUE; } } else #endif if (com != NULL && !com->gone && (inb(com->int_id_port) & IIR_IMASK) != IIR_NOPEND) { siointr1(com); possibly_more_intrs = TRUE; } /* XXX COM_UNLOCK(); */ } } while (possibly_more_intrs); mtx_unlock_spin(&sio_lock); #endif /* COM_MULTIPORT */ return (FILTER_HANDLED); } static struct timespec siots[8]; static int siotso; static int volatile siotsunit = -1; static int sysctl_siots(SYSCTL_HANDLER_ARGS) { char buf[128]; long long delta; size_t len; int error, i, tso; for (i = 1, tso = siotso; i < tso; i++) { delta = (long long)(siots[i].tv_sec - siots[i - 1].tv_sec) * 1000000000 + (siots[i].tv_nsec - siots[i - 1].tv_nsec); len = sprintf(buf, "%lld\n", delta); if (delta >= 110000) len += sprintf(buf + len - 1, ": *** %ld.%09ld\n", (long)siots[i].tv_sec, siots[i].tv_nsec) - 1; if (i == tso - 1) buf[len - 1] = '\0'; error = SYSCTL_OUT(req, buf, len); if (error != 0) return (error); uio_yield(); } return (0); } SYSCTL_PROC(_machdep, OID_AUTO, siots, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, sysctl_siots, "A", "sio timestamps"); static void siointr1(com) struct com_s *com; { u_char int_ctl; u_char int_ctl_new; u_char line_status; u_char modem_status; u_char *ioptr; u_char recv_data; #ifdef PC98 u_char tmp = 0; u_char rsa_buf_status = 0; int rsa_tx_fifo_size = 0; #endif /* PC98 */ if (COM_IIR_TXRDYBUG(com->flags)) { int_ctl = inb(com->int_ctl_port); int_ctl_new = int_ctl; } else { int_ctl = 0; int_ctl_new = 0; } while (!com->gone) { #ifdef PC98 status_read:; if (IS_8251(com->pc98_if_type)) { if (com->pc98_8251fifo_enable) tmp = inb(I8251F_lsr); else tmp = inb(com->sts_port); more_intr: line_status = 0; if (com->pc98_8251fifo_enable) { if (tmp & STS8251F_TxRDY) line_status |= LSR_TXRDY; if (tmp & STS8251F_RxRDY) line_status |= LSR_RXRDY; if (tmp & STS8251F_TxEMP) line_status |= LSR_TSRE; if (tmp & STS8251F_PE) line_status |= LSR_PE; if (tmp & STS8251F_OE) line_status |= LSR_OE; if (tmp & STS8251F_BD_SD) line_status |= LSR_BI; } else { if (tmp & STS8251_TxRDY) line_status |= LSR_TXRDY; if (tmp & STS8251_RxRDY) line_status |= LSR_RXRDY; if (tmp & STS8251_TxEMP) line_status |= LSR_TSRE; if (tmp & STS8251_PE) line_status |= LSR_PE; if (tmp & STS8251_OE) line_status |= LSR_OE; if (tmp & STS8251_FE) line_status |= LSR_FE; if (tmp & STS8251_BD_SD) line_status |= LSR_BI; } } else { #endif /* PC98 */ if (com->pps.ppsparam.mode & PPS_CAPTUREBOTH) { modem_status = inb(com->modem_status_port); if ((modem_status ^ com->last_modem_status) & com->pps_bit) { pps_capture(&com->pps); pps_event(&com->pps, (modem_status & com->pps_bit) ? PPS_CAPTUREASSERT : PPS_CAPTURECLEAR); } } line_status = inb(com->line_status_port); #ifdef PC98 } if (com->pc98_if_type == COM_IF_RSA98III) rsa_buf_status = inb(com->rsabase + rsa_srr); #endif /* PC98 */ /* input event? (check first to help avoid overruns) */ #ifndef PC98 while (line_status & LSR_RCV_MASK) { #else while ((line_status & LSR_RCV_MASK) || (com->pc98_if_type == COM_IF_RSA98III && (rsa_buf_status & 0x08))) { #endif /* PC98 */ /* break/unnattached error bits or real input? 
*/ #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (com->pc98_8251fifo_enable) { recv_data = inb(I8251F_data); if (tmp & (STS8251F_PE | STS8251F_OE | STS8251F_BD_SD)) { pc98_i8251_or_cmd(com, CMD8251_ER); recv_data = 0; } } else { recv_data = inb(com->data_port); if (tmp & (STS8251_PE | STS8251_OE | STS8251_FE | STS8251_BD_SD)) { pc98_i8251_or_cmd(com, CMD8251_ER); recv_data = 0; } } } else if (com->pc98_if_type == COM_IF_RSA98III) { if (!(rsa_buf_status & 0x08)) recv_data = 0; else recv_data = inb(com->data_port); } else #endif if (!(line_status & LSR_RXRDY)) recv_data = 0; else recv_data = inb(com->data_port); #ifdef KDB #ifdef ALT_BREAK_TO_DEBUGGER if (com->unit == comconsole && kdb_alt_break(recv_data, &com->alt_brk_state) != 0) - kdb_enter("Break sequence on console"); + kdb_enter_why(KDB_WHY_BREAK, + "Break sequence on console"); #endif /* ALT_BREAK_TO_DEBUGGER */ #endif /* KDB */ if (line_status & (LSR_BI | LSR_FE | LSR_PE)) { /* * Don't store BI if IGNBRK or FE/PE if IGNPAR. * Otherwise, push the work to a higher level * (to handle PARMRK) if we're bypassing. * Otherwise, convert BI/FE and PE+INPCK to 0. * * This makes bypassing work right in the * usual "raw" case (IGNBRK set, and IGNPAR * and INPCK clear). * * Note: BI together with FE/PE means just BI. */ if (line_status & LSR_BI) { #if defined(KDB) && defined(BREAK_TO_DEBUGGER) if (com->unit == comconsole) { - kdb_enter("Line break on console"); + kdb_enter_why(KDB_WHY_BREAK, + "Line break on console"); goto cont; } #endif if (com->tp == NULL || com->tp->t_iflag & IGNBRK) goto cont; } else { if (com->tp == NULL || com->tp->t_iflag & IGNPAR) goto cont; } if (com->tp->t_state & TS_CAN_BYPASS_L_RINT && (line_status & (LSR_BI | LSR_FE) || com->tp->t_iflag & INPCK)) recv_data = 0; } ++com->bytes_in; if (com->tp != NULL && com->tp->t_hotchar != 0 && recv_data == com->tp->t_hotchar) swi_sched(sio_fast_ih, 0); ioptr = com->iptr; if (ioptr >= com->ibufend) CE_RECORD(com, CE_INTERRUPT_BUF_OVERFLOW); else { if (com->tp != NULL && com->tp->t_do_timestamp) microtime(&com->tp->t_timestamp); ++com_events; swi_sched(sio_slow_ih, SWI_DELAY); #if 0 /* for testing input latency vs efficiency */ if (com->iptr - com->ibuf == 8) swi_sched(sio_fast_ih, 0); #endif ioptr[0] = recv_data; ioptr[com->ierroff] = line_status; com->iptr = ++ioptr; if (ioptr == com->ihighwater && com->state & CS_RTS_IFLOW) #ifdef PC98 IS_8251(com->pc98_if_type) ? com_tiocm_bic(com, TIOCM_RTS) : #endif outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); if (line_status & LSR_OE) CE_RECORD(com, CE_OVERRUN); } cont: if (line_status & LSR_TXRDY && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) goto txrdy; /* * "& 0x7F" is to avoid the gcc-1.40 generating a slow * jump from the top of the loop to here */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) goto status_read; else #endif line_status = inb(com->line_status_port) & 0x7F; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) rsa_buf_status = inb(com->rsabase + rsa_srr); #endif /* PC98 */ } /* modem status change? (always check before doing output) */ #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { #endif modem_status = inb(com->modem_status_port); if (modem_status != com->last_modem_status) { /* * Schedule high level to handle DCD changes. Note * that we don't use the delta bits anywhere. Some * UARTs mess them up, and it's easy to remember the * previous bits and calculate the delta. 
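As the comment just above says, the driver recomputes modem-status changes itself rather than trusting the UART's delta bits: it keeps the previous MSR value, XORs it with the latest one, and looks only at the bits it cares about. The same computation in isolation (the MSR values are the standard 16550 bit positions; the sample values are made up):

#include <stdio.h>

#define MSR_DCD	0x80	/* standard 16550 carrier-detect bit */
#define MSR_CTS	0x10	/* standard 16550 clear-to-send bit */

int
main(void)
{
	unsigned char prev = MSR_CTS;		/* made-up previous MSR */
	unsigned char last = MSR_CTS | MSR_DCD;	/* made-up latest MSR */
	unsigned char delta = last ^ prev;	/* changed bits, computed by hand */

	if (delta & MSR_DCD)
		printf("carrier went %s\n", (last & MSR_DCD) ? "up" : "down");
	return (0);
}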
*/ com->last_modem_status = modem_status; if (!(com->state & CS_CHECKMSR)) { com_events += LOTS_OF_EVENTS; com->state |= CS_CHECKMSR; swi_sched(sio_fast_ih, 0); } /* handle CTS change immediately for crisp flow ctl */ if (com->state & CS_CTS_OFLOW) { if (modem_status & MSR_CTS) com->state |= CS_ODEVREADY; else com->state &= ~CS_ODEVREADY; } } #ifdef PC98 } #endif txrdy: /* output queued and everything ready? */ #ifndef PC98 if (line_status & LSR_TXRDY && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) { #else if (((com->pc98_if_type == COM_IF_RSA98III) ? (rsa_buf_status & 0x02) : (line_status & LSR_TXRDY)) && com->state >= (CS_BUSY | CS_TTGO | CS_ODEVREADY)) { #endif #ifdef PC98 Port_t tmp_data_port; if (IS_8251(com->pc98_if_type) && com->pc98_8251fifo_enable) tmp_data_port = I8251F_data; else tmp_data_port = com->data_port; #endif ioptr = com->obufq.l_head; if (com->tx_fifo_size > 1 && com->unit != siotsunit) { u_int ocount; ocount = com->obufq.l_tail - ioptr; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { rsa_buf_status = inb(com->rsabase + rsa_srr); rsa_tx_fifo_size = 1024; if (!(rsa_buf_status & 0x01)) rsa_tx_fifo_size = 2048; if (ocount > rsa_tx_fifo_size) ocount = rsa_tx_fifo_size; } else #endif if (ocount > com->tx_fifo_size) ocount = com->tx_fifo_size; com->bytes_out += ocount; do #ifdef PC98 outb(tmp_data_port, *ioptr++); #else outb(com->data_port, *ioptr++); #endif while (--ocount != 0); } else { #ifdef PC98 outb(tmp_data_port, *ioptr++); #else outb(com->data_port, *ioptr++); #endif ++com->bytes_out; if (com->unit == siotsunit && siotso < sizeof siots / sizeof siots[0]) nanouptime(&siots[siotso++]); } #ifdef PC98 if (IS_8251(com->pc98_if_type)) if (!(pc98_check_i8251_interrupt(com) & IEN_TxFLAG)) com_int_Tx_enable(com); #endif com->obufq.l_head = ioptr; if (COM_IIR_TXRDYBUG(com->flags)) int_ctl_new = int_ctl | IER_ETXRDY; if (ioptr >= com->obufq.l_tail) { struct lbq *qp; qp = com->obufq.l_next; qp->l_queued = FALSE; qp = qp->l_next; if (qp != NULL) { com->obufq.l_head = qp->l_head; com->obufq.l_tail = qp->l_tail; com->obufq.l_next = qp; } else { /* output just completed */ if (COM_IIR_TXRDYBUG(com->flags)) int_ctl_new = int_ctl & ~IER_ETXRDY; com->state &= ~CS_BUSY; #if defined(PC98) if (IS_8251(com->pc98_if_type) && pc98_check_i8251_interrupt(com) & IEN_TxFLAG) com_int_Tx_disable(com); #endif } if (!(com->state & CS_ODONE)) { com_events += LOTS_OF_EVENTS; com->state |= CS_ODONE; /* handle at high level ASAP */ swi_sched(sio_fast_ih, 0); } } #ifdef PC98 if (COM_IIR_TXRDYBUG(com->flags) && int_ctl != int_ctl_new) { if (com->pc98_if_type == COM_IF_RSA98III) { int_ctl_new &= ~(IER_ETXRDY | IER_ERXRDY); outb(com->int_ctl_port, int_ctl_new); outb(com->rsabase + rsa_ier, 0x1d); } else outb(com->int_ctl_port, int_ctl_new); } #else if (COM_IIR_TXRDYBUG(com->flags) && int_ctl != int_ctl_new) outb(com->int_ctl_port, int_ctl_new); #endif } #ifdef PC98 else if (line_status & LSR_TXRDY) { if (IS_8251(com->pc98_if_type)) if (pc98_check_i8251_interrupt(com) & IEN_TxFLAG) com_int_Tx_disable(com); } if (IS_8251(com->pc98_if_type)) { if (com->pc98_8251fifo_enable) { if ((tmp = inb(I8251F_lsr)) & STS8251F_RxRDY) goto more_intr; } else { if ((tmp = inb(com->sts_port)) & STS8251_RxRDY) goto more_intr; } } #endif /* finished? 
*/ #ifndef COM_MULTIPORT #ifdef PC98 if (IS_8251(com->pc98_if_type)) return; #endif if ((inb(com->int_id_port) & IIR_IMASK) == IIR_NOPEND) #endif /* COM_MULTIPORT */ return; } } /* software interrupt handler for SWI_TTY */ static void siopoll(void *dummy) { int unit; if (com_events == 0) return; repeat: for (unit = 0; unit < sio_numunits; ++unit) { struct com_s *com; int incc; struct tty *tp; com = com_addr(unit); if (com == NULL) continue; tp = com->tp; if (tp == NULL || com->gone) { /* * Discard any events related to never-opened or * going-away devices. */ mtx_lock_spin(&sio_lock); incc = com->iptr - com->ibuf; com->iptr = com->ibuf; if (com->state & CS_CHECKMSR) { incc += LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; } com_events -= incc; mtx_unlock_spin(&sio_lock); continue; } if (com->iptr != com->ibuf) { mtx_lock_spin(&sio_lock); sioinput(com); mtx_unlock_spin(&sio_lock); } if (com->state & CS_CHECKMSR) { u_char delta_modem_status; #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { #endif mtx_lock_spin(&sio_lock); delta_modem_status = com->last_modem_status ^ com->prev_modem_status; com->prev_modem_status = com->last_modem_status; com_events -= LOTS_OF_EVENTS; com->state &= ~CS_CHECKMSR; mtx_unlock_spin(&sio_lock); if (delta_modem_status & MSR_DCD) ttyld_modem(tp, com->prev_modem_status & MSR_DCD); #ifdef PC98 } #endif } if (com->state & CS_ODONE) { mtx_lock_spin(&sio_lock); com_events -= LOTS_OF_EVENTS; com->state &= ~CS_ODONE; mtx_unlock_spin(&sio_lock); if (!(com->state & CS_BUSY) && !(com->extra_state & CSE_BUSYCHECK)) { timeout(siobusycheck, com, hz / 100); com->extra_state |= CSE_BUSYCHECK; } ttyld_start(tp); } if (com_events == 0) break; } if (com_events >= LOTS_OF_EVENTS) goto repeat; } static void combreak(tp, sig) struct tty *tp; int sig; { struct com_s *com; com = tp->t_sc; #ifdef PC98 if (sig) com_send_break_on(com); else com_send_break_off(com); #else if (sig) sio_setreg(com, com_cfcr, com->cfcr_image |= CFCR_SBREAK); else sio_setreg(com, com_cfcr, com->cfcr_image &= ~CFCR_SBREAK); #endif } static int comparam(tp, t) struct tty *tp; struct termios *t; { u_int cfcr; int cflag; struct com_s *com; u_int divisor; u_char dlbh; u_char dlbl; u_char efr_flowbits; int s; #ifdef PC98 u_char param = 0; #endif com = tp->t_sc; if (com == NULL) return (ENODEV); #ifdef PC98 cfcr = 0; if (IS_8251(com->pc98_if_type)) { if (pc98_ttspeedtab(com, t->c_ospeed, &divisor) != 0) return (EINVAL); } else { #endif /* check requested parameters */ if (t->c_ispeed != (t->c_ospeed != 0 ? 
t->c_ospeed : tp->t_ospeed)) return (EINVAL); divisor = siodivisor(com->rclk, t->c_ispeed); if (divisor == 0) return (EINVAL); #ifdef PC98 } #endif /* parameters are OK, convert them to the com struct and the device */ s = spltty(); #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (t->c_ospeed == 0) com_tiocm_bic(com, TIOCM_DTR|TIOCM_RTS|TIOCM_LE); else com_tiocm_bis(com, TIOCM_DTR|TIOCM_RTS|TIOCM_LE); } else #endif if (t->c_ospeed == 0) (void)commodem(tp, 0, SER_DTR); /* hang up line */ else (void)commodem(tp, SER_DTR, 0); cflag = t->c_cflag; #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { #endif switch (cflag & CSIZE) { case CS5: cfcr = CFCR_5BITS; break; case CS6: cfcr = CFCR_6BITS; break; case CS7: cfcr = CFCR_7BITS; break; default: cfcr = CFCR_8BITS; break; } if (cflag & PARENB) { cfcr |= CFCR_PENAB; if (!(cflag & PARODD)) cfcr |= CFCR_PEVEN; } if (cflag & CSTOPB) cfcr |= CFCR_STOPB; if (com->hasfifo) { /* * Use a fifo trigger level low enough so that the input * latency from the fifo is less than about 16 msec and * the total latency is less than about 30 msec. These * latencies are reasonable for humans. Serial comms * protocols shouldn't expect anything better since modem * latencies are larger. * * The fifo trigger level cannot be set at RX_HIGH for high * speed connections without further work on reducing * interrupt disablement times in other parts of the system, * without producing silo overflow errors. */ com->fifo_image = com->unit == siotsunit ? 0 : t->c_ispeed <= 4800 ? FIFO_ENABLE : FIFO_ENABLE | FIFO_RX_MEDH; #ifdef COM_ESP /* * The Hayes ESP card needs the fifo DMA mode bit set * in compatibility mode. If not, it will interrupt * for each character received. */ if (com->esp) com->fifo_image |= FIFO_DMA_MODE; #endif sio_setreg(com, com_fifo, com->fifo_image); } #ifdef PC98 } #endif /* * This returns with interrupts disabled so that we can complete * the speed change atomically. Keeping interrupts disabled is * especially important while com_data is hidden. */ (void) siosetwater(com, t->c_ispeed); #ifdef PC98 if (IS_8251(com->pc98_if_type)) com_cflag_and_speed_set(com, cflag, t->c_ospeed); else { #endif sio_setreg(com, com_cfcr, cfcr | CFCR_DLAB); /* * Only set the divisor registers if they would change, since on * some 16550 incompatibles (UMC8669F), setting them while input * is arriving loses sync until data stops arriving. */ dlbl = divisor & 0xFF; if (sio_getreg(com, com_dlbl) != dlbl) sio_setreg(com, com_dlbl, dlbl); dlbh = divisor >> 8; if (sio_getreg(com, com_dlbh) != dlbh) sio_setreg(com, com_dlbh, dlbh); #ifdef PC98 } #endif efr_flowbits = 0; if (cflag & CRTS_IFLOW) { com->state |= CS_RTS_IFLOW; efr_flowbits |= EFR_AUTORTS; /* * If CS_RTS_IFLOW just changed from off to on, the change * needs to be propagated to MCR_RTS. This isn't urgent, * so do it later by calling comstart() instead of repeating * a lot of code from comstart() here. */ } else if (com->state & CS_RTS_IFLOW) { com->state &= ~CS_RTS_IFLOW; /* * CS_RTS_IFLOW just changed from on to off. Force MCR_RTS * on here, since comstart() won't do it later. */ #ifdef PC98 if (IS_8251(com->pc98_if_type)) com_tiocm_bis(com, TIOCM_RTS); else outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); #else outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); #endif } /* * Set up state to handle output flow control. * XXX - worth handling MDMBUF (DCD) flow control at the lowest level? * Now has 10+ msec latency, while CTS flow has 50- usec latency. 
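[Editor's illustration, not part of this change: the divide-by-16 divisor arithmetic behind the siodivisor() call and the dlbl/dlbh writes earlier in comparam() above. The 1.8432 MHz reference clock in the usage note is an assumption (the customary PC value); the real siodivisor() additionally enforces a rounding-error tolerance that is omitted here.]
// Illustrative sketch only: classic 16550 divisor math.
static unsigned int
example_divisor(unsigned long rclk, unsigned long speed)
{
	if (speed == 0 || speed > rclk / 16)
		return (0);		// 0 means "speed cannot be represented"
	return ((unsigned int)(rclk / 16 / speed));
}
// e.g. example_divisor(1843200, 9600) == 12, written as dlbl = 12, dlbh = 0.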
*/ com->state |= CS_ODEVREADY; com->state &= ~CS_CTS_OFLOW; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) { param = inb(com->rsabase + rsa_msr); outb(com->rsabase + rsa_msr, param & 0x14); } #endif if (cflag & CCTS_OFLOW) { com->state |= CS_CTS_OFLOW; efr_flowbits |= EFR_AUTOCTS; #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (!(pc98_get_modem_status(com) & TIOCM_CTS)) com->state &= ~CS_ODEVREADY; } else if (com->pc98_if_type == COM_IF_RSA98III) { /* Set automatic flow control mode */ outb(com->rsabase + rsa_msr, param | 0x08); } else #endif if (!(com->last_modem_status & MSR_CTS)) com->state &= ~CS_ODEVREADY; } #ifdef PC98 if (!IS_8251(com->pc98_if_type)) sio_setreg(com, com_cfcr, com->cfcr_image = cfcr); #else if (com->st16650a) { sio_setreg(com, com_lcr, LCR_EFR_ENABLE); sio_setreg(com, com_efr, (sio_getreg(com, com_efr) & ~(EFR_AUTOCTS | EFR_AUTORTS)) | efr_flowbits); } sio_setreg(com, com_cfcr, com->cfcr_image = cfcr); #endif /* XXX shouldn't call functions while intrs are disabled. */ ttyldoptim(tp); mtx_unlock_spin(&sio_lock); splx(s); comstart(tp); if (com->ibufold != NULL) { free(com->ibufold, M_DEVBUF); com->ibufold = NULL; } return (0); } /* * This function must be called with the sio_lock mutex released and will * return with it obtained. */ static int siosetwater(com, speed) struct com_s *com; speed_t speed; { int cp4ticks; u_char *ibuf; int ibufsize; struct tty *tp; /* * Make the buffer size large enough to handle a softtty interrupt * latency of about 2 ticks without loss of throughput or data * (about 3 ticks if input flow control is not used or not honoured, * but a bit less for CS5-CS7 modes). */ cp4ticks = speed / 10 / hz * 4; for (ibufsize = 128; ibufsize < cp4ticks;) ibufsize <<= 1; #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) ibufsize = 2048; #endif if (ibufsize == com->ibufsize) { mtx_lock_spin(&sio_lock); return (0); } /* * Allocate input buffer. The extra factor of 2 in the size is * to allow for an error byte for each input byte. */ ibuf = malloc(2 * ibufsize, M_DEVBUF, M_NOWAIT); if (ibuf == NULL) { mtx_lock_spin(&sio_lock); return (ENOMEM); } /* Initialize non-critical variables. */ com->ibufold = com->ibuf; com->ibufsize = ibufsize; tp = com->tp; if (tp != NULL) { tp->t_ififosize = 2 * ibufsize; tp->t_ispeedwat = (speed_t)-1; tp->t_ospeedwat = (speed_t)-1; } /* * Read current input buffer, if any. Continue with interrupts * disabled. */ mtx_lock_spin(&sio_lock); if (com->iptr != com->ibuf) sioinput(com); /*- * Initialize critical variables, including input buffer watermarks. * The external device is asked to stop sending when the buffer * exactly reaches high water, or when the high level requests it. * The high level is notified immediately (rather than at a later * clock tick) when this watermark is reached. * The buffer size is chosen so the watermark should almost never * be reached. * The low watermark is invisibly 0 since the buffer is always * emptied all at once. 
*/ com->iptr = com->ibuf = ibuf; com->ibufend = ibuf + ibufsize; com->ierroff = ibufsize; com->ihighwater = ibuf + 3 * ibufsize / 4; return (0); } static void comstart(tp) struct tty *tp; { struct com_s *com; int s; com = tp->t_sc; if (com == NULL) return; s = spltty(); mtx_lock_spin(&sio_lock); if (tp->t_state & TS_TTSTOP) com->state &= ~CS_TTGO; else com->state |= CS_TTGO; if (tp->t_state & TS_TBLOCK) { #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if ((com_tiocm_get(com) & TIOCM_RTS) && (com->state & CS_RTS_IFLOW)) com_tiocm_bic(com, TIOCM_RTS); } else { if ((com->mcr_image & MCR_RTS) && (com->state & CS_RTS_IFLOW)) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); } #else if (com->mcr_image & MCR_RTS && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image &= ~MCR_RTS); #endif } else { #ifdef PC98 if (IS_8251(com->pc98_if_type)) { if (!(com_tiocm_get(com) & TIOCM_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) com_tiocm_bis(com, TIOCM_RTS); } else { if (!(com->mcr_image & MCR_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); } #else if (!(com->mcr_image & MCR_RTS) && com->iptr < com->ihighwater && com->state & CS_RTS_IFLOW) outb(com->modem_ctl_port, com->mcr_image |= MCR_RTS); #endif } mtx_unlock_spin(&sio_lock); if (tp->t_state & (TS_TIMEOUT | TS_TTSTOP)) { ttwwakeup(tp); splx(s); return; } if (tp->t_outq.c_cc != 0) { struct lbq *qp; struct lbq *next; if (!com->obufs[0].l_queued) { com->obufs[0].l_tail = com->obuf1 + q_to_b(&tp->t_outq, com->obuf1, #ifdef PC98 com->obufsize); #else sizeof com->obuf1); #endif com->obufs[0].l_next = NULL; com->obufs[0].l_queued = TRUE; mtx_lock_spin(&sio_lock); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[0]; } else { com->obufq.l_head = com->obufs[0].l_head; com->obufq.l_tail = com->obufs[0].l_tail; com->obufq.l_next = &com->obufs[0]; com->state |= CS_BUSY; } mtx_unlock_spin(&sio_lock); } if (tp->t_outq.c_cc != 0 && !com->obufs[1].l_queued) { com->obufs[1].l_tail = com->obuf2 + q_to_b(&tp->t_outq, com->obuf2, #ifdef PC98 com->obufsize); #else sizeof com->obuf2); #endif com->obufs[1].l_next = NULL; com->obufs[1].l_queued = TRUE; mtx_lock_spin(&sio_lock); if (com->state & CS_BUSY) { qp = com->obufq.l_next; while ((next = qp->l_next) != NULL) qp = next; qp->l_next = &com->obufs[1]; } else { com->obufq.l_head = com->obufs[1].l_head; com->obufq.l_tail = com->obufs[1].l_tail; com->obufq.l_next = &com->obufs[1]; com->state |= CS_BUSY; } mtx_unlock_spin(&sio_lock); } tp->t_state |= TS_BUSY; } mtx_lock_spin(&sio_lock); if (com->state >= (CS_BUSY | CS_TTGO)) siointr1(com); /* fake interrupt to start output */ mtx_unlock_spin(&sio_lock); ttwwakeup(tp); splx(s); } static void comstop(tp, rw) struct tty *tp; int rw; { struct com_s *com; #ifdef PC98 int rsa98_tmp = 0; #endif com = tp->t_sc; if (com == NULL || com->gone) return; mtx_lock_spin(&sio_lock); if (rw & FWRITE) { #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { #endif if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. 
*/ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_XMT_RST | com->fifo_image); #ifdef PC98 if (com->pc98_if_type == COM_IF_RSA98III) for (rsa98_tmp = 0; rsa98_tmp < 2048; rsa98_tmp++) sio_setreg(com, com_fifo, FIFO_XMT_RST | com->fifo_image); } #endif com->obufs[0].l_queued = FALSE; com->obufs[1].l_queued = FALSE; if (com->state & CS_ODONE) com_events -= LOTS_OF_EVENTS; com->state &= ~(CS_ODONE | CS_BUSY); com->tp->t_state &= ~TS_BUSY; } if (rw & FREAD) { #ifdef PC98 if (!IS_8251(com->pc98_if_type)) { if (com->pc98_if_type == COM_IF_RSA98III) for (rsa98_tmp = 0; rsa98_tmp < 2048; rsa98_tmp++) sio_getreg(com, com_data); #endif if (com->hasfifo) #ifdef COM_ESP /* XXX avoid h/w bug. */ if (!com->esp) #endif sio_setreg(com, com_fifo, FIFO_RCV_RST | com->fifo_image); #ifdef PC98 } #endif com_events -= (com->iptr - com->ibuf); com->iptr = com->ibuf; } mtx_unlock_spin(&sio_lock); comstart(tp); } static int commodem(struct tty *tp, int sigon, int sigoff) { struct com_s *com; int bitand, bitor, msr; #ifdef PC98 int clr, set; #endif com = tp->t_sc; if (com->gone) return(0); if (sigon != 0 || sigoff != 0) { #ifdef PC98 if (IS_8251(com->pc98_if_type)) { bitand = bitor = 0; clr = set = 0; if (sigoff & SER_DTR) { bitand |= TIOCM_DTR; clr |= CMD8251_DTR; } if (sigoff & SER_RTS) { bitand |= TIOCM_RTS; clr |= CMD8251_RxEN | CMD8251_RTS; } if (sigon & SER_DTR) { bitor |= TIOCM_DTR; set |= CMD8251_TxEN | CMD8251_RxEN | CMD8251_DTR; } if (sigon & SER_RTS) { bitor |= TIOCM_RTS; set |= CMD8251_TxEN | CMD8251_RxEN | CMD8251_RTS; } bitand = ~bitand; mtx_lock_spin(&sio_lock); com->pc98_prev_modem_status &= bitand; com->pc98_prev_modem_status |= bitor; pc98_i8251_clear_or_cmd(com, clr, set); mtx_unlock_spin(&sio_lock); return (0); } else { #endif bitand = bitor = 0; if (sigoff & SER_DTR) bitand |= MCR_DTR; if (sigoff & SER_RTS) bitand |= MCR_RTS; if (sigon & SER_DTR) bitor |= MCR_DTR; if (sigon & SER_RTS) bitor |= MCR_RTS; bitand = ~bitand; mtx_lock_spin(&sio_lock); com->mcr_image &= bitand; com->mcr_image |= bitor; outb(com->modem_ctl_port, com->mcr_image); mtx_unlock_spin(&sio_lock); return (0); #ifdef PC98 } #endif } else { #ifdef PC98 if (IS_8251(com->pc98_if_type)) return (com_tiocm_get(com)); else { #endif bitor = 0; if (com->mcr_image & MCR_DTR) bitor |= SER_DTR; if (com->mcr_image & MCR_RTS) bitor |= SER_RTS; msr = com->prev_modem_status; if (msr & MSR_CTS) bitor |= SER_CTS; if (msr & MSR_DCD) bitor |= SER_DCD; if (msr & MSR_DSR) bitor |= SER_DSR; if (msr & MSR_DSR) bitor |= SER_DSR; if (msr & (MSR_RI | MSR_TERI)) bitor |= SER_RI; return (bitor); #ifdef PC98 } #endif } } static void siosettimeout() { struct com_s *com; bool_t someopen; int unit; /* * Set our timeout period to 1 second if no polled devices are open. * Otherwise set it to max(1/200, 1/hz). * Enable timeouts iff some device is open. */ untimeout(comwakeup, (void *)NULL, sio_timeout_handle); sio_timeout = hz; someopen = FALSE; for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && com->tp != NULL && com->tp->t_state & TS_ISOPEN && !com->gone) { someopen = TRUE; if (com->poll || com->poll_output) { sio_timeout = hz > 200 ? hz / 200 : 1; break; } } } if (someopen) { sio_timeouts_until_log = hz / sio_timeout; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); } else { /* Flush error messages, if any. 
*/ sio_timeouts_until_log = 1; comwakeup((void *)NULL); untimeout(comwakeup, (void *)NULL, sio_timeout_handle); } } static void comwakeup(chan) void *chan; { struct com_s *com; int unit; sio_timeout_handle = timeout(comwakeup, (void *)NULL, sio_timeout); /* * Recover from lost output interrupts. * Poll any lines that don't use interrupts. */ for (unit = 0; unit < sio_numunits; ++unit) { com = com_addr(unit); if (com != NULL && !com->gone && (com->state >= (CS_BUSY | CS_TTGO) || com->poll)) { mtx_lock_spin(&sio_lock); siointr1(com); mtx_unlock_spin(&sio_lock); } } /* * Check for and log errors, but not too often. */ if (--sio_timeouts_until_log > 0) return; sio_timeouts_until_log = hz / sio_timeout; for (unit = 0; unit < sio_numunits; ++unit) { int errnum; com = com_addr(unit); if (com == NULL) continue; if (com->gone) continue; for (errnum = 0; errnum < CE_NTYPES; ++errnum) { u_int delta; u_long total; mtx_lock_spin(&sio_lock); delta = com->delta_error_counts[errnum]; com->delta_error_counts[errnum] = 0; mtx_unlock_spin(&sio_lock); if (delta == 0) continue; total = com->error_counts[errnum] += delta; log(LOG_ERR, "sio%d: %u more %s%s (total %lu)\n", unit, delta, error_desc[errnum], delta == 1 ? "" : "s", total); } } } #ifdef PC98 /* commint is called when modem control line changes */ static void commint(struct cdev *dev) { register struct tty *tp; int stat,delta; struct com_s *com; com = dev->si_drv1; tp = com->tp; stat = com_tiocm_get(com); delta = com_tiocm_get_delta(com); if (com->state & CS_CTS_OFLOW) { if (stat & TIOCM_CTS) com->state |= CS_ODEVREADY; else com->state &= ~CS_ODEVREADY; } if ((delta & TIOCM_CAR) && (ISCALLOUT(dev)) == 0) { if (stat & TIOCM_CAR ) (void)ttyld_modem(tp, 1); else if (ttyld_modem(tp, 0) == 0) { /* negate DTR, RTS */ com_tiocm_bic(com, (tp->t_cflag & HUPCL) ? TIOCM_DTR|TIOCM_RTS|TIOCM_LE : TIOCM_LE ); /* disable IENABLE */ com_int_TxRx_disable( com ); } } } #endif /* * Following are all routines needed for SIO to act as console */ struct siocnstate { u_char dlbl; u_char dlbh; u_char ier; u_char cfcr; u_char mcr; }; /* * This is a function in order to not replicate "ttyd%d" more * places than absolutely necessary. */ static void siocnset(struct consdev *cd, int unit) { cd->cn_unit = unit; sprintf(cd->cn_name, "ttyd%d", unit); } static speed_t siocngetspeed(Port_t, u_long rclk); static void siocnclose(struct siocnstate *sp, Port_t iobase); static void siocnopen(struct siocnstate *sp, Port_t iobase, int speed); static void siocntxwait(Port_t iobase); static cn_probe_t sio_cnprobe; static cn_init_t sio_cninit; static cn_term_t sio_cnterm; static cn_getc_t sio_cngetc; static cn_putc_t sio_cnputc; CONSOLE_DRIVER(sio); static void siocntxwait(iobase) Port_t iobase; { int timo; /* * Wait for any pending transmission to finish. Required to avoid * the UART lockup bug when the speed is changed, and for normal * transmits. */ timo = 100000; while ((inb(iobase + com_lsr) & (LSR_TSRE | LSR_TXRDY)) != (LSR_TSRE | LSR_TXRDY) && --timo != 0) ; } /* * Read the serial port specified and try to figure out what speed * it's currently running at. We're assuming the serial port has * been initialized and is basicly idle. This routine is only intended * to be run at system startup. * * If the value read from the serial port doesn't make sense, return 0. 
*/ static speed_t siocngetspeed(iobase, rclk) Port_t iobase; u_long rclk; { u_int divisor; u_char dlbh; u_char dlbl; u_char cfcr; cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); dlbl = inb(iobase + com_dlbl); dlbh = inb(iobase + com_dlbh); outb(iobase + com_cfcr, cfcr); divisor = dlbh << 8 | dlbl; /* XXX there should be more sanity checking. */ if (divisor == 0) return (CONSPEED); return (rclk / (16UL * divisor)); } static void siocnopen(sp, iobase, speed) struct siocnstate *sp; Port_t iobase; int speed; { u_int divisor; u_char dlbh; u_char dlbl; /* * Save all the device control registers except the fifo register * and set our default ones (cs8 -parenb speed=comdefaultrate). * We can't save the fifo register since it is read-only. */ sp->ier = inb(iobase + com_ier); outb(iobase + com_ier, 0); /* spltty() doesn't stop siointr() */ siocntxwait(iobase); sp->cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); sp->dlbl = inb(iobase + com_dlbl); sp->dlbh = inb(iobase + com_dlbh); /* * Only set the divisor registers if they would change, since on * some 16550 incompatibles (Startech), setting them clears the * data input register. This also reduces the effects of the * UMC8669F bug. */ divisor = siodivisor(comdefaultrclk, speed); dlbl = divisor & 0xFF; if (sp->dlbl != dlbl) outb(iobase + com_dlbl, dlbl); dlbh = divisor >> 8; if (sp->dlbh != dlbh) outb(iobase + com_dlbh, dlbh); outb(iobase + com_cfcr, CFCR_8BITS); sp->mcr = inb(iobase + com_mcr); /* * We don't want interrupts, but must be careful not to "disable" * them by clearing the MCR_IENABLE bit, since that might cause * an interrupt by floating the IRQ line. */ outb(iobase + com_mcr, (sp->mcr & MCR_IENABLE) | MCR_DTR | MCR_RTS); } static void siocnclose(sp, iobase) struct siocnstate *sp; Port_t iobase; { /* * Restore the device control registers. */ siocntxwait(iobase); outb(iobase + com_cfcr, CFCR_DLAB | CFCR_8BITS); if (sp->dlbl != inb(iobase + com_dlbl)) outb(iobase + com_dlbl, sp->dlbl); if (sp->dlbh != inb(iobase + com_dlbh)) outb(iobase + com_dlbh, sp->dlbh); outb(iobase + com_cfcr, sp->cfcr); /* * XXX damp oscillations of MCR_DTR and MCR_RTS by not restoring them. */ outb(iobase + com_mcr, sp->mcr | MCR_DTR | MCR_RTS); outb(iobase + com_ier, sp->ier); } static void sio_cnprobe(cp) struct consdev *cp; { speed_t boot_speed; u_char cfcr; u_int divisor; int s, unit; struct siocnstate sp; /* * Find our first enabled console, if any. If it is a high-level * console device, then initialize it and return successfully. * If it is a low-level console device, then initialize it and * return unsuccessfully. It must be initialized in both cases * for early use by console drivers and debuggers. Initializing * the hardware is not necessary in all cases, since the i/o * routines initialize it on the fly, but it is necessary if * input might arrive while the hardware is switched back to an * uninitialized state. We can't handle multiple console devices * yet because our low-level routines don't take a device arg. * We trust the user to set the console flags properly so that we * don't need to probe. 
*/ cp->cn_pri = CN_DEAD; for (unit = 0; unit < 16; unit++) { /* XXX need to know how many */ int flags; if (resource_disabled("sio", unit)) continue; if (resource_int_value("sio", unit, "flags", &flags)) continue; if (COM_CONSOLE(flags) || COM_DEBUGGER(flags)) { int port; Port_t iobase; if (resource_int_value("sio", unit, "port", &port)) continue; iobase = port; s = spltty(); if (boothowto & RB_SERIAL) { boot_speed = siocngetspeed(iobase, comdefaultrclk); if (boot_speed) comdefaultrate = boot_speed; } /* * Initialize the divisor latch. We can't rely on * siocnopen() to do this the first time, since it * avoids writing to the latch if the latch appears * to have the correct value. Also, if we didn't * just read the speed from the hardware, then we * need to set the speed in hardware so that * switching it later is null. */ cfcr = inb(iobase + com_cfcr); outb(iobase + com_cfcr, CFCR_DLAB | cfcr); divisor = siodivisor(comdefaultrclk, comdefaultrate); outb(iobase + com_dlbl, divisor & 0xff); outb(iobase + com_dlbh, divisor >> 8); outb(iobase + com_cfcr, cfcr); siocnopen(&sp, iobase, comdefaultrate); splx(s); if (COM_CONSOLE(flags) && !COM_LLCONSOLE(flags)) { siocnset(cp, unit); cp->cn_pri = COM_FORCECONSOLE(flags) || boothowto & RB_SERIAL ? CN_REMOTE : CN_NORMAL; siocniobase = iobase; siocnunit = unit; } #ifdef GDB if (COM_DEBUGGER(flags)) siogdbiobase = iobase; #endif } } } static void sio_cninit(cp) struct consdev *cp; { comconsole = cp->cn_unit; } static void sio_cnterm(cp) struct consdev *cp; { comconsole = -1; } static int sio_cngetc(struct consdev *cd) { int c; Port_t iobase; int s; struct siocnstate sp; speed_t speed; if (cd != NULL && cd->cn_unit == siocnunit) { iobase = siocniobase; speed = comdefaultrate; } else { #ifdef GDB iobase = siogdbiobase; speed = gdbdefaultrate; #else return (-1); #endif } s = spltty(); siocnopen(&sp, iobase, speed); if (inb(iobase + com_lsr) & LSR_RXRDY) c = inb(iobase + com_data); else c = -1; siocnclose(&sp, iobase); splx(s); return (c); } static void sio_cnputc(struct consdev *cd, int c) { int need_unlock; int s; struct siocnstate sp; Port_t iobase; speed_t speed; if (cd != NULL && cd->cn_unit == siocnunit) { iobase = siocniobase; speed = comdefaultrate; } else { #ifdef GDB iobase = siogdbiobase; speed = gdbdefaultrate; #else return; #endif } s = spltty(); need_unlock = 0; if (!kdb_active && sio_inited == 2 && !mtx_owned(&sio_lock)) { mtx_lock_spin(&sio_lock); need_unlock = 1; } siocnopen(&sp, iobase, speed); siocntxwait(iobase); outb(iobase + com_data, c); siocnclose(&sp, iobase); if (need_unlock) mtx_unlock_spin(&sio_lock); splx(s); } /* * Remote gdb(1) support. */ #if defined(GDB) #include static gdb_probe_f siogdbprobe; static gdb_init_f siogdbinit; static gdb_term_f siogdbterm; static gdb_getc_f siogdbgetc; static gdb_putc_f siogdbputc; GDB_DBGPORT(sio, siogdbprobe, siogdbinit, siogdbterm, siogdbgetc, siogdbputc); static int siogdbprobe(void) { return ((siogdbiobase != 0) ? 
0 : -1); } static void siogdbinit(void) { } static void siogdbterm(void) { } static void siogdbputc(int c) { sio_cnputc(NULL, c); } static int siogdbgetc(void) { return (sio_cngetc(NULL)); } #endif #ifdef PC98 /* * pc98 local function */ static void com_tiocm_bis(struct com_s *com, int msr) { int s; int tmp = 0; s=spltty(); com->pc98_prev_modem_status |= ( msr & (TIOCM_LE|TIOCM_DTR|TIOCM_RTS) ); tmp |= CMD8251_TxEN|CMD8251_RxEN; if ( msr & TIOCM_DTR ) tmp |= CMD8251_DTR; if ( msr & TIOCM_RTS ) tmp |= CMD8251_RTS; pc98_i8251_or_cmd( com, tmp ); splx(s); } static void com_tiocm_bic(struct com_s *com, int msr) { int s; int tmp = msr; s=spltty(); com->pc98_prev_modem_status &= ~( msr & (TIOCM_LE|TIOCM_DTR|TIOCM_RTS) ); if ( msr & TIOCM_DTR ) tmp |= CMD8251_DTR; if ( msr & TIOCM_RTS ) tmp |= CMD8251_RTS; pc98_i8251_clear_cmd( com, tmp ); splx(s); } static int com_tiocm_get(struct com_s *com) { return( com->pc98_prev_modem_status ); } static int com_tiocm_get_delta(struct com_s *com) { int tmp; tmp = com->pc98_modem_delta; com->pc98_modem_delta = 0; return( tmp ); } /* convert to TIOCM_?? ( ioctl.h ) */ static int pc98_get_modem_status(struct com_s *com) { register int msr; msr = com->pc98_prev_modem_status & ~(TIOCM_CAR|TIOCM_RI|TIOCM_DSR|TIOCM_CTS); if (com->pc98_8251fifo_enable) { int stat2; stat2 = inb(I8251F_msr); if ( stat2 & CICSCDF_CD ) msr |= TIOCM_CAR; if ( stat2 & CICSCDF_CI ) msr |= TIOCM_RI; if ( stat2 & CICSCDF_DR ) msr |= TIOCM_DSR; if ( stat2 & CICSCDF_CS ) msr |= TIOCM_CTS; #if COM_CARRIER_DETECT_EMULATE if ( msr & (TIOCM_DSR|TIOCM_CTS) ) { msr |= TIOCM_CAR; } #endif } else { int stat, stat2; stat = inb(com->sts_port); stat2 = inb(com->in_modem_port); if ( !(stat2 & CICSCD_CD) ) msr |= TIOCM_CAR; if ( !(stat2 & CICSCD_CI) ) msr |= TIOCM_RI; if ( stat & STS8251_DSR ) msr |= TIOCM_DSR; if ( !(stat2 & CICSCD_CS) ) msr |= TIOCM_CTS; #if COM_CARRIER_DETECT_EMULATE if ( msr & (TIOCM_DSR|TIOCM_CTS) ) { msr |= TIOCM_CAR; } #endif } return(msr); } static void pc98_check_msr(void* chan) { int msr, delta; int s; register struct tty *tp; struct com_s *com; struct cdev *dev; dev=(struct cdev *)chan; com = dev->si_drv1; tp = dev->si_tty; s = spltty(); msr = pc98_get_modem_status(com); /* make change flag */ delta = msr ^ com->pc98_prev_modem_status; if ( delta & TIOCM_CAR ) { if ( com->modem_car_chg_timer ) { if ( -- com->modem_car_chg_timer ) msr ^= TIOCM_CAR; } else { if ((com->modem_car_chg_timer = (msr & TIOCM_CAR) ? 
DCD_ON_RECOGNITION : DCD_OFF_TOLERANCE) != 0) msr ^= TIOCM_CAR; } } else com->modem_car_chg_timer = 0; delta = ( msr ^ com->pc98_prev_modem_status ) & (TIOCM_CAR|TIOCM_RI|TIOCM_DSR|TIOCM_CTS); com->pc98_prev_modem_status = msr; delta = ( com->pc98_modem_delta |= delta ); splx(s); if ( com->modem_checking || (tp->t_state & (TS_ISOPEN)) ) { if ( delta ) { commint(dev); } timeout(pc98_check_msr, (caddr_t)dev, PC98_CHECK_MODEM_INTERVAL); } else { com->modem_checking = 0; } } static void pc98_msrint_start(struct cdev *dev) { struct com_s *com; int s = spltty(); com = dev->si_drv1; /* modem control line check routine envoke interval is 1/10 sec */ if ( com->modem_checking == 0 ) { com->pc98_prev_modem_status = pc98_get_modem_status(com); com->pc98_modem_delta = 0; timeout(pc98_check_msr, (caddr_t)dev, PC98_CHECK_MODEM_INTERVAL); com->modem_checking = 1; } splx(s); } static void pc98_disable_i8251_interrupt(struct com_s *com, int mod) { /* disable interrupt */ register int tmp; mod |= ~(IEN_Tx|IEN_TxEMP|IEN_Rx); COM_INT_DISABLE tmp = inb( com->intr_ctrl_port ) & ~(IEN_Tx|IEN_TxEMP|IEN_Rx); outb( com->intr_ctrl_port, (com->intr_enable&=~mod) | tmp ); COM_INT_ENABLE } static void pc98_enable_i8251_interrupt(struct com_s *com, int mod) { register int tmp; COM_INT_DISABLE tmp = inb( com->intr_ctrl_port ) & ~(IEN_Tx|IEN_TxEMP|IEN_Rx); outb( com->intr_ctrl_port, (com->intr_enable|=mod) | tmp ); COM_INT_ENABLE } static int pc98_check_i8251_interrupt(struct com_s *com) { return ( com->intr_enable & 0x07 ); } static void pc98_i8251_clear_cmd(struct com_s *com, int x) { int tmp; COM_INT_DISABLE tmp = com->pc98_prev_siocmd & ~(x); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); outb(com->cmd_port, tmp); com->pc98_prev_siocmd = tmp & ~(CMD8251_ER|CMD8251_RESET|CMD8251_EH); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE); COM_INT_ENABLE } static void pc98_i8251_or_cmd(struct com_s *com, int x) { int tmp; COM_INT_DISABLE if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); tmp = com->pc98_prev_siocmd | (x); outb(com->cmd_port, tmp); com->pc98_prev_siocmd = tmp & ~(CMD8251_ER|CMD8251_RESET|CMD8251_EH); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE); COM_INT_ENABLE } static void pc98_i8251_set_cmd(struct com_s *com, int x) { int tmp; COM_INT_DISABLE if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); tmp = (x); outb(com->cmd_port, tmp); com->pc98_prev_siocmd = tmp & ~(CMD8251_ER|CMD8251_RESET|CMD8251_EH); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE); COM_INT_ENABLE } static void pc98_i8251_clear_or_cmd(struct com_s *com, int clr, int x) { int tmp; COM_INT_DISABLE if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); tmp = com->pc98_prev_siocmd & ~(clr); tmp |= (x); outb(com->cmd_port, tmp); com->pc98_prev_siocmd = tmp & ~(CMD8251_ER|CMD8251_RESET|CMD8251_EH); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE); COM_INT_ENABLE } static int pc98_i8251_get_cmd(struct com_s *com) { return com->pc98_prev_siocmd; } static int pc98_i8251_get_mod(struct com_s *com) { return com->pc98_prev_siomod; } static void pc98_i8251_reset(struct com_s *com, int mode, int command) { if (com->pc98_8251fifo_enable) outb(I8251F_fcr, 0); outb(com->cmd_port, 0); /* dummy */ DELAY(2); outb(com->cmd_port, 0); /* dummy */ DELAY(2); outb(com->cmd_port, 0); /* dummy */ DELAY(2); outb(com->cmd_port, CMD8251_RESET); /* internal reset */ DELAY(2); outb(com->cmd_port, mode ); /* mode register */ com->pc98_prev_siomod = mode; DELAY(2); pc98_i8251_set_cmd( com, 
(command|CMD8251_ER) ); DELAY(10); if (com->pc98_8251fifo_enable) outb(I8251F_fcr, CTRL8251F_ENABLE | CTRL8251F_XMT_RST | CTRL8251F_RCV_RST); } static void pc98_check_sysclock(void) { /* get system clock from port */ if ( pc98_machine_type & M_8M ) { /* 8 MHz system & H98 */ sysclock = 8; } else { /* 5 MHz system */ sysclock = 5; } } static void com_cflag_and_speed_set( struct com_s *com, int cflag, int speed) { int cfcr=0; int previnterrupt; int tmp; u_int count; if (pc98_ttspeedtab(com, speed, &count) != 0) return; previnterrupt = pc98_check_i8251_interrupt(com); pc98_disable_i8251_interrupt( com, IEN_Tx|IEN_TxEMP|IEN_Rx ); switch ( cflag&CSIZE ) { case CS5: cfcr = MOD8251_5BITS; break; case CS6: cfcr = MOD8251_6BITS; break; case CS7: cfcr = MOD8251_7BITS; break; case CS8: cfcr = MOD8251_8BITS; break; } if ( cflag&PARENB ) { if ( cflag&PARODD ) cfcr |= MOD8251_PODD; else cfcr |= MOD8251_PEVEN; } else cfcr |= MOD8251_PDISAB; if ( cflag&CSTOPB ) cfcr |= MOD8251_STOP2; else cfcr |= MOD8251_STOP1; if ( count & 0x10000 ) cfcr |= MOD8251_CLKX1; else cfcr |= MOD8251_CLKX16; while (!((tmp = inb(com->sts_port)) & STS8251_TxEMP)) ; /* set baud rate from ospeed */ pc98_set_baud_rate( com, count ); if ( cfcr != pc98_i8251_get_mod(com) ) pc98_i8251_reset(com, cfcr, pc98_i8251_get_cmd(com) ); pc98_enable_i8251_interrupt( com, previnterrupt ); } static int pc98_ttspeedtab(struct com_s *com, int speed, u_int *divisor) { int if_type, effect_sp, count = -1, mod; if_type = com->pc98_if_type & 0x0f; switch (com->pc98_if_type) { case COM_IF_INTERNAL: if (PC98SIO_baud_rate_port(if_type) != -1) { count = ttspeedtab(speed, if_8251_type[if_type].speedtab); if (count > 0) { count |= COM1_EXT_CLOCK; break; } } /* for *1CLK asynchronous! mode, TEFUTEFU */ mod = (sysclock == 5) ? 
2457600 : 1996800; effect_sp = ttspeedtab( speed, pc98speedtab ); if ( effect_sp < 0 ) /* XXX */ effect_sp = ttspeedtab( (speed - 1), pc98speedtab ); if ( effect_sp <= 0 ) return effect_sp; if ( effect_sp == speed ) mod /= 16; if ( mod % effect_sp ) return(-1); count = mod / effect_sp; if ( count > 65535 ) return(-1); if ( effect_sp != speed ) count |= 0x10000; break; case COM_IF_PC9861K_1: case COM_IF_PC9861K_2: count = 1; break; case COM_IF_IND_SS_1: case COM_IF_IND_SS_2: case COM_IF_PIO9032B_1: case COM_IF_PIO9032B_2: count = ttspeedtab( speed, if_8251_type[if_type].speedtab ); break; case COM_IF_B98_01_1: case COM_IF_B98_01_2: count = ttspeedtab( speed, if_8251_type[if_type].speedtab ); #ifdef B98_01_OLD if (count == 0 || count == 1) { count += 4; count |= 0x20000; /* x1 mode for 76800 and 153600 */ } #endif break; } if (count < 0) return count; *divisor = (u_int) count; return 0; } static void pc98_set_baud_rate( struct com_s *com, u_int count ) { int if_type, io, s; if_type = com->pc98_if_type & 0x0f; io = rman_get_start(com->ioportres) & 0xff00; switch (com->pc98_if_type) { case COM_IF_INTERNAL: if (PC98SIO_baud_rate_port(if_type) != -1) { if (count & COM1_EXT_CLOCK) { outb((Port_t)PC98SIO_baud_rate_port(if_type), count & 0xff); break; } else { outb((Port_t)PC98SIO_baud_rate_port(if_type), 0x09); } } if (count == 0) return; /* set i8253 */ s = splclock(); if (count != 3) outb( 0x77, 0xb6 ); else outb( 0x77, 0xb4 ); outb( 0x5f, 0); outb( 0x75, count & 0xff ); outb( 0x5f, 0); outb( 0x75, (count >> 8) & 0xff ); splx(s); break; case COM_IF_IND_SS_1: case COM_IF_IND_SS_2: outb(io | PC98SIO_intr_ctrl_port(if_type), 0); outb(io | PC98SIO_baud_rate_port(if_type), 0); outb(io | PC98SIO_baud_rate_port(if_type), 0xc0); outb(io | PC98SIO_baud_rate_port(if_type), (count >> 8) | 0x80); outb(io | PC98SIO_baud_rate_port(if_type), count & 0xff); break; case COM_IF_PIO9032B_1: case COM_IF_PIO9032B_2: outb(io | PC98SIO_baud_rate_port(if_type), count); break; case COM_IF_B98_01_1: case COM_IF_B98_01_2: outb(io | PC98SIO_baud_rate_port(if_type), count & 0x0f); #ifdef B98_01_OLD /* * Some old B98_01 board should be controlled * in different way, but this hasn't been tested yet. */ outb(io | PC98SIO_func_port(if_type), (count & 0x20000) ? 
0xf0 : 0xf2); #endif break; } } static int pc98_check_if_type(device_t dev, struct siodev *iod) { int irr, io, if_type, tmp; static short irq_tab[2][8] = { { 3, 5, 6, 9, 10, 12, 13, -1}, { 3, 10, 12, 13, 5, 6, 9, -1} }; if_type = iod->if_type & 0x0f; iod->irq = 0; io = isa_get_port(dev) & 0xff00; if (IS_8251(iod->if_type)) { if (PC98SIO_func_port(if_type) != -1) { outb(io | PC98SIO_func_port(if_type), 0xf2); tmp = ttspeedtab(9600, if_8251_type[if_type].speedtab); if (tmp != -1 && PC98SIO_baud_rate_port(if_type) != -1) outb(io | PC98SIO_baud_rate_port(if_type), tmp); } iod->cmd = io | PC98SIO_cmd_port(if_type); iod->sts = io | PC98SIO_sts_port(if_type); iod->mod = io | PC98SIO_in_modem_port(if_type); iod->ctrl = io | PC98SIO_intr_ctrl_port(if_type); if (iod->if_type == COM_IF_INTERNAL) { iod->irq = 4; if (pc98_check_8251vfast()) { PC98SIO_baud_rate_port(if_type) = I8251F_div; if_8251_type[if_type].speedtab = pc98fast_speedtab; } } else { tmp = inb( iod->mod ) & if_8251_type[if_type].irr_mask; if ((isa_get_port(dev) & 0xff) == IO_COM2) iod->irq = irq_tab[0][tmp]; else iod->irq = irq_tab[1][tmp]; } } else { irr = if_16550a_type[if_type].irr_read; #ifdef COM_MULTIPORT if (!COM_ISMULTIPORT(device_get_flags(dev)) || device_get_unit(dev) == COM_MPMASTER(device_get_flags(dev))) #endif if (irr != -1) { tmp = inb(io | irr); if (isa_get_port(dev) & 0x01) /* XXX depend on RSB-384 */ iod->irq = irq_tab[1][tmp >> 3]; else iod->irq = irq_tab[0][tmp & 0x07]; } iod->cmd = 0; iod->sts = 0; iod->mod = 0; iod->ctrl = 0; } if ( iod->irq == -1 ) return -1; return 0; } static void pc98_set_ioport(struct com_s *com) { int if_type = com->pc98_if_type & 0x0f; Port_t io = rman_get_start(com->ioportres) & 0xff00; pc98_check_sysclock(); com->data_port = io | PC98SIO_data_port(if_type); com->cmd_port = io | PC98SIO_cmd_port(if_type); com->sts_port = io | PC98SIO_sts_port(if_type); com->in_modem_port = io | PC98SIO_in_modem_port(if_type); com->intr_ctrl_port = io | PC98SIO_intr_ctrl_port(if_type); } static int pc98_check_8251vfast(void) { int i; outb(I8251F_div, 0x8c); DELAY(10); for (i = 0; i < 100; i++) { if ((inb(I8251F_div) & 0x80) != 0) { i = 0; break; } DELAY(1); } outb(I8251F_div, 0); DELAY(10); for (; i < 100; i++) { if ((inb(I8251F_div) & 0x80) == 0) return 1; DELAY(1); } return 0; } static int pc98_check_8251fifo(void) { u_char tmp1, tmp2; tmp1 = inb(I8251F_iir); DELAY(10); tmp2 = inb(I8251F_iir); if (((tmp1 ^ tmp2) & 0x40) != 0 && ((tmp1 | tmp2) & 0x20) == 0) return 1; return 0; } #endif /* PC98 defined */ Index: stable/7/sys/pc98/pc98/machdep.c =================================================================== --- stable/7/sys/pc98/pc98/machdep.c (revision 177733) +++ stable/7/sys/pc98/pc98/machdep.c (revision 177734) @@ -1,2819 +1,2820 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_npx.h" #include "opt_perfmon.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #include #endif #ifdef DEV_ISA #include #endif /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern void init386(int first); extern void dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */ int need_post_dma_flush; /* If 1, use invd after DMA transfer. 
*/ #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif int _udatasel, _ucodesel; u_int basemem; static int ispc98 = 1; SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, ""); int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; #ifndef SMP static struct pcpu __pcpu; #endif struct mtx icu_lock; struct mem_range_softc mem_range_softc; static void cpu_startup(dummy) void *dummy; { /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), ptoa((uintmax_t)Maxmem) / 1048576); realmem = Maxmem; /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. 
*/ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 
1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? 
SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. 
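[Editor's illustration, not part of this change: the shape of the EFL_SECURE() test that the sigreturn paths below apply to user-supplied eflags. The user_change_mask parameter is a hypothetical stand-in for PSL_USERCHANGE, whose value is defined elsewhere in the tree and not reproduced here.]
// Illustrative sketch only: eflags validation as done by EFL_SECURE().
static int
example_eflags_secure(unsigned int ef, unsigned int oef,
    unsigned int user_change_mask)
{
	// Only bits the user is allowed to change may differ between the
	// eflags taken from the signal context and the current trapframe.
	return (((ef ^ oef) & ~user_change_mask) == 0);
}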
* * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; struct proc *p = td->td_proc; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. 
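 */

/*
 * The %cs check a few lines above only accepts code selectors whose
 * requested privilege level is the user ring; on x86 the low two bits of
 * a selector hold the RPL.  A stand-alone sketch of that idea follows;
 * the helper names and sample selector values are made up.
 */
#if 0	/* standalone illustration, not compiled as part of this file */
#include <stdint.h>
#include <stdio.h>

#define SEL_UPL	3		/* user privilege level (ring 3) */

/* The low two bits of an x86 segment selector hold the RPL. */
static int
selector_rpl(uint16_t sel)
{

	return (sel & 0x3);
}

/* Accept only selectors that request user privilege. */
static int
cs_is_user(uint16_t cs)
{

	return (selector_rpl(cs) == SEL_UPL);
}

int
main(void)
{
	uint16_t user_cs = 0x1f;	/* example ring-3 selector */
	uint16_t kern_cs = 0x08;	/* example ring-0 selector */

	printf("user_cs ok=%d kern_cs ok=%d\n",
	    cs_is_user(user_cs), cs_is_user(kern_cs));
	return (0);
}
#endif

/*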
*/ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; PROC_LOCK(p); #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif SIGSETOLD(td->td_sigmask, scp->sc_mask); SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct proc *p = td->td_proc; struct trapframe *regs; const struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("freebsd4_sigreturn: cs = 0x%x\n", cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; int cs, eflags, error, ret; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. 
*/ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { register_t reg; uint64_t tsc1, tsc2; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); if (!tsc_present) return (EOPNOTSUPP); /* If we're booting, trust the rate calibrated moments ago. */ if (cold) { *rate = tsc_freq; return (0); } #ifdef SMP /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); #ifdef SMP thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); #endif /* * Calculate the difference in readings, convert to Mhz, and * subtract 0.5% of the total. Empirical testing has shown that * overhead in DELAY() works out to approximately this value. */ tsc2 -= tsc1; *rate = tsc2 * 1000 - tsc2 * 5; return (0); } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Hook to idle the CPU when possible. In the SMP case we default to * off because a halted cpu will not currently pick up a new thread in the * run queue until the next timer tick. If turned on this will result in * approximately a 4.2% loss in real time performance in buildworld tests * (but improves user and sys times oddly enough), and saves approximately * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). * * XXX we need to have a cpu mask of idle cpus and generate an IPI or * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. * Then we can have our cake and eat it too. * * XXX I'm turning it on for SMP as well by default for now. It seems to * help lock contention somewhat, and this is critical for HTT. -Peter */ static int cpu_idle_hlt = 1; TUNABLE_INT("machdep.cpu_idle_hlt", &cpu_idle_hlt); SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, &cpu_idle_hlt, 0, "Idle loop HLT enable"); static void cpu_idle_default(void) { /* * we must absolutely guarentee that hlt is the * absolute next instruction after sti or we * introduce a timing window. */ __asm __volatile("sti; hlt"); } /* * Note that we have to be careful here to avoid a race between checking * sched_runnable() and actually halting. If we don't do this, we may waste * the time between calling hlt and the next interrupt even though there * is a runnable process. */ void cpu_idle(void) { #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif if (cpu_idle_hlt) { disable_intr(); if (sched_runnable()) enable_intr(); else (*cpu_idle_hook)(); } } /* Other subsystems (e.g., ACPI) can hook this later. 
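 */

/*
 * cpu_est_clockrate() above reads the TSC twice around a roughly 1 ms
 * delay, scales the delta to ticks per second, and subtracts 0.5% for the
 * measured DELAY() overhead.  A rough user-space sketch of the same
 * arithmetic; usleep() is far less precise than the kernel's DELAY(), so
 * this only illustrates the calculation, not a trustworthy measurement.
 */
#if 0	/* standalone illustration, not compiled as part of this file */
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

/* Read the time-stamp counter (x86). */
static uint64_t
read_tsc(void)
{
	uint32_t lo, hi;

	__asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32 | lo);
}

int
main(void)
{
	uint64_t t1, t2, delta, rate;

	t1 = read_tsc();
	usleep(1000);		/* ~1 ms; much coarser than DELAY(1000) */
	t2 = read_tsc();

	delta = t2 - t1;	/* ticks in roughly one millisecond */
	/* Scale to ticks per second and subtract 0.5% for delay overhead. */
	rate = delta * 1000 - delta * 5;

	printf("estimated clock rate: %ju Hz\n", (uintmax_t)rate);
	return (0);
}
#endif

/*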
*/ void (*cpu_idle_hook)(void) = cpu_idle_default; /* * Clear registers on exec */ void exec_setregs(td, entry, stack, ps_strings) struct thread *td; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == PCPU_GET(curpcb)) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ td->td_pcb->pcb_flags &= ~FP_SOFTFP; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate. * * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. 
*/ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ struct mtx dt_lock; /* lock for GDT and LDT */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. * * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUFS_SEL 2 %fs Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUGS_SEL 3 %gs Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GCODE_SEL 4 Code Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GDATA_SEL 5 Data Descriptor for kernel */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUCODE_SEL 6 Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GUDATA_SEL 7 Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment 
descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { 0x400, /* segment base address */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { 0x0, /* segment base address */ sizeof(struct i386tss)-1,/* length */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GLDT_SEL 10 LDT Descriptor */ { (int) ldt, /* segment base address */ sizeof(ldt)-1, /* length - all address space */ SDT_SYSLDT, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { (int) ldt, /* segment base address */ (512 * sizeof(union descriptor)-1), /* length */ SDT_SYSLDT, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { (int) &dblfault_tss, /* segment base address */ sizeof(struct i386tss)-1,/* length - all address space */ SDT_SYS386TSS, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* unused - default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMERA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { 0, /* segment base address (overwritten) */ 0xfffff, /* length */ SDT_MEMRWA, /* segment type */ 0, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* GNDIS_SEL 18 NDIS Descriptor */ { 0x0, /* segment base address */ 0x0, /* length */ 0, /* 
segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Code Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMERA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, /* Null Descriptor - overwritten by call gate */ { 0x0, /* segment base address */ 0x0, /* length - all address space */ 0, /* segment type */ 0, /* segment descriptor priority level */ 0, /* segment descriptor present */ 0, 0, 0, /* default 32 vs 16 bit size */ 0 /* limit granularity (byte/page units)*/ }, /* Data Descriptor for user */ { 0x0, /* segment base address */ 0xfffff, /* length - all address space */ SDT_MEMRWA, /* segment type */ SEL_UPL, /* segment descriptor priority level */ 1, /* segment descriptor present */ 0, 0, 1, /* default 32 vs 16 bit size */ 1 /* limit granularity (byte/page units)*/ }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. 
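 */

/*
 * setidt() above stores a handler address split across the gate's low and
 * high 16-bit offset fields, and db_show_idt reassembles it with
 * (hi << 16 | lo).  A stand-alone sketch of that split and rejoin;
 * struct gate_offsets is a made-up, reduced stand-in for the real gate
 * descriptor layout.
 */
#if 0	/* standalone illustration, not compiled as part of this file */
#include <stdint.h>
#include <stdio.h>

/* A reduced, hypothetical view of the two offset fields in an IDT gate. */
struct gate_offsets {
	uint16_t lo;		/* bits  0..15 of the handler address */
	uint16_t hi;		/* bits 16..31 of the handler address */
};

static struct gate_offsets
split_offset(uint32_t func)
{
	struct gate_offsets g;

	g.lo = (uint16_t)(func & 0xffff);
	g.hi = (uint16_t)(func >> 16);
	return (g);
}

static uint32_t
join_offset(struct gate_offsets g)
{

	return ((uint32_t)g.hi << 16 | g.lo);
}

int
main(void)
{
	uint32_t handler = 0xc0a1b2c3;	/* arbitrary 32-bit handler address */
	struct gate_offsets g = split_offset(handler);

	printf("lo=%#x hi=%#x rejoined=%#x\n", g.lo, g.hi, join_offset(g));
	return (0);
}
#endif

/*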
*/ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int i, off, physmap_idx, pa_indx, da_indx; int pg_n; u_long physmem_tunable; u_int extmem, under16; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; quad_t dcons_addr, dcons_size; bzero(physmap, sizeof(physmap)); /* XXX - some of EPSON machines can't use PG_N */ pg_n = PG_N; if (pc98_machine_type & M_EPSON_PC98) { switch (epson_machine_id) { #ifdef WB_CACHE default: #endif case EPSON_PC486_HX: case EPSON_PC486_HG: case EPSON_PC486_HA: pg_n = 0; break; } } /* * Perform "base memory" related probes & setup */ under16 = pc98_getmemsize(&basemem, &extmem); if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * if basemem != 640, map pages r/w into vm86 page table so * that the bios can scribble on it. */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; /* * Now, physmap contains a map of physical memory. 
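 */

/*
 * physmap holds base/bound pairs in bytes, terminated by a zero bound,
 * which the code below walks two entries at a time.  A stand-alone sketch
 * of that walk with made-up chunk values; a PAGE_SIZE of 4096 is assumed.
 */
#if 0	/* standalone illustration, not compiled as part of this file */
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096	/* assumed i386 page size */

int
main(void)
{
	/* Hypothetical base/bound pairs, in bytes, ended by a zero bound. */
	uint64_t physmap[] = {
		0x0000000, 0x00a0000,	/* 640K of base memory */
		0x0100000, 0x4000000,	/* extended memory up to 64M */
		0, 0
	};
	uint64_t total = 0;
	int i;

	for (i = 0; physmap[i + 1] != 0; i += 2) {
		uint64_t size = physmap[i + 1] - physmap[i];

		printf("%#09jx - %#09jx, %ju bytes (%ju pages)\n",
		    (uintmax_t)physmap[i], (uintmax_t)(physmap[i + 1] - 1),
		    (uintmax_t)size, (uintmax_t)(size / PAGE_SIZE));
		total += size;
	}
	printf("total: %ju pages\n", (uintmax_t)(total / PAGE_SIZE));
	return (0);
}
#endif

/*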
*/ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* * We need to divide chunk if Maxmem is larger than 16MB and * under 16MB area is not full of memory. * (1) system area (15-16MB region) is cut off * (2) extended memory is only over 16MB area (ex. Melco "HYPERMEMORY") */ if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) { /* 15M - 16M region is cut off, so need to divide chunk */ physmap[physmap_idx + 1] = under16 * 1024; physmap_idx += 2; physmap[physmap_idx] = 0x1000000; physmap[physmap_idx + 1] = physmap[2] + extmem * 1024; } /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | pg_n; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. 
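 */

/*
 * The test loop above writes alternating-bit and all-ones/all-zeroes
 * patterns through a pointer, checks each read-back, and restores the
 * original word.  A stand-alone sketch of the pattern check; in the
 * kernel the page is first mapped non-cacheable so the test reaches real
 * memory, which an ordinary user-space access does not guarantee.
 */
#if 0	/* standalone illustration, not compiled as part of this file */
#include <stdint.h>
#include <stdio.h>

/* Write each pattern through a volatile pointer and verify the read-back. */
static int
word_is_good(volatile uint32_t *p)
{
	static const uint32_t patterns[] = {
		0xaaaaaaaa, 0x55555555, 0xffffffff, 0x00000000
	};
	uint32_t saved = *p;	/* preserve the original contents */
	int i, good = 1;

	for (i = 0; i < 4; i++) {
		*p = patterns[i];
		if (*p != patterns[i])
			good = 0;
	}
	*p = saved;		/* restore the original value */
	return (good);
}

int
main(void)
{
	uint32_t word = 0xdeadbeef;

	printf("word good: %d (still %#x)\n", word_is_good(&word), word);
	return (0);
}
#endif

/*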
* If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } void init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x; struct pcpu *pc; thread0.td_kstack = proc0kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); /* * Initialize DMAC */ pc98_init_dmac(); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); #ifdef SMP pc = &SMP_prvspace[0].pcpu; #else pc = &__pcpu; #endif gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. 
*/ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). */ i8254_init(); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA atpic_startup(); #endif #ifdef DDB ksym_start = bootinfo.bi_symtab; ksym_end = bootinfo.bi_esymtab; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) - kdb_enter("Boot flags requested debugger"); + kdb_enter_why(KDB_WHY_BOOTFLAGS, + "Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! 
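 */

/*
 * The IDT setup above first points every vector at the reserved handler
 * and then overrides only the vectors it cares about.  A stand-alone
 * sketch of that default-then-override pattern using a plain function
 * pointer table; all names and the vector count are made up.
 */
#if 0	/* standalone illustration, not compiled as part of this file */
#include <stdio.h>

#define NVEC	32		/* a small, hypothetical vector count */

typedef void (*handler_t)(int);

static void default_handler(int v) { printf("vector %d: unexpected\n", v); }
static void divide_handler(int v)  { printf("vector %d: divide error\n", v); }
static void page_handler(int v)    { printf("vector %d: page fault\n", v); }

static handler_t table[NVEC];

int
main(void)
{
	int i;

	/* Point every entry at the catch-all first... */
	for (i = 0; i < NVEC; i++)
		table[i] = default_handler;
	/* ...then install the vectors we actually handle. */
	table[0] = divide_handler;
	table[14] = page_handler;

	table[0](0);
	table[3](3);
	table[14](14);
	return (0);
}
#endif

/*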
*/ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cr3 = (int)IdlePTD; dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ /* XXX yes! */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; /* XXXKSE */ thread0.td_pcb->pcb_cr3 = (int)IdlePTD; thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_flags = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(td->td_md.md_saved_flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL) static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); /* Put the problematic entry (#6) at the end of the lower page. */ new_idt = (struct gate_descriptor*) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. 
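 */

/*
 * spinlock_enter()/spinlock_exit() above keep a per-thread nesting count
 * so the interrupt state is saved and disabled only on the outermost
 * enter and restored only on the matching outermost exit.  A stand-alone
 * sketch of just that counting idea with a simulated interrupt flag; the
 * real code also brackets the section with critical_enter()/critical_exit().
 */
#if 0	/* standalone illustration, not compiled as part of this file */
#include <stdio.h>

/* Simulated interrupt-enable state and per-thread bookkeeping. */
static int intr_enabled = 1;
static int spin_count;
static int saved_state;

static void
spin_enter(void)
{

	if (spin_count == 0) {		/* outermost entry: save and disable */
		saved_state = intr_enabled;
		intr_enabled = 0;
	}
	spin_count++;
}

static void
spin_exit(void)
{

	spin_count--;
	if (spin_count == 0)		/* last exit: restore the saved state */
		intr_enabled = saved_state;
}

int
main(void)
{

	spin_enter();
	spin_enter();			/* nested: interrupts stay disabled */
	printf("nested: count=%d intr=%d\n", spin_count, intr_enabled);
	spin_exit();
	printf("inner exit: count=%d intr=%d\n", spin_count, intr_enabled);
	spin_exit();
	printf("outer exit: count=%d intr=%d\n", spin_count, intr_enabled);
	return (0);
}
#endif

/*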
We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8; } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_eflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; bzero(sv_87, sizeof(*sv_87)); /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if 
(cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, &td->td_pcb->pcb_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if ((ret = set_fpcontext(td, mcp)) == 0) { tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; ret = 0; } return (ret); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; #else union savefpu *addr; /* * XXX mc_fpstate might be misaligned, since its declaration is not * unportabilized using __attribute__((aligned(16))) like the * declaration of struct savemm, and anyway, alignment doesn't work * for auto variables since we don't use gcc's pessimal stack * alignment. Work around this by abusing the spare fields after * mcp->mc_fpstate. * * XXX unpessimize most cases by only aligning when fxsave might be * called, although this requires knowing too much about * npxgetregs()'s internals. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } mcp->mc_ownedfp = npxgetregs(td, addr); if (addr != (union savefpu *)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); } mcp->mc_fpformat = npxformat(); #endif } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { union savefpu *addr; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* XXX align as above. 
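 */

/*
 * set_mcontext() above merges the incoming eflags so that only the
 * user-changeable bits are taken from the new context and every other bit
 * keeps its current value.  A stand-alone sketch of that merge; USER_BITS
 * is a made-up mask standing in for PSL_USERCHANGE, whose real value is
 * defined elsewhere, and the sample values are arbitrary.
 */
#if 0	/* standalone illustration, not compiled as part of this file */
#include <stdint.h>
#include <stdio.h>

/* Hypothetical mask of the flag bits a user context may change. */
#define USER_BITS	0x00000cd5u

/* Take USER_BITS from the new value, everything else from the current one. */
static uint32_t
merge_eflags(uint32_t cur, uint32_t new_val)
{

	return ((new_val & USER_BITS) | (cur & ~USER_BITS));
}

int
main(void)
{
	uint32_t cur = 0x00000202;	/* e.g. IF set by the kernel */
	uint32_t req = 0xffffffff;	/* a context asking for every bit */
	uint32_t out = merge_eflags(cur, req);

	printf("cur=%#x requested=%#x merged=%#x\n", cur, req, out);
	return (0);
}
#endif

/*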
*/ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); } #ifdef DEV_NPX #ifdef CPU_ENABLE_SSE if (cpu_fxsr) addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; #endif /* * XXX we violate the dubious requirement that npxsetregs() * be called with interrupts disabled. */ npxsetregs(td, addr); #endif /* * Don't bother putting things back where they were in the * misaligned case, since we know that the caller won't use * them again. */ } else return (EINVAL); return (0); } static void fpstate_drop(struct thread *td) { register_t s; s = intr_disable(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; intr_restore(s); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? 
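 */

/*
 * set_dbregs() above rejects the %dr7 encodings that are undefined
 * (access type 0x02 and length 0x02 for any of the four breakpoints).
 * A stand-alone sketch of pulling those 2-bit fields out of a dr7-style
 * word; the bit positions follow the usual x86 layout (R/W at bits
 * 16 + 4*i, LEN at bits 18 + 4*i) and the sample values are made up.
 */
#if 0	/* standalone illustration, not compiled as part of this file */
#include <stdint.h>
#include <stdio.h>

/* For breakpoint i, dr7 keeps a 2-bit R/W field and a 2-bit LEN field. */
static unsigned
dr7_access(uint32_t dr7, int i)
{

	return ((dr7 >> (16 + i * 4)) & 0x3);
}

static unsigned
dr7_len(uint32_t dr7, int i)
{

	return ((dr7 >> (18 + i * 4)) & 0x3);
}

/* Reject the encodings that are undefined on this CPU family. */
static int
dr7_is_valid(uint32_t dr7)
{
	int i;

	for (i = 0; i < 4; i++) {
		if (dr7_access(dr7, i) == 0x2)	/* I/O breakpoint: undefined */
			return (0);
		if (dr7_len(dr7, i) == 0x2)	/* 8-byte length: undefined */
			return (0);
	}
	return (1);
}

int
main(void)
{
	uint32_t ok = 0x000d0401;	/* breakpoint 0: data write, 4 bytes */
	uint32_t bad = 0x00020001;	/* breakpoint 0: access type 0x2 */

	printf("ok=%d bad=%d\n", dr7_is_valid(ok), dr7_is_valid(bad));
	return (0);
}
#endif

/*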
*/ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only * available as macros calling inlined functions, thus cannot be * called from the debugger. * * The actual code is stolen from , and de-inlined. */ #undef inb #undef outb /* silence compiler warnings */ u_char inb(u_int); void outb(u_int, u_char); u_char inb(u_int port) { u_char data; /* * We use %%dx and not %1 here because i/o is done at %dx and not at * %edx, while gcc generates inferior code (movw instead of movl) * if we tell it to load (u_short) port. */ __asm __volatile("inb %%dx,%0" : "=a" (data) : "d" (port)); return (data); } void outb(u_int port, u_char data) { u_char al; /* * Use an unnecessary assignment to help gcc's register allocator. * This make a large difference for gcc-1.40 and a tiny difference * for gcc-2.6.0. For gcc-1.40, al had to be ``asm("ax")'' for * best results. gcc-2.6.0 can't handle this. */ al = data; __asm __volatile("outb %0,%%dx" : : "a" (al), "d" (port)); } #endif /* KDB */ Index: stable/7/sys/powerpc/powermac/pswitch.c =================================================================== --- stable/7/sys/powerpc/powermac/pswitch.c (revision 177733) +++ stable/7/sys/powerpc/powermac/pswitch.c (revision 177734) @@ -1,144 +1,144 @@ /*- * Copyright (C) 2002 Benno Rice. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include struct pswitch_softc { int sc_irqrid; struct resource *sc_irq; void *sc_ih; }; static int pswitch_probe(device_t); static int pswitch_attach(device_t); static int pswitch_intr(void *); static device_method_t pswitch_methods[] = { /* Device interface */ DEVMETHOD(device_probe, pswitch_probe), DEVMETHOD(device_attach, pswitch_attach), { 0, 0 } }; static driver_t pswitch_driver = { "pswitch", pswitch_methods, sizeof(struct pswitch_softc) }; static devclass_t pswitch_devclass; DRIVER_MODULE(pswitch, macio, pswitch_driver, pswitch_devclass, 0, 0); static int pswitch_probe(device_t dev) { char *type = macio_get_devtype(dev); if (strcmp(type, "gpio") != 0) return (ENXIO); device_set_desc(dev, "GPIO Programmer's Switch"); return (0); } static int pswitch_attach(device_t dev) { struct pswitch_softc *sc; phandle_t node, child; char type[32]; u_int irq[2]; sc = device_get_softc(dev); node = macio_get_node(dev); for (child = OF_child(node); child != 0; child = OF_peer(child)) { if (OF_getprop(child, "device_type", type, 32) == -1) continue; if (strcmp(type, "programmer-switch") == 0) break; } if (child == 0) { device_printf(dev, "could not find correct node\n"); return (ENXIO); } if (OF_getprop(child, "interrupts", irq, sizeof(irq)) == -1) { device_printf(dev, "could not get interrupt\n"); return (ENXIO); } sc->sc_irqrid = 0; sc->sc_irq = bus_alloc_resource(dev, SYS_RES_IRQ, &sc->sc_irqrid, irq[0], irq[0], 1, RF_ACTIVE); if (sc->sc_irq == NULL) { device_printf(dev, "could not allocate interrupt\n"); return (ENXIO); } if (bus_setup_intr(dev, sc->sc_irq, INTR_TYPE_MISC, pswitch_intr, NULL, dev, &sc->sc_ih) != 0) { device_printf(dev, "could not setup interrupt\n"); bus_release_resource(dev, SYS_RES_IRQ, sc->sc_irqrid, sc->sc_irq); return (ENXIO); } return (0); } static int pswitch_intr(void *arg) { device_t dev; dev = (device_t)arg; - kdb_enter(device_get_nameunit(dev)); + kdb_enter_why(KDB_WHY_POWERPC, device_get_nameunit(dev)); return (FILTER_HANDLED); } Index: stable/7/sys/powerpc/powerpc/machdep.c =================================================================== --- stable/7/sys/powerpc/powerpc/machdep.c (revision 177733) +++ stable/7/sys/powerpc/powerpc/machdep.c (revision 177734) @@ -1,995 +1,996 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_msgbuf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif int cold = 1; struct pcpu __pcpu[MAXCPU]; struct trapframe frame0; vm_offset_t kstack0; vm_offset_t kstack0_phys; char machine[] = "powerpc"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, ""); static int cacheline_size = CACHELINESIZE; SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size, CTLFLAG_RD, &cacheline_size, 0, ""); static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL) void powerpc_init(u_int, u_int, u_int, void *); int save_ofw_mapping(void); int restore_ofw_mapping(void); void install_extint(void (*)(void)); int setfault(faultbuf); /* defined in locore.S */ static int grab_mcontext(struct thread *, mcontext_t *, int); void asm_panic(char *); long Maxmem = 0; long realmem = 0; struct pmap ofw_pmap; extern int ofmsr; struct bat battable[16]; struct kva_md_info kmi; void setPQL2(int *const size, int *const ways); void setPQL2(int *const size, int *const ways) { return; } static void powerpc_ofw_shutdown(void *junk, int howto) { if (howto & RB_HALT) { OF_halt(); } OF_reboot(); } static void cpu_startup(void *dummy) { /* * Initialise the decrementer-based clock. */ decr_init(); /* * Good {morning,afternoon,evening,night}. */ cpu_setup(PCPU_GET(cpuid)); /* startrtclock(); */ #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ld (%ld MB)\n", ptoa(physmem), ptoa(physmem) / 1048576); realmem = physmem; /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { int size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08x - 0x%08x, %d bytes (%d pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size1, size1 / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ld (%ld MB)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); EVENTHANDLER_REGISTER(shutdown_final, powerpc_ofw_shutdown, 0, SHUTDOWN_PRI_LAST); #ifdef SMP /* * OK, enough kmem_alloc/malloc state should be up, lets get on with it! */ mp_start(); /* fire up the secondaries */ mp_announce(); #endif /* SMP */ } extern char kernel_text[], _end[]; extern void *trapcode, *trapsize; extern void *alitrap, *alisize; extern void *dsitrap, *dsisize; extern void *decrint, *decrsize; extern void *extint, *extsize; extern void *dblow, *dbsize; extern void *vectrap, *vectrapsize; void powerpc_init(u_int startkernel, u_int endkernel, u_int basekernel, void *mdp) { struct pcpu *pc; vm_offset_t end, off; void *kmdp; char *env; end = 0; kmdp = NULL; /* * Parse metadata if present and fetch parameters. Must be done * before console is inited so cninit gets the right value of * boothowto. 
*/ if (mdp != NULL) { preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) { boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); end = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); #endif } } /* * Init params/tunables that can be overridden by the loader */ init_param1(); /* * Start initializing proc0 and thread0. */ proc_linkup0(&proc0, &thread0); thread0.td_frame = &frame0; /* * Set up per-cpu data. */ pc = &__pcpu[0]; pcpu_init(pc, 0, sizeof(struct pcpu)); pc->pc_curthread = &thread0; pc->pc_curpcb = thread0.td_pcb; pc->pc_cpuid = 0; __asm __volatile("mtsprg 0, %0" :: "r"(pc)); mutex_init(); /* * Initialize the console before printing anything. */ cninit(); /* * Complain if there is no metadata. */ if (mdp == NULL || kmdp == NULL) { printf("powerpc_init: no loader metadata.\n"); } kdb_init(); kobj_machdep_init(); /* * XXX: Initialize the interrupt tables. * Disable translation in case the vector area * hasn't been mapped (G5) */ mtmsr(mfmsr() & ~(PSL_IR | PSL_DR)); isync(); bcopy(&trapcode, (void *)EXC_RST, (size_t)&trapsize); bcopy(&trapcode, (void *)EXC_MCHK, (size_t)&trapsize); bcopy(&dsitrap, (void *)EXC_DSI, (size_t)&dsisize); bcopy(&trapcode, (void *)EXC_ISI, (size_t)&trapsize); bcopy(&trapcode, (void *)EXC_EXI, (size_t)&trapsize); bcopy(&alitrap, (void *)EXC_ALI, (size_t)&alisize); bcopy(&trapcode, (void *)EXC_PGM, (size_t)&trapsize); bcopy(&trapcode, (void *)EXC_FPU, (size_t)&trapsize); bcopy(&trapcode, (void *)EXC_DECR, (size_t)&trapsize); bcopy(&trapcode, (void *)EXC_SC, (size_t)&trapsize); bcopy(&trapcode, (void *)EXC_TRC, (size_t)&trapsize); bcopy(&trapcode, (void *)EXC_FPA, (size_t)&trapsize); bcopy(&vectrap, (void *)EXC_VEC, (size_t)&vectrapsize); bcopy(&trapcode, (void *)EXC_VECAST, (size_t)&trapsize); bcopy(&trapcode, (void *)EXC_THRM, (size_t)&trapsize); bcopy(&trapcode, (void *)EXC_BPT, (size_t)&trapsize); #ifdef KDB bcopy(&dblow, (void *)EXC_RST, (size_t)&dbsize); bcopy(&dblow, (void *)EXC_MCHK, (size_t)&dbsize); bcopy(&dblow, (void *)EXC_PGM, (size_t)&dbsize); bcopy(&dblow, (void *)EXC_TRC, (size_t)&dbsize); bcopy(&dblow, (void *)EXC_BPT, (size_t)&dbsize); #endif __syncicache(EXC_RSVD, EXC_LAST - EXC_RSVD); /* * Make sure translation has been enabled */ mtmsr(mfmsr() | PSL_IR|PSL_DR|PSL_ME|PSL_RI); isync(); /* * Initialise virtual memory. */ pmap_mmu_install(MMU_TYPE_OEA, 0); /* XXX temporary */ pmap_bootstrap(startkernel, endkernel); /* * Initialize params/tunables that are derived from memsize */ init_param2(physmem); /* * Grab booted kernel's name */ env = getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } /* * Finish setting up thread0. */ thread0.td_kstack = kstack0; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; /* * Map and initialise the message buffer. 
*/ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, msgbuf_phys + off); msgbufinit(msgbufp, MSGBUF_SIZE); #ifdef KDB if (boothowto & RB_KDB) - kdb_enter("Boot flags requested debugger"); + kdb_enter_why(KDB_WHY_BOOTFLAGS, + "Boot flags requested debugger"); #endif } void bzero(void *buf, size_t len) { caddr_t p; p = buf; while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) { *p++ = 0; len--; } while (len >= sizeof(u_long) * 8) { *(u_long*) p = 0; *((u_long*) p + 1) = 0; *((u_long*) p + 2) = 0; *((u_long*) p + 3) = 0; len -= sizeof(u_long) * 8; *((u_long*) p + 4) = 0; *((u_long*) p + 5) = 0; *((u_long*) p + 6) = 0; *((u_long*) p + 7) = 0; p += sizeof(u_long) * 8; } while (len >= sizeof(u_long)) { *(u_long*) p = 0; len -= sizeof(u_long); p += sizeof(u_long); } while (len) { *p++ = 0; len--; } } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct trapframe *tf; struct sigframe *sfp; struct sigacts *psp; struct sigframe sf; struct thread *td; struct proc *p; int oonstack, rndfsize; int sig; int code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; oonstack = sigonstack(tf->fixreg[1]); rndfsize = ((sizeof(sf) + 15) / 16) * 16; CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* * Save user context */ memset(&sf, 0, sizeof(sf)); grab_mcontext(td, &sf.sf_uc.uc_mcontext, 0); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; /* * Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - rndfsize); } else { sfp = (struct sigframe *)(tf->fixreg[1] - rndfsize); } /* * Translate the signal if appropriate (Linux emu ?) */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* * Save the floating-point state, if necessary, then copy it. */ /* XXX */ /* * Set up the registers to return to sigcode. * * r1/sp - sigframe ptr * lr - sig function, dispatched to by blrl in trampoline * r3 - sig number * r4 - SIGINFO ? &siginfo : exception code * r5 - user context * srr0 - trampoline function addr */ tf->lr = (register_t)catcher; tf->fixreg[1] = (register_t)sfp; tf->fixreg[FIRSTARG] = sig; tf->fixreg[FIRSTARG+2] = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* * Signal handler installed with SA_SIGINFO. */ tf->fixreg[FIRSTARG+1] = (register_t)&sfp->sf_si; /* * Fill siginfo structure. */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; sf.sf_si.si_addr = (void *) ((tf->exc == EXC_DSI) ? tf->dar : tf->srr0); } else { /* Old FreeBSD-style arguments. */ tf->fixreg[FIRSTARG+1] = code; tf->fixreg[FIRSTARG+3] = (tf->exc == EXC_DSI) ? tf->dar : tf->srr0; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); tf->srr0 = (register_t)(PS_STRINGS - *(p->p_sysent->sv_szsigcode)); /* * copy the frame out to userland. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { /* * Process has trashed its stack. Kill it. 
*/ CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); PROC_LOCK(p); sigexit(td, SIGILL); } CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->srr0, tf->fixreg[1]); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } int sigreturn(struct thread *td, struct sigreturn_args *uap) { struct proc *p; ucontext_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); p = td->td_proc; PROC_LOCK(p); td->td_sigmask = uc.uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_lr = tf->srr0; pcb->pcb_sp = tf->fixreg[1]; } /* * get_mcontext/sendsig helper routine that doesn't touch the * proc lock */ static int grab_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct pcb *pcb; pcb = td->td_pcb; memset(mcp, 0, sizeof(mcontext_t)); mcp->mc_vers = _MC_VERSION; mcp->mc_flags = 0; memcpy(&mcp->mc_frame, td->td_frame, sizeof(struct trapframe)); if (flags & GET_MC_CLEAR_RET) { mcp->mc_gpr[3] = 0; mcp->mc_gpr[4] = 0; } /* * This assumes that floating-point context is *not* lazy, * so if the thread has used FP there would have been a * FP-unavailable exception that would have set things up * correctly. */ if (pcb->pcb_flags & PCB_FPU) { KASSERT(td == curthread, ("get_mcontext: fp save not curthread")); critical_enter(); save_fpu(td); critical_exit(); mcp->mc_flags |= _MC_FP_VALID; memcpy(&mcp->mc_fpscr, &pcb->pcb_fpu.fpscr, sizeof(double)); memcpy(mcp->mc_fpreg, pcb->pcb_fpu.fpr, 32*sizeof(double)); } /* XXX Altivec context ? */ mcp->mc_len = sizeof(*mcp); return (0); } int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { int error; error = grab_mcontext(td, mcp, flags); if (error == 0) { PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(td->td_frame->fixreg[1]); PROC_UNLOCK(curthread->td_proc); } return (error); } int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct pcb *pcb; struct trapframe *tf; pcb = td->td_pcb; tf = td->td_frame; if (mcp->mc_vers != _MC_VERSION || mcp->mc_len != sizeof(*mcp)) return (EINVAL); /* * Don't let the user set privileged MSR bits */ if ((mcp->mc_srr1 & PSL_USERSTATIC) != (tf->srr1 & PSL_USERSTATIC)) { return (EINVAL); } memcpy(tf, mcp->mc_frame, sizeof(mcp->mc_frame)); if (mcp->mc_flags & _MC_FP_VALID) { if ((pcb->pcb_flags & PCB_FPU) != PCB_FPU) { critical_enter(); enable_fpu(td); critical_exit(); } memcpy(&pcb->pcb_fpu.fpscr, &mcp->mc_fpscr, sizeof(double)); memcpy(pcb->pcb_fpu.fpr, mcp->mc_fpreg, 32*sizeof(double)); } /* XXX Altivec context? */ return (0); } void cpu_boot(int howto) { } void cpu_initclocks(void) { decr_tc_init(); } /* Get current clock frequency for the given cpu id. 
*/ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } /* * Shutdown the CPU as much as possible. */ void cpu_halt(void) { OF_exit(); } void cpu_idle(void) { /* TODO: Insert code to halt (until next interrupt) */ #ifdef INVARIANTS if ((mfmsr() & PSL_EE) != PSL_EE) { struct thread *td = curthread; printf("td msr %x\n", td->td_md.md_saved_msr); panic("ints disabled in idleproc!"); } #endif } /* * Set set up registers on exec. */ void exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tf; struct ps_strings arginfo; tf = trapframe(td); bzero(tf, sizeof *tf); tf->fixreg[1] = -roundup(-stack + 8, 16); /* * XXX Machine-independent code has already copied arguments and * XXX environment to userland. Get them back here. */ (void)copyin((char *)PS_STRINGS, &arginfo, sizeof(arginfo)); /* * Set up arguments for _start(): * _start(argc, argv, envp, obj, cleanup, ps_strings); * * Notes: * - obj and cleanup are the auxilliary and termination * vectors. They are fixed up by ld.elf_so. * - ps_strings is a NetBSD extention, and will be * ignored by executables which are strictly * compliant with the SVR4 ABI. * * XXX We have to set both regs and retval here due to different * XXX calling convention in trap.c and init_main.c. */ /* * XXX PG: these get overwritten in the syscall return code. * execve() should return EJUSTRETURN, like it does on NetBSD. * Emulate by setting the syscall return value cells. The * registers still have to be set for init's fork trampoline. */ td->td_retval[0] = arginfo.ps_nargvstr; td->td_retval[1] = (register_t)arginfo.ps_argvstr; tf->fixreg[3] = arginfo.ps_nargvstr; tf->fixreg[4] = (register_t)arginfo.ps_argvstr; tf->fixreg[5] = (register_t)arginfo.ps_envstr; tf->fixreg[6] = 0; /* auxillary vector */ tf->fixreg[7] = 0; /* termination vector */ tf->fixreg[8] = (register_t)PS_STRINGS; /* NetBSD extension */ tf->srr0 = entry; tf->srr1 = PSL_MBO | PSL_USERSET | PSL_FE_DFLT; td->td_pcb->pcb_flags = 0; } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; memcpy(regs, tf, sizeof(struct reg)); return (0); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on PowerPC */ return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPU) == 0) memset(fpregs, 0, sizeof(struct fpreg)); else memcpy(fpregs, &pcb->pcb_fpu, sizeof(struct fpreg)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; memcpy(tf, regs, sizeof(struct reg)); return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on PowerPC */ return (ENOSYS); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPU) == 0) enable_fpu(td); memcpy(&pcb->pcb_fpu, fpregs, sizeof(struct fpreg)); return (0); } int ptrace_set_pc(struct thread *td, unsigned long addr) { struct trapframe *tf; tf = td->td_frame; tf->srr0 = (register_t)addr; return (0); } int ptrace_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 |= PSL_SE; return (0); } int ptrace_clear_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 &= ~PSL_SE; return (0); } /* * Initialise a struct pcpu. 
*/ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz) { } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_msr = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(td->td_md.md_saved_msr); } /* * kcopy(const void *src, void *dst, size_t len); * * Copy len bytes from src to dst, aborting if we encounter a fatal * page fault. * * kcopy() _must_ save and restore the old fault handler since it is * called by uiomove(), which may be in the path of servicing a non-fatal * page fault. */ int kcopy(const void *src, void *dst, size_t len) { struct thread *td; faultbuf env, *oldfault; int rv; td = PCPU_GET(curthread); oldfault = td->td_pcb->pcb_onfault; if ((rv = setfault(env)) != 0) { td->td_pcb->pcb_onfault = oldfault; return rv; } memcpy(dst, src, len); td->td_pcb->pcb_onfault = oldfault; return (0); } void asm_panic(char *pstr) { panic(pstr); } int db_trap_glue(struct trapframe *); /* Called from trap_subr.S */ int db_trap_glue(struct trapframe *frame) { if (!(frame->srr1 & PSL_PR) && (frame->exc == EXC_TRC || frame->exc == EXC_RUNMODETRC || (frame->exc == EXC_PGM && (frame->srr1 & 0x20000)) || frame->exc == EXC_BPT || frame->exc == EXC_DSI)) { int type = frame->exc; if (type == EXC_PGM && (frame->srr1 & 0x20000)) { type = T_BREAKPOINT; } return (kdb_trap(type, 0, frame)); } return (0); } Index: stable/7/sys/security/mac_test/mac_test.c =================================================================== --- stable/7/sys/security/mac_test/mac_test.c (revision 177733) +++ stable/7/sys/security/mac_test/mac_test.c (revision 177734) @@ -1,2689 +1,2689 @@ /*- * Copyright (c) 1999-2002, 2007 Robert N. M. Watson * Copyright (c) 2001-2005 McAfee, Inc. * All rights reserved. * * This software was developed by Robert Watson for the TrustedBSD Project. * * This software was developed for the FreeBSD Project in part by McAfee * Research, the Security Research Division of McAfee, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ /* * Developed by the TrustedBSD Project. * * MAC Test policy - tests MAC Framework labeling by assigning object class * magic numbers to each label and validates that each time an object label * is passed into the policy, it has a consistent object type, catching * incorrectly passed labels, labels passed after free, etc. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include SYSCTL_DECL(_security_mac); SYSCTL_NODE(_security_mac, OID_AUTO, test, CTLFLAG_RW, 0, "TrustedBSD mac_test policy controls"); #define MAGIC_BPF 0xfe1ad1b6 #define MAGIC_DEVFS 0x9ee79c32 #define MAGIC_IFNET 0xc218b120 #define MAGIC_INPCB 0x4440f7bb #define MAGIC_IPQ 0x206188ef #define MAGIC_MBUF 0xbbefa5bb #define MAGIC_MOUNT 0xc7c46e47 #define MAGIC_SOCKET 0x9199c6cd #define MAGIC_SYSV_MSG 0x8bbba61e #define MAGIC_SYSV_MSQ 0xea672391 #define MAGIC_SYSV_SEM 0x896e8a0b #define MAGIC_SYSV_SHM 0x76119ab0 #define MAGIC_PIPE 0xdc6c9919 #define MAGIC_POSIX_SEM 0x78ae980c #define MAGIC_PROC 0x3b4be98f #define MAGIC_CRED 0x9a5a4987 #define MAGIC_VNODE 0x1a67a45c #define MAGIC_FREE 0x849ba1fd #define SLOT(x) mac_label_get((x), test_slot) #define SLOT_SET(x, v) mac_label_set((x), test_slot, (v)) static int test_slot; SYSCTL_INT(_security_mac_test, OID_AUTO, slot, CTLFLAG_RD, &test_slot, 0, "Slot allocated by framework"); SYSCTL_NODE(_security_mac_test, OID_AUTO, counter, CTLFLAG_RW, 0, "TrustedBSD mac_test counters controls"); #define COUNTER_DECL(variable) \ static int counter_##variable; \ SYSCTL_INT(_security_mac_test_counter, OID_AUTO, variable, \ CTLFLAG_RD, &counter_##variable, 0, #variable) #define COUNTER_INC(variable) atomic_add_int(&counter_##variable, 1) #ifdef KDB -#define DEBUGGER(func, string) kdb_enter((string)) +#define DEBUGGER(func, string) kdb_enter_why(KDB_WHY_MAC, (string)) #else #define DEBUGGER(func, string) printf("mac_test: %s: %s\n", (func), (string)) #endif #define LABEL_CHECK(label, magic) do { \ if (label != NULL) { \ KASSERT(SLOT(label) == magic || SLOT(label) == 0, \ ("%s: bad %s label", __func__, #magic)); \ } \ } while (0) #define LABEL_DESTROY(label, magic) do { \ if (SLOT(label) == magic || SLOT(label) == 0) { \ SLOT_SET(label, MAGIC_FREE); \ } else if (SLOT(label) == MAGIC_FREE) { \ DEBUGGER("%s: dup destroy", __func__); \ } else { \ DEBUGGER("%s: corrupted label", __func__); \ } \ } while (0) #define LABEL_INIT(label, magic) do { \ SLOT_SET(label, magic); \ } while (0) #define LABEL_NOTFREE(label) do { \ KASSERT(SLOT(label) != MAGIC_FREE, \ ("%s: destroyed label", __func__)); \ } while (0) /* * Label operations. 
*/ COUNTER_DECL(init_bpfdesc_label); static void mac_test_init_bpfdesc_label(struct label *label) { LABEL_INIT(label, MAGIC_BPF); COUNTER_INC(init_bpfdesc_label); } COUNTER_DECL(init_cred_label); static void mac_test_init_cred_label(struct label *label) { LABEL_INIT(label, MAGIC_CRED); COUNTER_INC(init_cred_label); } COUNTER_DECL(init_devfs_label); static void mac_test_init_devfs_label(struct label *label) { LABEL_INIT(label, MAGIC_DEVFS); COUNTER_INC(init_devfs_label); } COUNTER_DECL(init_ifnet_label); static void mac_test_init_ifnet_label(struct label *label) { LABEL_INIT(label, MAGIC_IFNET); COUNTER_INC(init_ifnet_label); } COUNTER_DECL(init_inpcb_label); static int mac_test_init_inpcb_label(struct label *label, int flag) { if (flag & M_WAITOK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "mac_test_init_inpcb_label() at %s:%d", __FILE__, __LINE__); LABEL_INIT(label, MAGIC_INPCB); COUNTER_INC(init_inpcb_label); return (0); } COUNTER_DECL(init_sysv_msg_label); static void mac_test_init_sysv_msgmsg_label(struct label *label) { LABEL_INIT(label, MAGIC_SYSV_MSG); COUNTER_INC(init_sysv_msg_label); } COUNTER_DECL(init_sysv_msq_label); static void mac_test_init_sysv_msgqueue_label(struct label *label) { LABEL_INIT(label, MAGIC_SYSV_MSQ); COUNTER_INC(init_sysv_msq_label); } COUNTER_DECL(init_sysv_sem_label); static void mac_test_init_sysv_sem_label(struct label *label) { LABEL_INIT(label, MAGIC_SYSV_SEM); COUNTER_INC(init_sysv_sem_label); } COUNTER_DECL(init_sysv_shm_label); static void mac_test_init_sysv_shm_label(struct label *label) { LABEL_INIT(label, MAGIC_SYSV_SHM); COUNTER_INC(init_sysv_shm_label); } COUNTER_DECL(init_ipq_label); static int mac_test_init_ipq_label(struct label *label, int flag) { if (flag & M_WAITOK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "mac_test_init_ipq_label() at %s:%d", __FILE__, __LINE__); LABEL_INIT(label, MAGIC_IPQ); COUNTER_INC(init_ipq_label); return (0); } COUNTER_DECL(init_mbuf_label); static int mac_test_init_mbuf_label(struct label *label, int flag) { if (flag & M_WAITOK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "mac_test_init_mbuf_label() at %s:%d", __FILE__, __LINE__); LABEL_INIT(label, MAGIC_MBUF); COUNTER_INC(init_mbuf_label); return (0); } COUNTER_DECL(init_mount_label); static void mac_test_init_mount_label(struct label *label) { LABEL_INIT(label, MAGIC_MOUNT); COUNTER_INC(init_mount_label); } COUNTER_DECL(init_socket_label); static int mac_test_init_socket_label(struct label *label, int flag) { if (flag & M_WAITOK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "mac_test_init_socket_label() at %s:%d", __FILE__, __LINE__); LABEL_INIT(label, MAGIC_SOCKET); COUNTER_INC(init_socket_label); return (0); } COUNTER_DECL(init_socket_peer_label); static int mac_test_init_socket_peer_label(struct label *label, int flag) { if (flag & M_WAITOK) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "mac_test_init_socket_peer_label() at %s:%d", __FILE__, __LINE__); LABEL_INIT(label, MAGIC_SOCKET); COUNTER_INC(init_socket_peer_label); return (0); } COUNTER_DECL(init_pipe_label); static void mac_test_init_pipe_label(struct label *label) { LABEL_INIT(label, MAGIC_PIPE); COUNTER_INC(init_pipe_label); } COUNTER_DECL(init_posix_sem_label); static void mac_test_init_posix_sem_label(struct label *label) { LABEL_INIT(label, MAGIC_POSIX_SEM); COUNTER_INC(init_posix_sem_label); } COUNTER_DECL(init_proc_label); static void mac_test_init_proc_label(struct label *label) { LABEL_INIT(label, MAGIC_PROC); COUNTER_INC(init_proc_label); } COUNTER_DECL(init_vnode_label); 
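/*
 * Illustrative sketch only -- not part of this revision.  Every init/destroy/
 * check entry point in this policy follows the same lifecycle: LABEL_INIT()
 * stamps the label's slot with a per-class magic number, LABEL_CHECK()
 * asserts on each later use that the magic still matches (or is 0), and
 * LABEL_DESTROY() overwrites the slot with MAGIC_FREE so that reuse after
 * free becomes detectable.  The guarded function below walks that lifecycle
 * using the macros defined earlier in this file; the guard macro and function
 * name are hypothetical and exist only for illustration.
 */
#ifdef MAC_TEST_LIFECYCLE_EXAMPLE
static void
mac_test_label_lifecycle_example(struct label *label)
{
	LABEL_INIT(label, MAGIC_PIPE);		/* fresh pipe label */
	LABEL_CHECK(label, MAGIC_PIPE);		/* ordinary use: magic matches */
	LABEL_DESTROY(label, MAGIC_PIPE);	/* slot now holds MAGIC_FREE */
	/*
	 * From here on, a second LABEL_DESTROY() reports a duplicate destroy
	 * via DEBUGGER(), and any further LABEL_CHECK() trips its KASSERT;
	 * that is how stale or wrongly-typed labels are caught.
	 */
}
#endif /* MAC_TEST_LIFECYCLE_EXAMPLE */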
static void mac_test_init_vnode_label(struct label *label) { LABEL_INIT(label, MAGIC_VNODE); COUNTER_INC(init_vnode_label); } COUNTER_DECL(destroy_bpfdesc_label); static void mac_test_destroy_bpfdesc_label(struct label *label) { LABEL_DESTROY(label, MAGIC_BPF); COUNTER_INC(destroy_bpfdesc_label); } COUNTER_DECL(destroy_cred_label); static void mac_test_destroy_cred_label(struct label *label) { LABEL_DESTROY(label, MAGIC_CRED); COUNTER_INC(destroy_cred_label); } COUNTER_DECL(destroy_devfs_label); static void mac_test_destroy_devfs_label(struct label *label) { LABEL_DESTROY(label, MAGIC_DEVFS); COUNTER_INC(destroy_devfs_label); } COUNTER_DECL(destroy_ifnet_label); static void mac_test_destroy_ifnet_label(struct label *label) { LABEL_DESTROY(label, MAGIC_IFNET); COUNTER_INC(destroy_ifnet_label); } COUNTER_DECL(destroy_inpcb_label); static void mac_test_destroy_inpcb_label(struct label *label) { LABEL_DESTROY(label, MAGIC_INPCB); COUNTER_INC(destroy_inpcb_label); } COUNTER_DECL(destroy_sysv_msg_label); static void mac_test_destroy_sysv_msgmsg_label(struct label *label) { LABEL_DESTROY(label, MAGIC_SYSV_MSG); COUNTER_INC(destroy_sysv_msg_label); } COUNTER_DECL(destroy_sysv_msq_label); static void mac_test_destroy_sysv_msgqueue_label(struct label *label) { LABEL_DESTROY(label, MAGIC_SYSV_MSQ); COUNTER_INC(destroy_sysv_msq_label); } COUNTER_DECL(destroy_sysv_sem_label); static void mac_test_destroy_sysv_sem_label(struct label *label) { LABEL_DESTROY(label, MAGIC_SYSV_SEM); COUNTER_INC(destroy_sysv_sem_label); } COUNTER_DECL(destroy_sysv_shm_label); static void mac_test_destroy_sysv_shm_label(struct label *label) { LABEL_DESTROY(label, MAGIC_SYSV_SHM); COUNTER_INC(destroy_sysv_shm_label); } COUNTER_DECL(destroy_ipq_label); static void mac_test_destroy_ipq_label(struct label *label) { LABEL_DESTROY(label, MAGIC_IPQ); COUNTER_INC(destroy_ipq_label); } COUNTER_DECL(destroy_mbuf_label); static void mac_test_destroy_mbuf_label(struct label *label) { /* * If we're loaded dynamically, there may be mbufs in flight that * didn't have label storage allocated for them. Handle this * gracefully. 
*/ if (label == NULL) return; LABEL_DESTROY(label, MAGIC_MBUF); COUNTER_INC(destroy_mbuf_label); } COUNTER_DECL(destroy_mount_label); static void mac_test_destroy_mount_label(struct label *label) { LABEL_DESTROY(label, MAGIC_MOUNT); COUNTER_INC(destroy_mount_label); } COUNTER_DECL(destroy_socket_label); static void mac_test_destroy_socket_label(struct label *label) { LABEL_DESTROY(label, MAGIC_SOCKET); COUNTER_INC(destroy_socket_label); } COUNTER_DECL(destroy_socket_peer_label); static void mac_test_destroy_socket_peer_label(struct label *label) { LABEL_DESTROY(label, MAGIC_SOCKET); COUNTER_INC(destroy_socket_peer_label); } COUNTER_DECL(destroy_pipe_label); static void mac_test_destroy_pipe_label(struct label *label) { LABEL_DESTROY(label, MAGIC_PIPE); COUNTER_INC(destroy_pipe_label); } COUNTER_DECL(destroy_posix_sem_label); static void mac_test_destroy_posix_sem_label(struct label *label) { LABEL_DESTROY(label, MAGIC_POSIX_SEM); COUNTER_INC(destroy_posix_sem_label); } COUNTER_DECL(destroy_proc_label); static void mac_test_destroy_proc_label(struct label *label) { LABEL_DESTROY(label, MAGIC_PROC); COUNTER_INC(destroy_proc_label); } COUNTER_DECL(destroy_vnode_label); static void mac_test_destroy_vnode_label(struct label *label) { LABEL_DESTROY(label, MAGIC_VNODE); COUNTER_INC(destroy_vnode_label); } COUNTER_DECL(copy_cred_label); static void mac_test_copy_cred_label(struct label *src, struct label *dest) { LABEL_CHECK(src, MAGIC_CRED); LABEL_CHECK(dest, MAGIC_CRED); COUNTER_INC(copy_cred_label); } COUNTER_DECL(copy_ifnet_label); static void mac_test_copy_ifnet_label(struct label *src, struct label *dest) { LABEL_CHECK(src, MAGIC_IFNET); LABEL_CHECK(dest, MAGIC_IFNET); COUNTER_INC(copy_ifnet_label); } COUNTER_DECL(copy_mbuf_label); static void mac_test_copy_mbuf_label(struct label *src, struct label *dest) { LABEL_CHECK(src, MAGIC_MBUF); LABEL_CHECK(dest, MAGIC_MBUF); COUNTER_INC(copy_mbuf_label); } COUNTER_DECL(copy_pipe_label); static void mac_test_copy_pipe_label(struct label *src, struct label *dest) { LABEL_CHECK(src, MAGIC_PIPE); LABEL_CHECK(dest, MAGIC_PIPE); COUNTER_INC(copy_pipe_label); } COUNTER_DECL(copy_socket_label); static void mac_test_copy_socket_label(struct label *src, struct label *dest) { LABEL_CHECK(src, MAGIC_SOCKET); LABEL_CHECK(dest, MAGIC_SOCKET); COUNTER_INC(copy_socket_label); } COUNTER_DECL(copy_vnode_label); static void mac_test_copy_vnode_label(struct label *src, struct label *dest) { LABEL_CHECK(src, MAGIC_VNODE); LABEL_CHECK(dest, MAGIC_VNODE); COUNTER_INC(copy_vnode_label); } COUNTER_DECL(externalize_label); static int mac_test_externalize_label(struct label *label, char *element_name, struct sbuf *sb, int *claimed) { LABEL_NOTFREE(label); COUNTER_INC(externalize_label); return (0); } COUNTER_DECL(internalize_label); static int mac_test_internalize_label(struct label *label, char *element_name, char *element_data, int *claimed) { LABEL_NOTFREE(label); COUNTER_INC(internalize_label); return (0); } /* * Labeling event operations: file system objects, and things that look * a lot like file system objects. 
*/ COUNTER_DECL(associate_vnode_devfs); static void mac_test_associate_vnode_devfs(struct mount *mp, struct label *mplabel, struct devfs_dirent *de, struct label *delabel, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(mplabel, MAGIC_MOUNT); LABEL_CHECK(delabel, MAGIC_DEVFS); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(associate_vnode_devfs); } COUNTER_DECL(associate_vnode_extattr); static int mac_test_associate_vnode_extattr(struct mount *mp, struct label *mplabel, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(mplabel, MAGIC_MOUNT); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(associate_vnode_extattr); return (0); } COUNTER_DECL(associate_vnode_singlelabel); static void mac_test_associate_vnode_singlelabel(struct mount *mp, struct label *mplabel, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(mplabel, MAGIC_MOUNT); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(associate_vnode_singlelabel); } COUNTER_DECL(create_devfs_device); static void mac_test_create_devfs_device(struct ucred *cred, struct mount *mp, struct cdev *dev, struct devfs_dirent *de, struct label *delabel) { if (cred != NULL) LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(delabel, MAGIC_DEVFS); COUNTER_INC(create_devfs_device); } COUNTER_DECL(create_devfs_directory); static void mac_test_create_devfs_directory(struct mount *mp, char *dirname, int dirnamelen, struct devfs_dirent *de, struct label *delabel) { LABEL_CHECK(delabel, MAGIC_DEVFS); COUNTER_INC(create_devfs_directory); } COUNTER_DECL(create_devfs_symlink); static void mac_test_create_devfs_symlink(struct ucred *cred, struct mount *mp, struct devfs_dirent *dd, struct label *ddlabel, struct devfs_dirent *de, struct label *delabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(ddlabel, MAGIC_DEVFS); LABEL_CHECK(delabel, MAGIC_DEVFS); COUNTER_INC(create_devfs_symlink); } COUNTER_DECL(create_vnode_extattr); static int mac_test_create_vnode_extattr(struct ucred *cred, struct mount *mp, struct label *mplabel, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, struct componentname *cnp) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(mplabel, MAGIC_MOUNT); LABEL_CHECK(dvplabel, MAGIC_VNODE); COUNTER_INC(create_vnode_extattr); return (0); } COUNTER_DECL(create_mount); static void mac_test_create_mount(struct ucred *cred, struct mount *mp, struct label *mplabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(mplabel, MAGIC_MOUNT); COUNTER_INC(create_mount); } COUNTER_DECL(relabel_vnode); static void mac_test_relabel_vnode(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct label *label) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); LABEL_CHECK(label, MAGIC_VNODE); COUNTER_INC(relabel_vnode); } COUNTER_DECL(setlabel_vnode_extattr); static int mac_test_setlabel_vnode_extattr(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct label *intlabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); LABEL_CHECK(intlabel, MAGIC_VNODE); COUNTER_INC(setlabel_vnode_extattr); return (0); } COUNTER_DECL(update_devfs); static void mac_test_update_devfs(struct mount *mp, struct devfs_dirent *devfs_dirent, struct label *direntlabel, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(direntlabel, MAGIC_DEVFS); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(update_devfs); } /* * Labeling event operations: IPC object. 
*/ COUNTER_DECL(create_mbuf_from_socket); static void mac_test_create_mbuf_from_socket(struct socket *so, struct label *socketlabel, struct mbuf *m, struct label *mbuflabel) { LABEL_CHECK(socketlabel, MAGIC_SOCKET); LABEL_CHECK(mbuflabel, MAGIC_MBUF); COUNTER_INC(create_mbuf_from_socket); } COUNTER_DECL(create_socket); static void mac_test_create_socket(struct ucred *cred, struct socket *socket, struct label *socketlabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(socketlabel, MAGIC_SOCKET); COUNTER_INC(create_socket); } COUNTER_DECL(create_pipe); static void mac_test_create_pipe(struct ucred *cred, struct pipepair *pp, struct label *pipelabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(pipelabel, MAGIC_PIPE); COUNTER_INC(create_pipe); } COUNTER_DECL(create_posix_sem); static void mac_test_create_posix_sem(struct ucred *cred, struct ksem *ks, struct label *kslabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(kslabel, MAGIC_POSIX_SEM); COUNTER_INC(create_posix_sem); } COUNTER_DECL(create_socket_from_socket); static void mac_test_create_socket_from_socket(struct socket *oldsocket, struct label *oldsocketlabel, struct socket *newsocket, struct label *newsocketlabel) { LABEL_CHECK(oldsocketlabel, MAGIC_SOCKET); LABEL_CHECK(newsocketlabel, MAGIC_SOCKET); COUNTER_INC(create_socket_from_socket); } COUNTER_DECL(relabel_socket); static void mac_test_relabel_socket(struct ucred *cred, struct socket *socket, struct label *socketlabel, struct label *newlabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(newlabel, MAGIC_SOCKET); COUNTER_INC(relabel_socket); } COUNTER_DECL(relabel_pipe); static void mac_test_relabel_pipe(struct ucred *cred, struct pipepair *pp, struct label *pipelabel, struct label *newlabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(pipelabel, MAGIC_PIPE); LABEL_CHECK(newlabel, MAGIC_PIPE); COUNTER_INC(relabel_pipe); } COUNTER_DECL(set_socket_peer_from_mbuf); static void mac_test_set_socket_peer_from_mbuf(struct mbuf *mbuf, struct label *mbuflabel, struct socket *socket, struct label *socketpeerlabel) { LABEL_CHECK(mbuflabel, MAGIC_MBUF); LABEL_CHECK(socketpeerlabel, MAGIC_SOCKET); COUNTER_INC(set_socket_peer_from_mbuf); } /* * Labeling event operations: network objects. 
*/ COUNTER_DECL(set_socket_peer_from_socket); static void mac_test_set_socket_peer_from_socket(struct socket *oldsocket, struct label *oldsocketlabel, struct socket *newsocket, struct label *newsocketpeerlabel) { LABEL_CHECK(oldsocketlabel, MAGIC_SOCKET); LABEL_CHECK(newsocketpeerlabel, MAGIC_SOCKET); COUNTER_INC(set_socket_peer_from_socket); } COUNTER_DECL(create_bpfdesc); static void mac_test_create_bpfdesc(struct ucred *cred, struct bpf_d *bpf_d, struct label *bpflabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(bpflabel, MAGIC_BPF); COUNTER_INC(create_bpfdesc); } COUNTER_DECL(create_datagram_from_ipq); static void mac_test_create_datagram_from_ipq(struct ipq *ipq, struct label *ipqlabel, struct mbuf *datagram, struct label *datagramlabel) { LABEL_CHECK(ipqlabel, MAGIC_IPQ); LABEL_CHECK(datagramlabel, MAGIC_MBUF); COUNTER_INC(create_datagram_from_ipq); } COUNTER_DECL(create_fragment); static void mac_test_create_fragment(struct mbuf *datagram, struct label *datagramlabel, struct mbuf *fragment, struct label *fragmentlabel) { LABEL_CHECK(datagramlabel, MAGIC_MBUF); LABEL_CHECK(fragmentlabel, MAGIC_MBUF); COUNTER_INC(create_fragment); } COUNTER_DECL(create_ifnet); static void mac_test_create_ifnet(struct ifnet *ifnet, struct label *ifnetlabel) { LABEL_CHECK(ifnetlabel, MAGIC_IFNET); COUNTER_INC(create_ifnet); } COUNTER_DECL(create_inpcb_from_socket); static void mac_test_create_inpcb_from_socket(struct socket *so, struct label *solabel, struct inpcb *inp, struct label *inplabel) { LABEL_CHECK(solabel, MAGIC_SOCKET); LABEL_CHECK(inplabel, MAGIC_INPCB); COUNTER_INC(create_inpcb_from_socket); } COUNTER_DECL(create_sysv_msgmsg); static void mac_test_create_sysv_msgmsg(struct ucred *cred, struct msqid_kernel *msqkptr, struct label *msqlabel, struct msg *msgptr, struct label *msglabel) { LABEL_CHECK(msglabel, MAGIC_SYSV_MSG); LABEL_CHECK(msqlabel, MAGIC_SYSV_MSQ); COUNTER_INC(create_sysv_msgmsg); } COUNTER_DECL(create_sysv_msgqueue); static void mac_test_create_sysv_msgqueue(struct ucred *cred, struct msqid_kernel *msqkptr, struct label *msqlabel) { LABEL_CHECK(msqlabel, MAGIC_SYSV_MSQ); COUNTER_INC(create_sysv_msgqueue); } COUNTER_DECL(create_sysv_sem); static void mac_test_create_sysv_sem(struct ucred *cred, struct semid_kernel *semakptr, struct label *semalabel) { LABEL_CHECK(semalabel, MAGIC_SYSV_SEM); COUNTER_INC(create_sysv_sem); } COUNTER_DECL(create_sysv_shm); static void mac_test_create_sysv_shm(struct ucred *cred, struct shmid_kernel *shmsegptr, struct label *shmlabel) { LABEL_CHECK(shmlabel, MAGIC_SYSV_SHM); COUNTER_INC(create_sysv_shm); } COUNTER_DECL(create_ipq); static void mac_test_create_ipq(struct mbuf *fragment, struct label *fragmentlabel, struct ipq *ipq, struct label *ipqlabel) { LABEL_CHECK(fragmentlabel, MAGIC_MBUF); LABEL_CHECK(ipqlabel, MAGIC_IPQ); COUNTER_INC(create_ipq); } COUNTER_DECL(create_mbuf_from_inpcb); static void mac_test_create_mbuf_from_inpcb(struct inpcb *inp, struct label *inplabel, struct mbuf *m, struct label *mlabel) { LABEL_CHECK(inplabel, MAGIC_INPCB); LABEL_CHECK(mlabel, MAGIC_MBUF); COUNTER_INC(create_mbuf_from_inpcb); } COUNTER_DECL(create_mbuf_linklayer); static void mac_test_create_mbuf_linklayer(struct ifnet *ifnet, struct label *ifnetlabel, struct mbuf *mbuf, struct label *mbuflabel) { LABEL_CHECK(ifnetlabel, MAGIC_IFNET); LABEL_CHECK(mbuflabel, MAGIC_MBUF); COUNTER_INC(create_mbuf_linklayer); } COUNTER_DECL(create_mbuf_from_bpfdesc); static void mac_test_create_mbuf_from_bpfdesc(struct bpf_d *bpf_d, struct label *bpflabel, struct 
mbuf *mbuf, struct label *mbuflabel) { LABEL_CHECK(bpflabel, MAGIC_BPF); LABEL_CHECK(mbuflabel, MAGIC_MBUF); COUNTER_INC(create_mbuf_from_bpfdesc); } COUNTER_DECL(create_mbuf_from_ifnet); static void mac_test_create_mbuf_from_ifnet(struct ifnet *ifnet, struct label *ifnetlabel, struct mbuf *m, struct label *mbuflabel) { LABEL_CHECK(ifnetlabel, MAGIC_IFNET); LABEL_CHECK(mbuflabel, MAGIC_MBUF); COUNTER_INC(create_mbuf_from_ifnet); } COUNTER_DECL(create_mbuf_multicast_encap); static void mac_test_create_mbuf_multicast_encap(struct mbuf *oldmbuf, struct label *oldmbuflabel, struct ifnet *ifnet, struct label *ifnetlabel, struct mbuf *newmbuf, struct label *newmbuflabel) { LABEL_CHECK(oldmbuflabel, MAGIC_MBUF); LABEL_CHECK(ifnetlabel, MAGIC_IFNET); LABEL_CHECK(newmbuflabel, MAGIC_MBUF); COUNTER_INC(create_mbuf_multicast_encap); } COUNTER_DECL(create_mbuf_netlayer); static void mac_test_create_mbuf_netlayer(struct mbuf *oldmbuf, struct label *oldmbuflabel, struct mbuf *newmbuf, struct label *newmbuflabel) { LABEL_CHECK(oldmbuflabel, MAGIC_MBUF); LABEL_CHECK(newmbuflabel, MAGIC_MBUF); COUNTER_INC(create_mbuf_netlayer); } COUNTER_DECL(fragment_match); static int mac_test_fragment_match(struct mbuf *fragment, struct label *fragmentlabel, struct ipq *ipq, struct label *ipqlabel) { LABEL_CHECK(fragmentlabel, MAGIC_MBUF); LABEL_CHECK(ipqlabel, MAGIC_IPQ); COUNTER_INC(fragment_match); return (1); } COUNTER_DECL(reflect_mbuf_icmp); static void mac_test_reflect_mbuf_icmp(struct mbuf *m, struct label *mlabel) { LABEL_CHECK(mlabel, MAGIC_MBUF); COUNTER_INC(reflect_mbuf_icmp); } COUNTER_DECL(reflect_mbuf_tcp); static void mac_test_reflect_mbuf_tcp(struct mbuf *m, struct label *mlabel) { LABEL_CHECK(mlabel, MAGIC_MBUF); COUNTER_INC(reflect_mbuf_tcp); } COUNTER_DECL(relabel_ifnet); static void mac_test_relabel_ifnet(struct ucred *cred, struct ifnet *ifnet, struct label *ifnetlabel, struct label *newlabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(ifnetlabel, MAGIC_IFNET); LABEL_CHECK(newlabel, MAGIC_IFNET); COUNTER_INC(relabel_ifnet); } COUNTER_DECL(update_ipq); static void mac_test_update_ipq(struct mbuf *fragment, struct label *fragmentlabel, struct ipq *ipq, struct label *ipqlabel) { LABEL_CHECK(fragmentlabel, MAGIC_MBUF); LABEL_CHECK(ipqlabel, MAGIC_IPQ); COUNTER_INC(update_ipq); } COUNTER_DECL(inpcb_sosetlabel); static void mac_test_inpcb_sosetlabel(struct socket *so, struct label *solabel, struct inpcb *inp, struct label *inplabel) { LABEL_CHECK(solabel, MAGIC_SOCKET); LABEL_CHECK(inplabel, MAGIC_INPCB); COUNTER_INC(inpcb_sosetlabel); } /* * Labeling event operations: processes. 
*/ COUNTER_DECL(execve_transition); static void mac_test_execve_transition(struct ucred *old, struct ucred *new, struct vnode *vp, struct label *filelabel, struct label *interpvplabel, struct image_params *imgp, struct label *execlabel) { LABEL_CHECK(old->cr_label, MAGIC_CRED); LABEL_CHECK(new->cr_label, MAGIC_CRED); LABEL_CHECK(filelabel, MAGIC_VNODE); LABEL_CHECK(interpvplabel, MAGIC_VNODE); LABEL_CHECK(execlabel, MAGIC_CRED); COUNTER_INC(execve_transition); } COUNTER_DECL(execve_will_transition); static int mac_test_execve_will_transition(struct ucred *old, struct vnode *vp, struct label *filelabel, struct label *interpvplabel, struct image_params *imgp, struct label *execlabel) { LABEL_CHECK(old->cr_label, MAGIC_CRED); LABEL_CHECK(filelabel, MAGIC_VNODE); LABEL_CHECK(interpvplabel, MAGIC_VNODE); LABEL_CHECK(execlabel, MAGIC_CRED); COUNTER_INC(execve_will_transition); return (0); } COUNTER_DECL(create_proc0); static void mac_test_create_proc0(struct ucred *cred) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(create_proc0); } COUNTER_DECL(create_proc1); static void mac_test_create_proc1(struct ucred *cred) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(create_proc1); } COUNTER_DECL(relabel_cred); static void mac_test_relabel_cred(struct ucred *cred, struct label *newlabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(newlabel, MAGIC_CRED); COUNTER_INC(relabel_cred); } COUNTER_DECL(thread_userret); static void mac_test_thread_userret(struct thread *td) { COUNTER_INC(thread_userret); } /* * Label cleanup/flush operations */ COUNTER_DECL(cleanup_sysv_msgmsg); static void mac_test_cleanup_sysv_msgmsg(struct label *msglabel) { LABEL_CHECK(msglabel, MAGIC_SYSV_MSG); COUNTER_INC(cleanup_sysv_msgmsg); } COUNTER_DECL(cleanup_sysv_msgqueue); static void mac_test_cleanup_sysv_msgqueue(struct label *msqlabel) { LABEL_CHECK(msqlabel, MAGIC_SYSV_MSQ); COUNTER_INC(cleanup_sysv_msgqueue); } COUNTER_DECL(cleanup_sysv_sem); static void mac_test_cleanup_sysv_sem(struct label *semalabel) { LABEL_CHECK(semalabel, MAGIC_SYSV_SEM); COUNTER_INC(cleanup_sysv_sem); } COUNTER_DECL(cleanup_sysv_shm); static void mac_test_cleanup_sysv_shm(struct label *shmlabel) { LABEL_CHECK(shmlabel, MAGIC_SYSV_SHM); COUNTER_INC(cleanup_sysv_shm); } /* * Access control checks. 
*/ COUNTER_DECL(check_bpfdesc_receive); static int mac_test_check_bpfdesc_receive(struct bpf_d *bpf_d, struct label *bpflabel, struct ifnet *ifnet, struct label *ifnetlabel) { LABEL_CHECK(bpflabel, MAGIC_BPF); LABEL_CHECK(ifnetlabel, MAGIC_IFNET); COUNTER_INC(check_bpfdesc_receive); return (0); } COUNTER_DECL(check_cred_relabel); static int mac_test_check_cred_relabel(struct ucred *cred, struct label *newlabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(newlabel, MAGIC_CRED); COUNTER_INC(check_cred_relabel); return (0); } COUNTER_DECL(check_cred_visible); static int mac_test_check_cred_visible(struct ucred *u1, struct ucred *u2) { LABEL_CHECK(u1->cr_label, MAGIC_CRED); LABEL_CHECK(u2->cr_label, MAGIC_CRED); COUNTER_INC(check_cred_visible); return (0); } COUNTER_DECL(check_ifnet_relabel); static int mac_test_check_ifnet_relabel(struct ucred *cred, struct ifnet *ifnet, struct label *ifnetlabel, struct label *newlabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(ifnetlabel, MAGIC_IFNET); LABEL_CHECK(newlabel, MAGIC_IFNET); COUNTER_INC(check_ifnet_relabel); return (0); } COUNTER_DECL(check_ifnet_transmit); static int mac_test_check_ifnet_transmit(struct ifnet *ifnet, struct label *ifnetlabel, struct mbuf *m, struct label *mbuflabel) { LABEL_CHECK(ifnetlabel, MAGIC_IFNET); LABEL_CHECK(mbuflabel, MAGIC_MBUF); COUNTER_INC(check_ifnet_transmit); return (0); } COUNTER_DECL(check_inpcb_deliver); static int mac_test_check_inpcb_deliver(struct inpcb *inp, struct label *inplabel, struct mbuf *m, struct label *mlabel) { LABEL_CHECK(inplabel, MAGIC_INPCB); LABEL_CHECK(mlabel, MAGIC_MBUF); COUNTER_INC(check_inpcb_deliver); return (0); } COUNTER_DECL(check_sysv_msgmsq); static int mac_test_check_sysv_msgmsq(struct ucred *cred, struct msg *msgptr, struct label *msglabel, struct msqid_kernel *msqkptr, struct label *msqklabel) { LABEL_CHECK(msqklabel, MAGIC_SYSV_MSQ); LABEL_CHECK(msglabel, MAGIC_SYSV_MSG); LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_sysv_msgmsq); return (0); } COUNTER_DECL(check_sysv_msgrcv); static int mac_test_check_sysv_msgrcv(struct ucred *cred, struct msg *msgptr, struct label *msglabel) { LABEL_CHECK(msglabel, MAGIC_SYSV_MSG); LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_sysv_msgrcv); return (0); } COUNTER_DECL(check_sysv_msgrmid); static int mac_test_check_sysv_msgrmid(struct ucred *cred, struct msg *msgptr, struct label *msglabel) { LABEL_CHECK(msglabel, MAGIC_SYSV_MSG); LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_sysv_msgrmid); return (0); } COUNTER_DECL(check_sysv_msqget); static int mac_test_check_sysv_msqget(struct ucred *cred, struct msqid_kernel *msqkptr, struct label *msqklabel) { LABEL_CHECK(msqklabel, MAGIC_SYSV_MSQ); LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_sysv_msqget); return (0); } COUNTER_DECL(check_sysv_msqsnd); static int mac_test_check_sysv_msqsnd(struct ucred *cred, struct msqid_kernel *msqkptr, struct label *msqklabel) { LABEL_CHECK(msqklabel, MAGIC_SYSV_MSQ); LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_sysv_msqsnd); return (0); } COUNTER_DECL(check_sysv_msqrcv); static int mac_test_check_sysv_msqrcv(struct ucred *cred, struct msqid_kernel *msqkptr, struct label *msqklabel) { LABEL_CHECK(msqklabel, MAGIC_SYSV_MSQ); LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_sysv_msqrcv); return (0); } COUNTER_DECL(check_sysv_msqctl); static int mac_test_check_sysv_msqctl(struct ucred *cred, struct msqid_kernel *msqkptr, struct label *msqklabel, int cmd) { 
LABEL_CHECK(msqklabel, MAGIC_SYSV_MSQ); LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_sysv_msqctl); return (0); } COUNTER_DECL(check_sysv_semctl); static int mac_test_check_sysv_semctl(struct ucred *cred, struct semid_kernel *semakptr, struct label *semaklabel, int cmd) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(semaklabel, MAGIC_SYSV_SEM); COUNTER_INC(check_sysv_semctl); return (0); } COUNTER_DECL(check_sysv_semget); static int mac_test_check_sysv_semget(struct ucred *cred, struct semid_kernel *semakptr, struct label *semaklabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(semaklabel, MAGIC_SYSV_SEM); COUNTER_INC(check_sysv_semget); return (0); } COUNTER_DECL(check_sysv_semop); static int mac_test_check_sysv_semop(struct ucred *cred, struct semid_kernel *semakptr, struct label *semaklabel, size_t accesstype) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(semaklabel, MAGIC_SYSV_SEM); COUNTER_INC(check_sysv_semop); return (0); } COUNTER_DECL(check_sysv_shmat); static int mac_test_check_sysv_shmat(struct ucred *cred, struct shmid_kernel *shmsegptr, struct label *shmseglabel, int shmflg) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(shmseglabel, MAGIC_SYSV_SHM); COUNTER_INC(check_sysv_shmat); return (0); } COUNTER_DECL(check_sysv_shmctl); static int mac_test_check_sysv_shmctl(struct ucred *cred, struct shmid_kernel *shmsegptr, struct label *shmseglabel, int cmd) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(shmseglabel, MAGIC_SYSV_SHM); COUNTER_INC(check_sysv_shmctl); return (0); } COUNTER_DECL(check_sysv_shmdt); static int mac_test_check_sysv_shmdt(struct ucred *cred, struct shmid_kernel *shmsegptr, struct label *shmseglabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(shmseglabel, MAGIC_SYSV_SHM); COUNTER_INC(check_sysv_shmdt); return (0); } COUNTER_DECL(check_sysv_shmget); static int mac_test_check_sysv_shmget(struct ucred *cred, struct shmid_kernel *shmsegptr, struct label *shmseglabel, int shmflg) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(shmseglabel, MAGIC_SYSV_SHM); COUNTER_INC(check_sysv_shmget); return (0); } COUNTER_DECL(check_kenv_dump); static int mac_test_check_kenv_dump(struct ucred *cred) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_kenv_dump); return (0); } COUNTER_DECL(check_kenv_get); static int mac_test_check_kenv_get(struct ucred *cred, char *name) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_kenv_get); return (0); } COUNTER_DECL(check_kenv_set); static int mac_test_check_kenv_set(struct ucred *cred, char *name, char *value) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_kenv_set); return (0); } COUNTER_DECL(check_kenv_unset); static int mac_test_check_kenv_unset(struct ucred *cred, char *name) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_kenv_unset); return (0); } COUNTER_DECL(check_kld_load); static int mac_test_check_kld_load(struct ucred *cred, struct vnode *vp, struct label *label) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(label, MAGIC_VNODE); COUNTER_INC(check_kld_load); return (0); } COUNTER_DECL(check_kld_stat); static int mac_test_check_kld_stat(struct ucred *cred) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_kld_stat); return (0); } COUNTER_DECL(check_mount_stat); static int mac_test_check_mount_stat(struct ucred *cred, struct mount *mp, struct label *mplabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(mplabel, MAGIC_MOUNT); COUNTER_INC(check_mount_stat); return (0); } 
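/*
 * Illustrative sketch only -- not part of this revision.  The point of this
 * change is that bare kdb_enter(msg) calls become kdb_enter_why(why, msg), so
 * each debugger entry carries a reason class: KDB_WHY_MAC for the DEBUGGER()
 * macro earlier in this file, KDB_WHY_POWERPC and KDB_WHY_BOOTFLAGS in the
 * powerpc files above.  The guarded helper below mirrors the call shape the
 * DEBUGGER() macro uses, assuming kdb_enter_why() and KDB_WHY_MAC are
 * provided by <sys/kdb.h>; the guard macro and helper name are hypothetical.
 */
#ifdef MAC_TEST_KDB_EXAMPLE
#include <sys/kdb.h>

static void
mac_test_debugger_example(const char *func, const char *msg)
{
#ifdef KDB
	/*
	 * With KDB compiled in, the entry is attributed to the MAC framework
	 * instead of being an anonymous debugger entry.
	 */
	kdb_enter_why(KDB_WHY_MAC, msg);
#else
	/* Without KDB, fall back to logging, as the DEBUGGER() macro does. */
	printf("mac_test: %s: %s\n", func, msg);
#endif
}
#endif /* MAC_TEST_KDB_EXAMPLE */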
COUNTER_DECL(check_pipe_ioctl); static int mac_test_check_pipe_ioctl(struct ucred *cred, struct pipepair *pp, struct label *pipelabel, unsigned long cmd, void /* caddr_t */ *data) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(pipelabel, MAGIC_PIPE); COUNTER_INC(check_pipe_ioctl); return (0); } COUNTER_DECL(check_pipe_poll); static int mac_test_check_pipe_poll(struct ucred *cred, struct pipepair *pp, struct label *pipelabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(pipelabel, MAGIC_PIPE); COUNTER_INC(check_pipe_poll); return (0); } COUNTER_DECL(check_pipe_read); static int mac_test_check_pipe_read(struct ucred *cred, struct pipepair *pp, struct label *pipelabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(pipelabel, MAGIC_PIPE); COUNTER_INC(check_pipe_read); return (0); } COUNTER_DECL(check_pipe_relabel); static int mac_test_check_pipe_relabel(struct ucred *cred, struct pipepair *pp, struct label *pipelabel, struct label *newlabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(pipelabel, MAGIC_PIPE); LABEL_CHECK(newlabel, MAGIC_PIPE); COUNTER_INC(check_pipe_relabel); return (0); } COUNTER_DECL(check_pipe_stat); static int mac_test_check_pipe_stat(struct ucred *cred, struct pipepair *pp, struct label *pipelabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(pipelabel, MAGIC_PIPE); COUNTER_INC(check_pipe_stat); return (0); } COUNTER_DECL(check_pipe_write); static int mac_test_check_pipe_write(struct ucred *cred, struct pipepair *pp, struct label *pipelabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(pipelabel, MAGIC_PIPE); COUNTER_INC(check_pipe_write); return (0); } COUNTER_DECL(check_posix_sem); static int mac_test_check_posix_sem(struct ucred *cred, struct ksem *ks, struct label *kslabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(kslabel, MAGIC_POSIX_SEM); COUNTER_INC(check_posix_sem); return (0); } COUNTER_DECL(check_proc_debug); static int mac_test_check_proc_debug(struct ucred *cred, struct proc *p) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(p->p_ucred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_debug); return (0); } COUNTER_DECL(check_proc_sched); static int mac_test_check_proc_sched(struct ucred *cred, struct proc *p) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(p->p_ucred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_sched); return (0); } COUNTER_DECL(check_proc_signal); static int mac_test_check_proc_signal(struct ucred *cred, struct proc *p, int signum) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(p->p_ucred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_signal); return (0); } COUNTER_DECL(check_proc_setaudit); static int mac_test_check_proc_setaudit(struct ucred *cred, struct auditinfo *ai) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_setaudit); return (0); } COUNTER_DECL(check_proc_setaudit_addr); static int mac_test_check_proc_setaudit_addr(struct ucred *cred, struct auditinfo_addr *aia) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_setaudit_addr); return (0); } COUNTER_DECL(check_proc_setauid); static int mac_test_check_proc_setauid(struct ucred *cred, uid_t auid) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_setauid); return (0); } COUNTER_DECL(check_proc_setuid); static int mac_test_check_proc_setuid(struct ucred *cred, uid_t uid) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_setuid); return (0); } COUNTER_DECL(check_proc_euid); static int mac_test_check_proc_seteuid(struct ucred 
*cred, uid_t euid) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_euid); return (0); } COUNTER_DECL(check_proc_setgid); static int mac_test_check_proc_setgid(struct ucred *cred, gid_t gid) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_setgid); return (0); } COUNTER_DECL(check_proc_setegid); static int mac_test_check_proc_setegid(struct ucred *cred, gid_t egid) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_setegid); return (0); } COUNTER_DECL(check_proc_setgroups); static int mac_test_check_proc_setgroups(struct ucred *cred, int ngroups, gid_t *gidset) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_setgroups); return (0); } COUNTER_DECL(check_proc_setreuid); static int mac_test_check_proc_setreuid(struct ucred *cred, uid_t ruid, uid_t euid) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_setreuid); return (0); } COUNTER_DECL(check_proc_setregid); static int mac_test_check_proc_setregid(struct ucred *cred, gid_t rgid, gid_t egid) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_setregid); return (0); } COUNTER_DECL(check_proc_setresuid); static int mac_test_check_proc_setresuid(struct ucred *cred, uid_t ruid, uid_t euid, uid_t suid) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_setresuid); return (0); } COUNTER_DECL(check_proc_setresgid); static int mac_test_check_proc_setresgid(struct ucred *cred, gid_t rgid, gid_t egid, gid_t sgid) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_setresgid); return (0); } COUNTER_DECL(check_proc_wait); static int mac_test_check_proc_wait(struct ucred *cred, struct proc *p) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(p->p_ucred->cr_label, MAGIC_CRED); COUNTER_INC(check_proc_wait); return (0); } COUNTER_DECL(check_socket_accept); static int mac_test_check_socket_accept(struct ucred *cred, struct socket *so, struct label *solabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(solabel, MAGIC_SOCKET); COUNTER_INC(check_socket_accept); return (0); } COUNTER_DECL(check_socket_bind); static int mac_test_check_socket_bind(struct ucred *cred, struct socket *so, struct label *solabel, struct sockaddr *sa) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(solabel, MAGIC_SOCKET); COUNTER_INC(check_socket_bind); return (0); } COUNTER_DECL(check_socket_connect); static int mac_test_check_socket_connect(struct ucred *cred, struct socket *so, struct label *solabel, struct sockaddr *sa) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(solabel, MAGIC_SOCKET); COUNTER_INC(check_socket_connect); return (0); } COUNTER_DECL(check_socket_deliver); static int mac_test_check_socket_deliver(struct socket *so, struct label *solabel, struct mbuf *m, struct label *mlabel) { LABEL_CHECK(solabel, MAGIC_SOCKET); LABEL_CHECK(mlabel, MAGIC_MBUF); COUNTER_INC(check_socket_deliver); return (0); } COUNTER_DECL(check_socket_listen); static int mac_test_check_socket_listen(struct ucred *cred, struct socket *so, struct label *solabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(solabel, MAGIC_SOCKET); COUNTER_INC(check_socket_listen); return (0); } COUNTER_DECL(check_socket_poll); static int mac_test_check_socket_poll(struct ucred *cred, struct socket *so, struct label *solabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(solabel, MAGIC_SOCKET); COUNTER_INC(check_socket_poll); return (0); } COUNTER_DECL(check_socket_receive); static int mac_test_check_socket_receive(struct ucred *cred, struct 
socket *so, struct label *solabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(solabel, MAGIC_SOCKET); COUNTER_INC(check_socket_receive); return (0); } COUNTER_DECL(check_socket_relabel); static int mac_test_check_socket_relabel(struct ucred *cred, struct socket *so, struct label *solabel, struct label *newlabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(solabel, MAGIC_SOCKET); LABEL_CHECK(newlabel, MAGIC_SOCKET); COUNTER_INC(check_socket_relabel); return (0); } COUNTER_DECL(check_socket_send); static int mac_test_check_socket_send(struct ucred *cred, struct socket *so, struct label *solabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(solabel, MAGIC_SOCKET); COUNTER_INC(check_socket_send); return (0); } COUNTER_DECL(check_socket_stat); static int mac_test_check_socket_stat(struct ucred *cred, struct socket *so, struct label *solabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(solabel, MAGIC_SOCKET); COUNTER_INC(check_socket_stat); return (0); } COUNTER_DECL(check_socket_visible); static int mac_test_check_socket_visible(struct ucred *cred, struct socket *so, struct label *solabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(solabel, MAGIC_SOCKET); COUNTER_INC(check_socket_visible); return (0); } COUNTER_DECL(check_system_acct); static int mac_test_check_system_acct(struct ucred *cred, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_system_acct); return (0); } COUNTER_DECL(check_system_audit); static int mac_test_check_system_audit(struct ucred *cred, void *record, int length) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_system_audit); return (0); } COUNTER_DECL(check_system_auditctl); static int mac_test_check_system_auditctl(struct ucred *cred, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_system_auditctl); return (0); } COUNTER_DECL(check_system_auditon); static int mac_test_check_system_auditon(struct ucred *cred, int cmd) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_system_auditon); return (0); } COUNTER_DECL(check_system_reboot); static int mac_test_check_system_reboot(struct ucred *cred, int how) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_system_reboot); return (0); } COUNTER_DECL(check_system_swapoff); static int mac_test_check_system_swapoff(struct ucred *cred, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_system_swapoff); return (0); } COUNTER_DECL(check_system_swapon); static int mac_test_check_system_swapon(struct ucred *cred, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_system_swapon); return (0); } COUNTER_DECL(check_system_sysctl); static int mac_test_check_system_sysctl(struct ucred *cred, struct sysctl_oid *oidp, void *arg1, int arg2, struct sysctl_req *req) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); COUNTER_INC(check_system_sysctl); return (0); } COUNTER_DECL(check_vnode_access); static int mac_test_check_vnode_access(struct ucred *cred, struct vnode *vp, struct label *vplabel, int acc_mode) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_access); return (0); } COUNTER_DECL(check_vnode_chdir); static int mac_test_check_vnode_chdir(struct ucred *cred, struct 
vnode *dvp, struct label *dvplabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(dvplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_chdir); return (0); } COUNTER_DECL(check_vnode_chroot); static int mac_test_check_vnode_chroot(struct ucred *cred, struct vnode *dvp, struct label *dvplabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(dvplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_chroot); return (0); } COUNTER_DECL(check_vnode_create); static int mac_test_check_vnode_create(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct componentname *cnp, struct vattr *vap) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(dvplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_create); return (0); } COUNTER_DECL(check_vnode_deleteacl); static int mac_test_check_vnode_deleteacl(struct ucred *cred, struct vnode *vp, struct label *vplabel, acl_type_t type) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_deleteacl); return (0); } COUNTER_DECL(check_vnode_deleteextattr); static int mac_test_check_vnode_deleteextattr(struct ucred *cred, struct vnode *vp, struct label *vplabel, int attrnamespace, const char *name) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_deleteextattr); return (0); } COUNTER_DECL(check_vnode_exec); static int mac_test_check_vnode_exec(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct image_params *imgp, struct label *execlabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); LABEL_CHECK(execlabel, MAGIC_CRED); COUNTER_INC(check_vnode_exec); return (0); } COUNTER_DECL(check_vnode_getacl); static int mac_test_check_vnode_getacl(struct ucred *cred, struct vnode *vp, struct label *vplabel, acl_type_t type) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_getacl); return (0); } COUNTER_DECL(check_vnode_getextattr); static int mac_test_check_vnode_getextattr(struct ucred *cred, struct vnode *vp, struct label *vplabel, int attrnamespace, const char *name, struct uio *uio) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_getextattr); return (0); } COUNTER_DECL(check_vnode_link); static int mac_test_check_vnode_link(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, struct componentname *cnp) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(dvplabel, MAGIC_VNODE); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_link); return (0); } COUNTER_DECL(check_vnode_listextattr); static int mac_test_check_vnode_listextattr(struct ucred *cred, struct vnode *vp, struct label *vplabel, int attrnamespace) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_listextattr); return (0); } COUNTER_DECL(check_vnode_lookup); static int mac_test_check_vnode_lookup(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct componentname *cnp) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(dvplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_lookup); return (0); } COUNTER_DECL(check_vnode_mmap); static int mac_test_check_vnode_mmap(struct ucred *cred, struct vnode *vp, struct label *vplabel, int prot, int flags) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_mmap); return (0); } COUNTER_DECL(check_vnode_open); static int 
mac_test_check_vnode_open(struct ucred *cred, struct vnode *vp, struct label *vplabel, int acc_mode) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_open); return (0); } COUNTER_DECL(check_vnode_poll); static int mac_test_check_vnode_poll(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(active_cred->cr_label, MAGIC_CRED); if (file_cred != NULL) LABEL_CHECK(file_cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_poll); return (0); } COUNTER_DECL(check_vnode_read); static int mac_test_check_vnode_read(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(active_cred->cr_label, MAGIC_CRED); if (file_cred != NULL) LABEL_CHECK(file_cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_read); return (0); } COUNTER_DECL(check_vnode_readdir); static int mac_test_check_vnode_readdir(struct ucred *cred, struct vnode *dvp, struct label *dvplabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(dvplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_readdir); return (0); } COUNTER_DECL(check_vnode_readlink); static int mac_test_check_vnode_readlink(struct ucred *cred, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_readlink); return (0); } COUNTER_DECL(check_vnode_relabel); static int mac_test_check_vnode_relabel(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct label *newlabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); LABEL_CHECK(newlabel, MAGIC_VNODE); COUNTER_INC(check_vnode_relabel); return (0); } COUNTER_DECL(check_vnode_rename_from); static int mac_test_check_vnode_rename_from(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, struct componentname *cnp) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(dvplabel, MAGIC_VNODE); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_rename_from); return (0); } COUNTER_DECL(check_vnode_rename_to); static int mac_test_check_vnode_rename_to(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, int samedir, struct componentname *cnp) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(dvplabel, MAGIC_VNODE); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_rename_to); return (0); } COUNTER_DECL(check_vnode_revoke); static int mac_test_check_vnode_revoke(struct ucred *cred, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_revoke); return (0); } COUNTER_DECL(check_vnode_setacl); static int mac_test_check_vnode_setacl(struct ucred *cred, struct vnode *vp, struct label *vplabel, acl_type_t type, struct acl *acl) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_setacl); return (0); } COUNTER_DECL(check_vnode_setextattr); static int mac_test_check_vnode_setextattr(struct ucred *cred, struct vnode *vp, struct label *vplabel, int attrnamespace, const char *name, struct uio *uio) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_setextattr); return (0); } COUNTER_DECL(check_vnode_setflags); static int mac_test_check_vnode_setflags(struct ucred *cred, struct vnode 
*vp, struct label *vplabel, u_long flags) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_setflags); return (0); } COUNTER_DECL(check_vnode_setmode); static int mac_test_check_vnode_setmode(struct ucred *cred, struct vnode *vp, struct label *vplabel, mode_t mode) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_setmode); return (0); } COUNTER_DECL(check_vnode_setowner); static int mac_test_check_vnode_setowner(struct ucred *cred, struct vnode *vp, struct label *vplabel, uid_t uid, gid_t gid) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_setowner); return (0); } COUNTER_DECL(check_vnode_setutimes); static int mac_test_check_vnode_setutimes(struct ucred *cred, struct vnode *vp, struct label *vplabel, struct timespec atime, struct timespec mtime) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_setutimes); return (0); } COUNTER_DECL(check_vnode_stat); static int mac_test_check_vnode_stat(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(active_cred->cr_label, MAGIC_CRED); if (file_cred != NULL) LABEL_CHECK(file_cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_stat); return (0); } COUNTER_DECL(check_vnode_unlink); static int mac_test_check_vnode_unlink(struct ucred *cred, struct vnode *dvp, struct label *dvplabel, struct vnode *vp, struct label *vplabel, struct componentname *cnp) { LABEL_CHECK(cred->cr_label, MAGIC_CRED); LABEL_CHECK(dvplabel, MAGIC_VNODE); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_unlink); return (0); } COUNTER_DECL(check_vnode_write); static int mac_test_check_vnode_write(struct ucred *active_cred, struct ucred *file_cred, struct vnode *vp, struct label *vplabel) { LABEL_CHECK(active_cred->cr_label, MAGIC_CRED); if (file_cred != NULL) LABEL_CHECK(file_cred->cr_label, MAGIC_CRED); LABEL_CHECK(vplabel, MAGIC_VNODE); COUNTER_INC(check_vnode_write); return (0); } static struct mac_policy_ops mac_test_ops = { .mpo_init_bpfdesc_label = mac_test_init_bpfdesc_label, .mpo_init_cred_label = mac_test_init_cred_label, .mpo_init_devfs_label = mac_test_init_devfs_label, .mpo_init_ifnet_label = mac_test_init_ifnet_label, .mpo_init_sysv_msgmsg_label = mac_test_init_sysv_msgmsg_label, .mpo_init_sysv_msgqueue_label = mac_test_init_sysv_msgqueue_label, .mpo_init_sysv_sem_label = mac_test_init_sysv_sem_label, .mpo_init_sysv_shm_label = mac_test_init_sysv_shm_label, .mpo_init_inpcb_label = mac_test_init_inpcb_label, .mpo_init_ipq_label = mac_test_init_ipq_label, .mpo_init_mbuf_label = mac_test_init_mbuf_label, .mpo_init_mount_label = mac_test_init_mount_label, .mpo_init_pipe_label = mac_test_init_pipe_label, .mpo_init_posix_sem_label = mac_test_init_posix_sem_label, .mpo_init_proc_label = mac_test_init_proc_label, .mpo_init_socket_label = mac_test_init_socket_label, .mpo_init_socket_peer_label = mac_test_init_socket_peer_label, .mpo_init_vnode_label = mac_test_init_vnode_label, .mpo_destroy_bpfdesc_label = mac_test_destroy_bpfdesc_label, .mpo_destroy_cred_label = mac_test_destroy_cred_label, .mpo_destroy_devfs_label = mac_test_destroy_devfs_label, .mpo_destroy_ifnet_label = mac_test_destroy_ifnet_label, .mpo_destroy_sysv_msgmsg_label = mac_test_destroy_sysv_msgmsg_label, .mpo_destroy_sysv_msgqueue_label = mac_test_destroy_sysv_msgqueue_label, .mpo_destroy_sysv_sem_label = 
mac_test_destroy_sysv_sem_label, .mpo_destroy_sysv_shm_label = mac_test_destroy_sysv_shm_label, .mpo_destroy_inpcb_label = mac_test_destroy_inpcb_label, .mpo_destroy_ipq_label = mac_test_destroy_ipq_label, .mpo_destroy_mbuf_label = mac_test_destroy_mbuf_label, .mpo_destroy_mount_label = mac_test_destroy_mount_label, .mpo_destroy_pipe_label = mac_test_destroy_pipe_label, .mpo_destroy_posix_sem_label = mac_test_destroy_posix_sem_label, .mpo_destroy_proc_label = mac_test_destroy_proc_label, .mpo_destroy_socket_label = mac_test_destroy_socket_label, .mpo_destroy_socket_peer_label = mac_test_destroy_socket_peer_label, .mpo_destroy_vnode_label = mac_test_destroy_vnode_label, .mpo_copy_cred_label = mac_test_copy_cred_label, .mpo_copy_ifnet_label = mac_test_copy_ifnet_label, .mpo_copy_mbuf_label = mac_test_copy_mbuf_label, .mpo_copy_pipe_label = mac_test_copy_pipe_label, .mpo_copy_socket_label = mac_test_copy_socket_label, .mpo_copy_vnode_label = mac_test_copy_vnode_label, .mpo_externalize_cred_label = mac_test_externalize_label, .mpo_externalize_ifnet_label = mac_test_externalize_label, .mpo_externalize_pipe_label = mac_test_externalize_label, .mpo_externalize_socket_label = mac_test_externalize_label, .mpo_externalize_socket_peer_label = mac_test_externalize_label, .mpo_externalize_vnode_label = mac_test_externalize_label, .mpo_internalize_cred_label = mac_test_internalize_label, .mpo_internalize_ifnet_label = mac_test_internalize_label, .mpo_internalize_pipe_label = mac_test_internalize_label, .mpo_internalize_socket_label = mac_test_internalize_label, .mpo_internalize_vnode_label = mac_test_internalize_label, .mpo_associate_vnode_devfs = mac_test_associate_vnode_devfs, .mpo_associate_vnode_extattr = mac_test_associate_vnode_extattr, .mpo_associate_vnode_singlelabel = mac_test_associate_vnode_singlelabel, .mpo_create_devfs_device = mac_test_create_devfs_device, .mpo_create_devfs_directory = mac_test_create_devfs_directory, .mpo_create_devfs_symlink = mac_test_create_devfs_symlink, .mpo_create_vnode_extattr = mac_test_create_vnode_extattr, .mpo_create_mount = mac_test_create_mount, .mpo_relabel_vnode = mac_test_relabel_vnode, .mpo_setlabel_vnode_extattr = mac_test_setlabel_vnode_extattr, .mpo_update_devfs = mac_test_update_devfs, .mpo_create_mbuf_from_socket = mac_test_create_mbuf_from_socket, .mpo_create_pipe = mac_test_create_pipe, .mpo_create_posix_sem = mac_test_create_posix_sem, .mpo_create_socket = mac_test_create_socket, .mpo_create_socket_from_socket = mac_test_create_socket_from_socket, .mpo_relabel_pipe = mac_test_relabel_pipe, .mpo_relabel_socket = mac_test_relabel_socket, .mpo_set_socket_peer_from_mbuf = mac_test_set_socket_peer_from_mbuf, .mpo_set_socket_peer_from_socket = mac_test_set_socket_peer_from_socket, .mpo_create_bpfdesc = mac_test_create_bpfdesc, .mpo_create_ifnet = mac_test_create_ifnet, .mpo_create_inpcb_from_socket = mac_test_create_inpcb_from_socket, .mpo_create_sysv_msgmsg = mac_test_create_sysv_msgmsg, .mpo_create_sysv_msgqueue = mac_test_create_sysv_msgqueue, .mpo_create_sysv_sem = mac_test_create_sysv_sem, .mpo_create_sysv_shm = mac_test_create_sysv_shm, .mpo_create_datagram_from_ipq = mac_test_create_datagram_from_ipq, .mpo_create_fragment = mac_test_create_fragment, .mpo_create_ipq = mac_test_create_ipq, .mpo_create_mbuf_from_inpcb = mac_test_create_mbuf_from_inpcb, .mpo_create_mbuf_linklayer = mac_test_create_mbuf_linklayer, .mpo_create_mbuf_from_bpfdesc = mac_test_create_mbuf_from_bpfdesc, .mpo_create_mbuf_from_ifnet = mac_test_create_mbuf_from_ifnet, 
.mpo_create_mbuf_multicast_encap = mac_test_create_mbuf_multicast_encap, .mpo_create_mbuf_netlayer = mac_test_create_mbuf_netlayer, .mpo_fragment_match = mac_test_fragment_match, .mpo_reflect_mbuf_icmp = mac_test_reflect_mbuf_icmp, .mpo_reflect_mbuf_tcp = mac_test_reflect_mbuf_tcp, .mpo_relabel_ifnet = mac_test_relabel_ifnet, .mpo_update_ipq = mac_test_update_ipq, .mpo_inpcb_sosetlabel = mac_test_inpcb_sosetlabel, .mpo_execve_transition = mac_test_execve_transition, .mpo_execve_will_transition = mac_test_execve_will_transition, .mpo_create_proc0 = mac_test_create_proc0, .mpo_create_proc1 = mac_test_create_proc1, .mpo_relabel_cred = mac_test_relabel_cred, .mpo_thread_userret = mac_test_thread_userret, .mpo_cleanup_sysv_msgmsg = mac_test_cleanup_sysv_msgmsg, .mpo_cleanup_sysv_msgqueue = mac_test_cleanup_sysv_msgqueue, .mpo_cleanup_sysv_sem = mac_test_cleanup_sysv_sem, .mpo_cleanup_sysv_shm = mac_test_cleanup_sysv_shm, .mpo_check_bpfdesc_receive = mac_test_check_bpfdesc_receive, .mpo_check_cred_relabel = mac_test_check_cred_relabel, .mpo_check_cred_visible = mac_test_check_cred_visible, .mpo_check_ifnet_relabel = mac_test_check_ifnet_relabel, .mpo_check_ifnet_transmit = mac_test_check_ifnet_transmit, .mpo_check_inpcb_deliver = mac_test_check_inpcb_deliver, .mpo_check_sysv_msgmsq = mac_test_check_sysv_msgmsq, .mpo_check_sysv_msgrcv = mac_test_check_sysv_msgrcv, .mpo_check_sysv_msgrmid = mac_test_check_sysv_msgrmid, .mpo_check_sysv_msqget = mac_test_check_sysv_msqget, .mpo_check_sysv_msqsnd = mac_test_check_sysv_msqsnd, .mpo_check_sysv_msqrcv = mac_test_check_sysv_msqrcv, .mpo_check_sysv_msqctl = mac_test_check_sysv_msqctl, .mpo_check_sysv_semctl = mac_test_check_sysv_semctl, .mpo_check_sysv_semget = mac_test_check_sysv_semget, .mpo_check_sysv_semop = mac_test_check_sysv_semop, .mpo_check_sysv_shmat = mac_test_check_sysv_shmat, .mpo_check_sysv_shmctl = mac_test_check_sysv_shmctl, .mpo_check_sysv_shmdt = mac_test_check_sysv_shmdt, .mpo_check_sysv_shmget = mac_test_check_sysv_shmget, .mpo_check_kenv_dump = mac_test_check_kenv_dump, .mpo_check_kenv_get = mac_test_check_kenv_get, .mpo_check_kenv_set = mac_test_check_kenv_set, .mpo_check_kenv_unset = mac_test_check_kenv_unset, .mpo_check_kld_load = mac_test_check_kld_load, .mpo_check_kld_stat = mac_test_check_kld_stat, .mpo_check_mount_stat = mac_test_check_mount_stat, .mpo_check_pipe_ioctl = mac_test_check_pipe_ioctl, .mpo_check_pipe_poll = mac_test_check_pipe_poll, .mpo_check_pipe_read = mac_test_check_pipe_read, .mpo_check_pipe_relabel = mac_test_check_pipe_relabel, .mpo_check_pipe_stat = mac_test_check_pipe_stat, .mpo_check_pipe_write = mac_test_check_pipe_write, .mpo_check_posix_sem_destroy = mac_test_check_posix_sem, .mpo_check_posix_sem_getvalue = mac_test_check_posix_sem, .mpo_check_posix_sem_open = mac_test_check_posix_sem, .mpo_check_posix_sem_post = mac_test_check_posix_sem, .mpo_check_posix_sem_unlink = mac_test_check_posix_sem, .mpo_check_posix_sem_wait = mac_test_check_posix_sem, .mpo_check_proc_debug = mac_test_check_proc_debug, .mpo_check_proc_sched = mac_test_check_proc_sched, .mpo_check_proc_setaudit = mac_test_check_proc_setaudit, .mpo_check_proc_setaudit_addr = mac_test_check_proc_setaudit_addr, .mpo_check_proc_setauid = mac_test_check_proc_setauid, .mpo_check_proc_setuid = mac_test_check_proc_setuid, .mpo_check_proc_seteuid = mac_test_check_proc_seteuid, .mpo_check_proc_setgid = mac_test_check_proc_setgid, .mpo_check_proc_setegid = mac_test_check_proc_setegid, .mpo_check_proc_setgroups = mac_test_check_proc_setgroups, 
.mpo_check_proc_setreuid = mac_test_check_proc_setreuid, .mpo_check_proc_setregid = mac_test_check_proc_setregid, .mpo_check_proc_setresuid = mac_test_check_proc_setresuid, .mpo_check_proc_setresgid = mac_test_check_proc_setresgid, .mpo_check_proc_signal = mac_test_check_proc_signal, .mpo_check_proc_wait = mac_test_check_proc_wait, .mpo_check_socket_accept = mac_test_check_socket_accept, .mpo_check_socket_bind = mac_test_check_socket_bind, .mpo_check_socket_connect = mac_test_check_socket_connect, .mpo_check_socket_deliver = mac_test_check_socket_deliver, .mpo_check_socket_listen = mac_test_check_socket_listen, .mpo_check_socket_poll = mac_test_check_socket_poll, .mpo_check_socket_receive = mac_test_check_socket_receive, .mpo_check_socket_relabel = mac_test_check_socket_relabel, .mpo_check_socket_send = mac_test_check_socket_send, .mpo_check_socket_stat = mac_test_check_socket_stat, .mpo_check_socket_visible = mac_test_check_socket_visible, .mpo_check_system_acct = mac_test_check_system_acct, .mpo_check_system_audit = mac_test_check_system_audit, .mpo_check_system_auditctl = mac_test_check_system_auditctl, .mpo_check_system_auditon = mac_test_check_system_auditon, .mpo_check_system_reboot = mac_test_check_system_reboot, .mpo_check_system_swapoff = mac_test_check_system_swapoff, .mpo_check_system_swapon = mac_test_check_system_swapon, .mpo_check_system_sysctl = mac_test_check_system_sysctl, .mpo_check_vnode_access = mac_test_check_vnode_access, .mpo_check_vnode_chdir = mac_test_check_vnode_chdir, .mpo_check_vnode_chroot = mac_test_check_vnode_chroot, .mpo_check_vnode_create = mac_test_check_vnode_create, .mpo_check_vnode_deleteacl = mac_test_check_vnode_deleteacl, .mpo_check_vnode_deleteextattr = mac_test_check_vnode_deleteextattr, .mpo_check_vnode_exec = mac_test_check_vnode_exec, .mpo_check_vnode_getacl = mac_test_check_vnode_getacl, .mpo_check_vnode_getextattr = mac_test_check_vnode_getextattr, .mpo_check_vnode_link = mac_test_check_vnode_link, .mpo_check_vnode_listextattr = mac_test_check_vnode_listextattr, .mpo_check_vnode_lookup = mac_test_check_vnode_lookup, .mpo_check_vnode_mmap = mac_test_check_vnode_mmap, .mpo_check_vnode_open = mac_test_check_vnode_open, .mpo_check_vnode_poll = mac_test_check_vnode_poll, .mpo_check_vnode_read = mac_test_check_vnode_read, .mpo_check_vnode_readdir = mac_test_check_vnode_readdir, .mpo_check_vnode_readlink = mac_test_check_vnode_readlink, .mpo_check_vnode_relabel = mac_test_check_vnode_relabel, .mpo_check_vnode_rename_from = mac_test_check_vnode_rename_from, .mpo_check_vnode_rename_to = mac_test_check_vnode_rename_to, .mpo_check_vnode_revoke = mac_test_check_vnode_revoke, .mpo_check_vnode_setacl = mac_test_check_vnode_setacl, .mpo_check_vnode_setextattr = mac_test_check_vnode_setextattr, .mpo_check_vnode_setflags = mac_test_check_vnode_setflags, .mpo_check_vnode_setmode = mac_test_check_vnode_setmode, .mpo_check_vnode_setowner = mac_test_check_vnode_setowner, .mpo_check_vnode_setutimes = mac_test_check_vnode_setutimes, .mpo_check_vnode_stat = mac_test_check_vnode_stat, .mpo_check_vnode_unlink = mac_test_check_vnode_unlink, .mpo_check_vnode_write = mac_test_check_vnode_write, }; MAC_POLICY_SET(&mac_test_ops, mac_test, "TrustedBSD MAC/Test", MPC_LOADTIME_FLAG_UNLOADOK | MPC_LOADTIME_FLAG_LABELMBUFS, &test_slot); Index: stable/7/sys/sparc64/pci/psycho.c =================================================================== --- stable/7/sys/sparc64/pci/psycho.c (revision 177733) +++ stable/7/sys/sparc64/pci/psycho.c (revision 177734) @@ -1,1449 +1,1449 @@ 
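/*
 * Editor's note on the mac_test registration that closes the previous
 * file (illustrative only, not part of this commit): a policy module
 * becomes visible to the MAC framework by filling in a struct
 * mac_policy_ops with the hooks it implements and handing the table to
 * MAC_POLICY_SET(), exactly as mac_test does above.  A minimal,
 * hypothetical policy that only watches kernel environment (kenv)
 * writes might look like the sketch below; the names example_ops,
 * example_policy, example_slot and example_check_kenv_set are made up
 * for illustration, while the types, the .mpo_check_kenv_set slot and
 * the MAC_POLICY_SET() arguments mirror the mac_test usage above.
 */
#if 0
static int example_slot;

static int
example_check_kenv_set(struct ucred *cred, char *name, char *value)
{

	/* Allow the operation; a real policy would apply its checks here. */
	return (0);
}

static struct mac_policy_ops example_ops = {
	.mpo_check_kenv_set = example_check_kenv_set,
};

MAC_POLICY_SET(&example_ops, example_policy, "Example policy",
    MPC_LOADTIME_FLAG_UNLOADOK, &example_slot);
#endif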
/*- * Copyright (c) 1999, 2000 Matthew R. Green * Copyright (c) 2001 - 2003 by Thomas Moestl * Copyright (c) 2005 - 2006 Marius Strobl * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: NetBSD: psycho.c,v 1.39 2001/10/07 20:30:41 eeh Exp */ #include __FBSDID("$FreeBSD$"); /* * Support for `Hummingbird' (UltraSPARC IIe), `Psycho' and `Psycho+' * (UltraSPARC II) and `Sabre' (UltraSPARC IIi) UPA to PCI bridges. */ #include "opt_ofw_pci.h" #include "opt_psycho.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" static const struct psycho_desc *psycho_find_desc(const struct psycho_desc *, const char *); static const struct psycho_desc *psycho_get_desc(device_t); static void psycho_set_intr(struct psycho_softc *, u_int, bus_addr_t, driver_filter_t, driver_intr_t); static int psycho_find_intrmap(struct psycho_softc *, u_int, bus_addr_t *, bus_addr_t *, u_long *); static driver_filter_t psycho_dmasync; static void psycho_intr_enable(void *); static void psycho_intr_disable(void *); static void psycho_intr_eoi(void *); static bus_space_tag_t psycho_alloc_bus_tag(struct psycho_softc *, int); /* Interrupt handlers */ static driver_filter_t psycho_ue; static driver_filter_t psycho_ce; static driver_filter_t psycho_pci_bus; static driver_filter_t psycho_powerfail; static driver_intr_t psycho_overtemp; #ifdef PSYCHO_MAP_WAKEUP static driver_filter_t psycho_wakeup; #endif /* IOMMU support */ static void psycho_iommu_init(struct psycho_softc *, int, uint32_t); /* * Methods */ static device_probe_t psycho_probe; static device_attach_t psycho_attach; static bus_read_ivar_t psycho_read_ivar; static bus_setup_intr_t psycho_setup_intr; static bus_teardown_intr_t psycho_teardown_intr; static bus_alloc_resource_t psycho_alloc_resource; static bus_activate_resource_t psycho_activate_resource; static bus_deactivate_resource_t psycho_deactivate_resource; static bus_release_resource_t psycho_release_resource; static bus_get_dma_tag_t psycho_get_dma_tag; static 
pcib_maxslots_t psycho_maxslots; static pcib_read_config_t psycho_read_config; static pcib_write_config_t psycho_write_config; static pcib_route_interrupt_t psycho_route_interrupt; static ofw_pci_intr_pending_t psycho_intr_pending; static ofw_bus_get_node_t psycho_get_node; static ofw_pci_alloc_busno_t psycho_alloc_busno; static ofw_pci_adjust_busrange_t psycho_adjust_busrange; static device_method_t psycho_methods[] = { /* Device interface */ DEVMETHOD(device_probe, psycho_probe), DEVMETHOD(device_attach, psycho_attach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), /* Bus interface */ DEVMETHOD(bus_print_child, bus_generic_print_child), DEVMETHOD(bus_read_ivar, psycho_read_ivar), DEVMETHOD(bus_setup_intr, psycho_setup_intr), DEVMETHOD(bus_teardown_intr, psycho_teardown_intr), DEVMETHOD(bus_alloc_resource, psycho_alloc_resource), DEVMETHOD(bus_activate_resource, psycho_activate_resource), DEVMETHOD(bus_deactivate_resource, psycho_deactivate_resource), DEVMETHOD(bus_release_resource, psycho_release_resource), DEVMETHOD(bus_get_dma_tag, psycho_get_dma_tag), /* pcib interface */ DEVMETHOD(pcib_maxslots, psycho_maxslots), DEVMETHOD(pcib_read_config, psycho_read_config), DEVMETHOD(pcib_write_config, psycho_write_config), DEVMETHOD(pcib_route_interrupt, psycho_route_interrupt), /* ofw_bus interface */ DEVMETHOD(ofw_bus_get_node, psycho_get_node), /* ofw_pci interface */ DEVMETHOD(ofw_pci_intr_pending, psycho_intr_pending), DEVMETHOD(ofw_pci_alloc_busno, psycho_alloc_busno), DEVMETHOD(ofw_pci_adjust_busrange, psycho_adjust_busrange), { 0, 0 } }; static devclass_t psycho_devclass; DEFINE_CLASS_0(pcib, psycho_driver, psycho_methods, sizeof(struct psycho_softc)); DRIVER_MODULE(psycho, nexus, psycho_driver, psycho_devclass, 0, 0); static SLIST_HEAD(, psycho_softc) psycho_softcs = SLIST_HEAD_INITIALIZER(psycho_softcs); static uint8_t psycho_pci_bus_cnt; static const struct intr_controller psycho_ic = { psycho_intr_enable, psycho_intr_disable, psycho_intr_eoi }; struct psycho_icarg { struct psycho_softc *pica_sc; bus_addr_t pica_map; bus_addr_t pica_clr; }; struct psycho_dmasync { struct psycho_softc *pds_sc; driver_filter_t *pds_handler; /* handler to call */ void *pds_arg; /* argument for the handler */ void *pds_cookie; /* parent bus int. cookie */ device_t pds_ppb; /* farest PCI-PCI bridge */ uint8_t pds_bus; /* bus of farest PCI device */ uint8_t pds_slot; /* slot of farest PCI device */ uint8_t pds_func; /* func. of farest PCI device */ }; #define PSYCHO_READ8(sc, off) \ bus_read_8((sc)->sc_mem_res, (off)) #define PSYCHO_WRITE8(sc, off, v) \ bus_write_8((sc)->sc_mem_res, (off), (v)) #define PCICTL_READ8(sc, off) \ PSYCHO_READ8((sc), (sc)->sc_pcictl + (off)) #define PCICTL_WRITE8(sc, off, v) \ PSYCHO_WRITE8((sc), (sc)->sc_pcictl + (off), (v)) /* * "Sabre" is the UltraSPARC IIi onboard UPA to PCI bridge. It manages a * single PCI bus and does not have a streaming buffer. It often has an APB * (advanced PCI bridge) connected to it, which was designed specifically for * the IIi. The APB let's the IIi handle two independednt PCI buses, and * appears as two "Simba"'s underneath the Sabre. * * "Hummingbird" is the UltraSPARC IIe onboard UPA to PCI bridge. It's * basically the same as Sabre but without an APB underneath it. * * "Psycho" and "Psycho+" are dual UPA to PCI bridges. They sit on the UPA bus * and manage two PCI buses. 
"Psycho" has two 64-bit 33MHz buses, while * "Psycho+" controls both a 64-bit 33Mhz and a 64-bit 66Mhz PCI bus. You * will usually find a "Psycho+" since I don't think the original "Psycho" * ever shipped, and if it did it would be in the U30. * * Each "Psycho" PCI bus appears as a separate OFW node, but since they are * both part of the same IC, they only have a single register space. As such, * they need to be configured together, even though the autoconfiguration will * attach them separately. * * On UltraIIi machines, "Sabre" itself usually takes pci0, with "Simba" often * as pci1 and pci2, although they have been implemented with other PCI bus * numbers on some machines. * * On UltraII machines, there can be any number of "Psycho+" ICs, each * providing two PCI buses. */ #define OFW_PCI_TYPE "pci" struct psycho_desc { const char *pd_string; int pd_mode; const char *pd_name; }; static const struct psycho_desc psycho_compats[] = { { "pci108e,8000", PSYCHO_MODE_PSYCHO, "Psycho compatible" }, { "pci108e,a000", PSYCHO_MODE_SABRE, "Sabre compatible" }, { "pci108e,a001", PSYCHO_MODE_SABRE, "Hummingbird compatible" }, { NULL, 0, NULL } }; static const struct psycho_desc psycho_models[] = { { "SUNW,psycho", PSYCHO_MODE_PSYCHO, "Psycho" }, { "SUNW,sabre", PSYCHO_MODE_SABRE, "Sabre" }, { NULL, 0, NULL } }; static const struct psycho_desc * psycho_find_desc(const struct psycho_desc *table, const char *string) { const struct psycho_desc *desc; if (string == NULL) return (NULL); for (desc = table; desc->pd_string != NULL; desc++) if (strcmp(desc->pd_string, string) == 0) return (desc); return (NULL); } static const struct psycho_desc * psycho_get_desc(device_t dev) { const struct psycho_desc *rv; rv = psycho_find_desc(psycho_models, ofw_bus_get_model(dev)); if (rv == NULL) rv = psycho_find_desc(psycho_compats, ofw_bus_get_compat(dev)); return (rv); } static int psycho_probe(device_t dev) { const char *dtype; dtype = ofw_bus_get_type(dev); if (dtype != NULL && strcmp(dtype, OFW_PCI_TYPE) == 0 && psycho_get_desc(dev) != NULL) { device_set_desc(dev, "U2P UPA-PCI bridge"); return (0); } return (ENXIO); } static int psycho_attach(device_t dev) { char name[sizeof("pci108e,1000")]; struct psycho_icarg *pica; struct psycho_softc *asc, *sc, *osc; struct ofw_pci_ranges *range; const struct psycho_desc *desc; bus_addr_t intrclr, intrmap; uint64_t csr, dr; phandle_t child, node; uint32_t dvmabase, psycho_br[2]; int32_t rev; u_int ver; int i, n, nrange, rid; node = ofw_bus_get_node(dev); sc = device_get_softc(dev); desc = psycho_get_desc(dev); sc->sc_node = node; sc->sc_dev = dev; sc->sc_mode = desc->pd_mode; /* * The Psycho gets three register banks: * (0) per-PBM configuration and status registers * (1) per-PBM PCI configuration space, containing only the * PBM 256-byte PCI header * (2) the shared Psycho configuration registers */ if (sc->sc_mode == PSYCHO_MODE_PSYCHO) { rid = 2; sc->sc_pcictl = bus_get_resource_start(dev, SYS_RES_MEMORY, 0) - bus_get_resource_start(dev, SYS_RES_MEMORY, 2); switch (sc->sc_pcictl) { case PSR_PCICTL0: sc->sc_half = 0; break; case PSR_PCICTL1: sc->sc_half = 1; break; default: panic("%s: bogus PCI control register location", __func__); } } else { rid = 0; sc->sc_pcictl = PSR_PCICTL0; sc->sc_half = 0; } sc->sc_mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, (sc->sc_mode == PSYCHO_MODE_PSYCHO ? 
RF_SHAREABLE : 0) | RF_ACTIVE); if (sc->sc_mem_res == NULL) panic("%s: could not allocate registers", __func__); /* * Match other Psycho's that are already configured against * the base physical address. This will be the same for a * pair of devices that share register space. */ osc = NULL; SLIST_FOREACH(asc, &psycho_softcs, sc_link) { if (rman_get_start(asc->sc_mem_res) == rman_get_start(sc->sc_mem_res)) { /* Found partner. */ osc = asc; break; } } if (osc == NULL) { sc->sc_mtx = malloc(sizeof(*sc->sc_mtx), M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->sc_mtx == NULL) panic("%s: could not malloc mutex", __func__); mtx_init(sc->sc_mtx, "pcib_mtx", NULL, MTX_SPIN); } else { if (mtx_initialized(osc->sc_mtx) == 0) panic("%s: mutex not initialized", __func__); sc->sc_mtx = osc->sc_mtx; } /* Clear PCI AFSR. */ PCICTL_WRITE8(sc, PCR_AFS, PCIAFSR_ERRMASK); csr = PSYCHO_READ8(sc, PSR_CS); ver = PSYCHO_GCSR_VERS(csr); sc->sc_ign = 0x1f; /* Hummingbird/Sabre IGN is always 0x1f. */ if (sc->sc_mode == PSYCHO_MODE_PSYCHO) sc->sc_ign = PSYCHO_GCSR_IGN(csr); device_printf(dev, "%s, impl %d, version %d, IGN %#x, bus %c\n", desc->pd_name, (u_int)PSYCHO_GCSR_IMPL(csr), ver, sc->sc_ign, 'A' + sc->sc_half); /* Set up the PCI control and PCI diagnostic registers. */ /* * Revision 0 EBus bridges have a bug which prevents them from * working when bus parking is enabled. */ rev = -1; csr = PCICTL_READ8(sc, PCR_CS); csr &= ~PCICTL_ARB_PARK; for (child = OF_child(node); child != 0; child = OF_peer(child)) { if (OF_getprop(child, "name", name, sizeof(name)) == -1) continue; if ((strcmp(name, "ebus") == 0 || strcmp(name, "pci108e,1000") == 0) && OF_getprop(child, "revision-id", &rev, sizeof(rev)) > 0 && rev == 0) break; } if (rev != 0 && OF_getproplen(node, "no-bus-parking") < 0) csr |= PCICTL_ARB_PARK; /* Workarounds for version specific bugs. */ dr = PCICTL_READ8(sc, PCR_DIAG); switch (ver) { case 0: dr |= DIAG_RTRY_DIS; dr &= ~DIAG_DWSYNC_DIS; /* XXX need to also disable rerun of the streaming buffers. */ break; case 1: csr &= ~PCICTL_ARB_PARK; dr |= DIAG_RTRY_DIS | DIAG_DWSYNC_DIS; /* XXX need to also disable rerun of the streaming buffers. */ break; default: dr |= DIAG_DWSYNC_DIS; dr &= ~DIAG_RTRY_DIS; break; } csr |= PCICTL_SERR | PCICTL_ERRINTEN | PCICTL_ARB_4; csr &= ~(PCICTL_SBHINTEN | PCICTL_WAKEUPEN); #ifdef PSYCHO_DEBUG device_printf(dev, "PCI CSR 0x%016llx -> 0x%016llx\n", (unsigned long long)PCICTL_READ8(sc, PCR_CS), (unsigned long long)csr); #endif PCICTL_WRITE8(sc, PCR_CS, csr); dr &= ~DIAG_ISYNC_DIS; #ifdef PSYCHO_DEBUG device_printf(dev, "PCI DR 0x%016llx -> 0x%016llx\n", (unsigned long long)PCICTL_READ8(sc, PCR_DIAG), (unsigned long long)dr); #endif PCICTL_WRITE8(sc, PCR_DIAG, dr); if (sc->sc_mode == PSYCHO_MODE_SABRE) { /* Use the PROM preset for now. */ csr = PCICTL_READ8(sc, PCR_TAS); if (csr == 0) panic("%s: Hummingbird/Sabre TAS not initialized.", __func__); dvmabase = (ffs(csr) - 1) << PCITAS_ADDR_SHIFT; } else dvmabase = -1; /* Initialize memory and I/O rmans. 
*/ sc->sc_pci_io_rman.rm_type = RMAN_ARRAY; sc->sc_pci_io_rman.rm_descr = "Psycho PCI I/O Ports"; if (rman_init(&sc->sc_pci_io_rman) != 0 || rman_manage_region(&sc->sc_pci_io_rman, 0, PSYCHO_IO_SIZE) != 0) panic("%s: failed to set up I/O rman", __func__); sc->sc_pci_mem_rman.rm_type = RMAN_ARRAY; sc->sc_pci_mem_rman.rm_descr = "Psycho PCI Memory"; if (rman_init(&sc->sc_pci_mem_rman) != 0 || rman_manage_region(&sc->sc_pci_mem_rman, 0, PSYCHO_MEM_SIZE) != 0) panic("%s: failed to set up memory rman", __func__); nrange = OF_getprop_alloc(node, "ranges", sizeof(*range), (void **)&range); /* * Make sure that the expected ranges are present. The OFW_PCI_CS_MEM64 * one is not currently used though. */ if (nrange != PSYCHO_NRANGE) panic("%s: unsupported number of ranges", __func__); /* * Find the addresses of the various bus spaces. * There should not be multiple ones of one kind. * The physical start addresses of the ranges are the configuration, * memory and I/O handles. */ for (n = 0; n < PSYCHO_NRANGE; n++) { i = OFW_PCI_RANGE_CS(&range[n]); if (sc->sc_pci_bh[i] != 0) panic("%s: duplicate range for space %d", __func__, i); sc->sc_pci_bh[i] = OFW_PCI_RANGE_PHYS(&range[n]); } free(range, M_OFWPROP); /* Register the softc, this is needed for paired Psychos. */ SLIST_INSERT_HEAD(&psycho_softcs, sc, sc_link); /* * If we're a Hummingbird/Sabre or the first of a pair of Psychos * to arrive here, do the interrupt setup and start up the IOMMU. */ if (osc == NULL) { /* * Hunt through all the interrupt mapping regs and register * our interrupt controller for the corresponding interrupt * vectors. */ for (n = 0; n <= PSYCHO_MAX_INO; n++) { if (psycho_find_intrmap(sc, n, &intrmap, &intrclr, NULL) == 0) continue; pica = malloc(sizeof(*pica), M_DEVBUF, M_NOWAIT); if (pica == NULL) panic("%s: could not allocate interrupt " "controller argument", __func__); pica->pica_sc = sc; pica->pica_map = intrmap; pica->pica_clr = intrclr; #ifdef PSYCHO_DEBUG /* * Enable all interrupts and clear all interrupt * states. This aids the debugging of interrupt * routing problems. */ device_printf(dev, "intr map (INO %d, %s) %#lx: %#lx, clr: %#lx\n", n, intrmap <= PSR_PCIB3_INT_MAP ? "PCI" : "OBIO", (u_long)intrmap, (u_long)PSYCHO_READ8(sc, intrmap), (u_long)intrclr); PSYCHO_WRITE8(sc, intrmap, INTMAP_VEC(sc->sc_ign, n)); PSYCHO_WRITE8(sc, intrclr, 0); PSYCHO_WRITE8(sc, intrmap, INTMAP_ENABLE(INTMAP_VEC(sc->sc_ign, n), PCPU_GET(mid))); #endif if (intr_controller_register(INTMAP_VEC(sc->sc_ign, n), &psycho_ic, pica) != 0) panic("%s: could not register interrupt " "controller for INO %d", __func__, n); } /* * Establish handlers for interesting interrupts... * * XXX We need to remember these and remove this to support * hotplug on the UPA/FHC bus. * * XXX Not all controllers have these, but installing them * is better than trying to sort through this mess. */ psycho_set_intr(sc, 1, PSR_UE_INT_MAP, psycho_ue, NULL); psycho_set_intr(sc, 2, PSR_CE_INT_MAP, psycho_ce, NULL); #ifdef DEBUGGER_ON_POWERFAIL psycho_set_intr(sc, 3, PSR_POWER_INT_MAP, psycho_powerfail, NULL); #else psycho_set_intr(sc, 3, PSR_POWER_INT_MAP, NULL, (driver_intr_t *)psycho_powerfail); #endif /* Psycho-specific initialization */ if (sc->sc_mode == PSYCHO_MODE_PSYCHO) { /* * Hummingbirds/Sabres do not have the following two * interrupts. */ /* * The spare hardware interrupt is used for the * over-temperature interrupt. 
*/ psycho_set_intr(sc, 4, PSR_SPARE_INT_MAP, NULL, psycho_overtemp); #ifdef PSYCHO_MAP_WAKEUP /* * psycho_wakeup() doesn't do anything useful right * now. */ psycho_set_intr(sc, 5, PSR_PWRMGT_INT_MAP, psycho_wakeup, NULL); #endif /* PSYCHO_MAP_WAKEUP */ /* Initialize the counter-timer. */ sparc64_counter_init(rman_get_bustag(sc->sc_mem_res), rman_get_bushandle(sc->sc_mem_res), PSR_TC0); } /* * Set up IOMMU and PCI configuration if we're the first * of a pair of Psycho's to arrive here. * * We should calculate a TSB size based on amount of RAM * and number of bus controllers and number and type of * child devices. * * For the moment, 32KB should be more than enough. */ sc->sc_is = malloc(sizeof(struct iommu_state), M_DEVBUF, M_NOWAIT | M_ZERO); if (sc->sc_is == NULL) panic("%s: malloc iommu_state failed", __func__); if (sc->sc_mode == PSYCHO_MODE_SABRE) sc->sc_is->is_pmaxaddr = IOMMU_MAXADDR(SABRE_IOMMU_BITS); else sc->sc_is->is_pmaxaddr = IOMMU_MAXADDR(PSYCHO_IOMMU_BITS); sc->sc_is->is_sb[0] = 0; sc->sc_is->is_sb[1] = 0; if (OF_getproplen(node, "no-streaming-cache") < 0) sc->sc_is->is_sb[0] = sc->sc_pcictl + PCR_STRBUF; psycho_iommu_init(sc, 3, dvmabase); } else { /* Just copy IOMMU state, config tag and address. */ sc->sc_is = osc->sc_is; if (OF_getproplen(node, "no-streaming-cache") < 0) sc->sc_is->is_sb[1] = sc->sc_pcictl + PCR_STRBUF; iommu_reset(sc->sc_is); } /* * Register a PCI bus error interrupt handler according to which * half this is. Hummingbird/Sabre don't have a PCI bus B error * interrupt but they are also only used for PCI bus A. */ psycho_set_intr(sc, 0, sc->sc_half == 0 ? PSR_PCIAERR_INT_MAP : PSR_PCIBERR_INT_MAP, psycho_pci_bus, NULL); /* Allocate our tags. */ sc->sc_pci_memt = psycho_alloc_bus_tag(sc, PCI_MEMORY_BUS_SPACE); sc->sc_pci_iot = psycho_alloc_bus_tag(sc, PCI_IO_BUS_SPACE); sc->sc_pci_cfgt = psycho_alloc_bus_tag(sc, PCI_CONFIG_BUS_SPACE); if (bus_dma_tag_create(bus_get_dma_tag(dev), 8, 0, sc->sc_is->is_pmaxaddr, ~0, NULL, NULL, sc->sc_is->is_pmaxaddr, 0xff, 0xffffffff, 0, NULL, NULL, &sc->sc_pci_dmat) != 0) panic("%s: bus_dma_tag_create failed", __func__); /* Customize the tag. */ sc->sc_pci_dmat->dt_cookie = sc->sc_is; sc->sc_pci_dmat->dt_mt = &iommu_dma_methods; /* * Get the bus range from the firmware; it is used solely for obtaining * the inital bus number, and cannot be trusted on all machines. */ n = OF_getprop(node, "bus-range", (void *)psycho_br, sizeof(psycho_br)); if (n == -1) panic("%s: could not get bus-range", __func__); if (n != sizeof(psycho_br)) panic("%s: broken bus-range (%d)", __func__, n); /* Clear PCI status error bits. */ PCIB_WRITE_CONFIG(dev, psycho_br[0], PCS_DEVICE, PCS_FUNC, PCIR_STATUS, PCIM_STATUS_PERR | PCIM_STATUS_RMABORT | PCIM_STATUS_RTABORT | PCIM_STATUS_STABORT | PCIM_STATUS_PERRREPORT, 2); /* * Set the latency timer register as this isn't always done by the * firmware. */ PCIB_WRITE_CONFIG(dev, psycho_br[0], PCS_DEVICE, PCS_FUNC, PCIR_LATTIMER, 64, 1); sc->sc_pci_secbus = sc->sc_pci_subbus = psycho_alloc_busno(dev); /* * Program the bus range registers. * NOTE: for the Psycho, the second write changes the bus number the * Psycho itself uses for it's configuration space, so these * writes must be kept in this order! * The Hummingbird/Sabre always uses bus 0, but there only can be one * Hummingbird/Sabre per machine. 
*/ PCIB_WRITE_CONFIG(dev, psycho_br[0], PCS_DEVICE, PCS_FUNC, PCSR_SUBBUS, sc->sc_pci_subbus, 1); PCIB_WRITE_CONFIG(dev, psycho_br[0], PCS_DEVICE, PCS_FUNC, PCSR_SECBUS, sc->sc_pci_secbus, 1); for (n = PCIR_VENDOR; n < PCIR_STATUS; n += sizeof(uint16_t)) le16enc(&sc->sc_pci_hpbcfg[n], bus_space_read_2( sc->sc_pci_cfgt, sc->sc_pci_bh[OFW_PCI_CS_CONFIG], PSYCHO_CONF_OFF(sc->sc_pci_secbus, PCS_DEVICE, PCS_FUNC, n))); for (n = PCIR_REVID; n <= PCIR_BIST; n += sizeof(uint8_t)) sc->sc_pci_hpbcfg[n] = bus_space_read_1(sc->sc_pci_cfgt, sc->sc_pci_bh[OFW_PCI_CS_CONFIG], PSYCHO_CONF_OFF( sc->sc_pci_secbus, PCS_DEVICE, PCS_FUNC, n)); ofw_bus_setup_iinfo(node, &sc->sc_pci_iinfo, sizeof(ofw_pci_intr_t)); /* * On E250 the interrupt map entry for the EBus bridge is wrong, * causing incorrect interrupts to be assigned to some devices on * the EBus. Work around it by changing our copy of the interrupt * map mask to perform a full comparison of the INO. That way * the interrupt map entry for the EBus bridge won't match at all * and the INOs specified in the "interrupts" properties of the * EBus devices will be used directly instead. */ if (strcmp(sparc64_model, "SUNW,Ultra-250") == 0 && sc->sc_pci_iinfo.opi_imapmsk != NULL) *(ofw_pci_intr_t *)(&sc->sc_pci_iinfo.opi_imapmsk[ sc->sc_pci_iinfo.opi_addrc]) = INTMAP_INO_MASK; device_add_child(dev, "pci", sc->sc_pci_secbus); return (bus_generic_attach(dev)); } static void psycho_set_intr(struct psycho_softc *sc, u_int index, bus_addr_t intrmap, driver_filter_t filt, driver_intr_t intr) { u_long vec; int rid; rid = index; sc->sc_irq_res[index] = bus_alloc_resource_any(sc->sc_dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->sc_irq_res[index] == NULL || INTIGN(vec = rman_get_start(sc->sc_irq_res[index])) != sc->sc_ign || INTVEC(PSYCHO_READ8(sc, intrmap)) != vec || intr_vectors[vec].iv_ic != &psycho_ic || bus_setup_intr(sc->sc_dev, sc->sc_irq_res[index], INTR_TYPE_MISC, filt, intr, sc, &sc->sc_ihand[index]) != 0) panic("%s: failed to set up interrupt %d", __func__, index); } static int psycho_find_intrmap(struct psycho_softc *sc, u_int ino, bus_addr_t *intrmapptr, bus_addr_t *intrclrptr, bus_addr_t *intrdiagptr) { bus_addr_t intrclr, intrmap; uint64_t diag; int found; /* * XXX we only compare INOs rather than INRs since the firmware may * not provide the IGN and the IGN is constant for all devices on * that PCI controller. * This could cause problems for the FFB/external interrupt which * has a full vector that can be set arbitrarily. */ if (ino > PSYCHO_MAX_INO) { device_printf(sc->sc_dev, "out of range INO %d requested\n", ino); return (0); } found = 0; /* Hunt through OBIO first. */ diag = PSYCHO_READ8(sc, PSR_OBIO_INT_DIAG); for (intrmap = PSR_SCSI_INT_MAP, intrclr = PSR_SCSI_INT_CLR; intrmap <= PSR_PWRMGT_INT_MAP; intrmap += 8, intrclr += 8, diag >>= 2) { if (sc->sc_mode == PSYCHO_MODE_SABRE && (intrmap == PSR_TIMER0_INT_MAP || intrmap == PSR_TIMER1_INT_MAP || intrmap == PSR_PCIBERR_INT_MAP || intrmap == PSR_PWRMGT_INT_MAP)) continue; if (INTINO(PSYCHO_READ8(sc, intrmap)) == ino) { diag &= 2; found = 1; break; } } if (!found) { diag = PSYCHO_READ8(sc, PSR_PCI_INT_DIAG); /* Now do PCI interrupts. 
*/ for (intrmap = PSR_PCIA0_INT_MAP, intrclr = PSR_PCIA0_INT_CLR; intrmap <= PSR_PCIB3_INT_MAP; intrmap += 8, intrclr += 32, diag >>= 8) { if (sc->sc_mode == PSYCHO_MODE_PSYCHO && (intrmap == PSR_PCIA2_INT_MAP || intrmap == PSR_PCIA3_INT_MAP)) continue; if (((PSYCHO_READ8(sc, intrmap) ^ ino) & 0x3c) == 0) { intrclr += 8 * (ino & 3); diag = (diag >> ((ino & 3) * 2)) & 2; found = 1; break; } } } if (intrmapptr != NULL) *intrmapptr = intrmap; if (intrclrptr != NULL) *intrclrptr = intrclr; if (intrdiagptr != NULL) *intrdiagptr = diag; return (found); } /* * Interrupt handlers */ static int psycho_ue(void *arg) { struct psycho_softc *sc = arg; uint64_t afar, afsr; afar = PSYCHO_READ8(sc, PSR_UE_AFA); afsr = PSYCHO_READ8(sc, PSR_UE_AFS); /* * On the UltraSPARC-IIi/IIe, IOMMU misses/protection faults cause * the AFAR to be set to the physical address of the TTE entry that * was invalid/write protected. Call into the iommu code to have * them decoded to virtual I/O addresses. */ if ((afsr & UEAFSR_P_DTE) != 0) iommu_decode_fault(sc->sc_is, afar); panic("%s: uncorrectable DMA error AFAR %#lx AFSR %#lx", device_get_name(sc->sc_dev), (u_long)afar, (u_long)afsr); return (FILTER_HANDLED); } static int psycho_ce(void *arg) { struct psycho_softc *sc = arg; uint64_t afar, afsr; mtx_lock_spin(sc->sc_mtx); afar = PSYCHO_READ8(sc, PSR_CE_AFA); afsr = PSYCHO_READ8(sc, PSR_CE_AFS); device_printf(sc->sc_dev, "correctable DMA error AFAR %#lx " "AFSR %#lx\n", (u_long)afar, (u_long)afsr); /* Clear the error bits that we caught. */ PSYCHO_WRITE8(sc, PSR_CE_AFS, afsr & CEAFSR_ERRMASK); mtx_unlock_spin(sc->sc_mtx); return (FILTER_HANDLED); } static int psycho_pci_bus(void *arg) { struct psycho_softc *sc = arg; uint64_t afar, afsr; afar = PCICTL_READ8(sc, PCR_AFA); afsr = PCICTL_READ8(sc, PCR_AFS); panic("%s: PCI bus %c error AFAR %#lx AFSR %#lx", device_get_name(sc->sc_dev), 'A' + sc->sc_half, (u_long)afar, (u_long)afsr); return (FILTER_HANDLED); } static int psycho_powerfail(void *arg) { #ifdef DEBUGGER_ON_POWERFAIL struct psycho_softc *sc = arg; - kdb_enter("powerfail"); + kdb_enter_why(KDB_WHY_POWERFAIL, "powerfail"); #else static int shutdown; /* As the interrupt is cleared we may be called multiple times. */ if (shutdown != 0) return (FILTER_HANDLED); shutdown++; printf("Power Failure Detected: Shutting down NOW.\n"); shutdown_nice(0); #endif return (FILTER_HANDLED); } static void psycho_overtemp(void *arg) { static int shutdown; /* As the interrupt is cleared we may be called multiple times. */ if (shutdown != 0) return; shutdown++; printf("DANGER: OVER TEMPERATURE detected.\nShutting down NOW.\n"); shutdown_nice(RB_POWEROFF); } #ifdef PSYCHO_MAP_WAKEUP static int psycho_wakeup(void *arg) { struct psycho_softc *sc = arg; /* Gee, we don't really have a framework to deal with this properly. */ device_printf(sc->sc_dev, "power management wakeup\n"); return (FILTER_HANDLED); } #endif /* PSYCHO_MAP_WAKEUP */ static void psycho_iommu_init(struct psycho_softc *sc, int tsbsize, uint32_t dvmabase) { char *name; struct iommu_state *is = sc->sc_is; /* Punch in our copies. */ is->is_bustag = rman_get_bustag(sc->sc_mem_res); is->is_bushandle = rman_get_bushandle(sc->sc_mem_res); is->is_iommu = PSR_IOMMU; is->is_dtag = PSR_IOMMU_TLB_TAG_DIAG; is->is_ddram = PSR_IOMMU_TLB_DATA_DIAG; is->is_dqueue = PSR_IOMMU_QUEUE_DIAG; is->is_dva = PSR_IOMMU_SVADIAG; is->is_dtcmp = PSR_IOMMU_TLB_CMP_DIAG; /* Give us a nice name... 
*/ name = malloc(32, M_DEVBUF, M_NOWAIT); if (name == NULL) panic("%s: could not malloc iommu name", __func__); snprintf(name, 32, "%s dvma", device_get_nameunit(sc->sc_dev)); iommu_init(name, is, tsbsize, dvmabase, 0); } static int psycho_maxslots(device_t dev) { /* XXX: is this correct? */ return (PCI_SLOTMAX); } static uint32_t psycho_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, int width) { struct psycho_softc *sc; bus_space_handle_t bh; u_long offset = 0; uint8_t byte; uint16_t shrt; uint32_t r, wrd; int i; sc = device_get_softc(dev); bh = sc->sc_pci_bh[OFW_PCI_CS_CONFIG]; /* * The Hummingbird and Sabre bridges are picky in that they * only allow their config space to be accessed using the * "native" width of the respective register being accessed * and return semi-random other content of their config space * otherwise. Given that the PCI specs don't say anything * about such a (unusual) limitation and lots of stuff expects * to be able to access the contents of the config space at * any width we allow just that. We do this by using a copy * of the header of the bridge (the rest is all zero anyway) * read during attach (expect for PCIR_STATUS) in order to * simplify things. * The Psycho bridges contain a dupe of their header at 0x80 * which we nullify that way also. */ if (bus == sc->sc_pci_secbus && slot == PCS_DEVICE && func == PCS_FUNC) { if (offset % width != 0) return (-1); if (reg >= sizeof(sc->sc_pci_hpbcfg)) return (0); if ((reg < PCIR_STATUS && reg + width > PCIR_STATUS) || reg == PCIR_STATUS || reg == PCIR_STATUS + 1) le16enc(&sc->sc_pci_hpbcfg[PCIR_STATUS], bus_space_read_2(sc->sc_pci_cfgt, bh, PSYCHO_CONF_OFF(sc->sc_pci_secbus, PCS_DEVICE, PCS_FUNC, PCIR_STATUS))); switch (width) { case 1: return (sc->sc_pci_hpbcfg[reg]); case 2: return (le16dec(&sc->sc_pci_hpbcfg[reg])); case 4: return (le32dec(&sc->sc_pci_hpbcfg[reg])); } } offset = PSYCHO_CONF_OFF(bus, slot, func, reg); switch (width) { case 1: i = bus_space_peek_1(sc->sc_pci_cfgt, bh, offset, &byte); r = byte; break; case 2: i = bus_space_peek_2(sc->sc_pci_cfgt, bh, offset, &shrt); r = shrt; break; case 4: i = bus_space_peek_4(sc->sc_pci_cfgt, bh, offset, &wrd); r = wrd; break; default: panic("%s: bad width", __func__); } if (i) { #ifdef PSYCHO_DEBUG printf("%s: read data error reading: %d.%d.%d: 0x%x\n", __func__, bus, slot, func, reg); #endif r = -1; } return (r); } static void psycho_write_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg, uint32_t val, int width) { struct psycho_softc *sc; bus_space_handle_t bh; u_long offset = 0; sc = device_get_softc(dev); offset = PSYCHO_CONF_OFF(bus, slot, func, reg); bh = sc->sc_pci_bh[OFW_PCI_CS_CONFIG]; switch (width) { case 1: bus_space_write_1(sc->sc_pci_cfgt, bh, offset, val); break; case 2: bus_space_write_2(sc->sc_pci_cfgt, bh, offset, val); break; case 4: bus_space_write_4(sc->sc_pci_cfgt, bh, offset, val); break; default: panic("%s: bad width", __func__); } } static int psycho_route_interrupt(device_t bridge, device_t dev, int pin) { struct psycho_softc *sc; struct ofw_pci_register reg; bus_addr_t intrmap; ofw_pci_intr_t pintr, mintr; uint8_t maskbuf[sizeof(reg) + sizeof(pintr)]; sc = device_get_softc(bridge); pintr = pin; if (ofw_bus_lookup_imap(ofw_bus_get_node(dev), &sc->sc_pci_iinfo, ®, sizeof(reg), &pintr, sizeof(pintr), &mintr, sizeof(mintr), maskbuf)) return (mintr); /* * If this is outside of the range for an intpin, it's likely a full * INO, and no mapping is required at all; this happens on the U30, * where there's 
no interrupt map at the Psycho node. Fortunately, * there seem to be no INOs in the intpin range on this boxen, so * this easy heuristics will do. */ if (pin > 4) return (pin); /* * Guess the INO; we always assume that this is a non-OBIO * device, and that pin is a "real" intpin number. Determine * the mapping register to be used by the slot number. * We only need to do this on E450s, it seems; here, the slot numbers * for bus A are one-based, while those for bus B seemingly have an * offset of 2 (hence the factor of 3 below). */ intrmap = PSR_PCIA0_INT_MAP + 8 * (pci_get_slot(dev) - 1 + 3 * sc->sc_half); mintr = INTINO(PSYCHO_READ8(sc, intrmap)) + pin - 1; device_printf(bridge, "guessing interrupt %d for device %d.%d pin %d\n", (int)mintr, pci_get_slot(dev), pci_get_function(dev), pin); return (mintr); } static int psycho_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct psycho_softc *sc; sc = device_get_softc(dev); switch (which) { case PCIB_IVAR_DOMAIN: *result = 0; return (0); case PCIB_IVAR_BUS: *result = sc->sc_pci_secbus; return (0); } return (ENOENT); } static int psycho_dmasync(void *arg) { struct psycho_dmasync *pds = arg; (void)PCIB_READ_CONFIG(pds->pds_ppb, pds->pds_bus, pds->pds_slot, pds->pds_func, PCIR_VENDOR, 2); (void)PSYCHO_READ8(pds->pds_sc, PSR_DMA_WRITE_SYNC); return (pds->pds_handler(pds->pds_arg)); } static void psycho_intr_enable(void *arg) { struct intr_vector *iv = arg; struct psycho_icarg *pica = iv->iv_icarg; PSYCHO_WRITE8(pica->pica_sc, pica->pica_map, INTMAP_ENABLE(iv->iv_vec, iv->iv_mid)); } static void psycho_intr_disable(void *arg) { struct intr_vector *iv = arg; struct psycho_icarg *pica = iv->iv_icarg; PSYCHO_WRITE8(pica->pica_sc, pica->pica_map, iv->iv_vec); } static void psycho_intr_eoi(void *arg) { struct intr_vector *iv = arg; struct psycho_icarg *pica = iv->iv_icarg; PSYCHO_WRITE8(pica->pica_sc, pica->pica_clr, 0); } static int psycho_setup_intr(device_t dev, device_t child, struct resource *ires, int flags, driver_filter_t *filt, driver_intr_t *intr, void *arg, void **cookiep) { struct { int apb:1; int ppb:1; } found; devclass_t pci_devclass; device_t cdev, pdev, pcidev; struct psycho_softc *sc; struct psycho_dmasync *pds; u_long vec; int error; sc = device_get_softc(dev); /* * Make sure the vector is fully specified and we registered * our interrupt controller for it. */ vec = rman_get_start(ires); if (INTIGN(vec) != sc->sc_ign || intr_vectors[vec].iv_ic != &psycho_ic) { device_printf(dev, "invalid interrupt vector 0x%lx\n", vec); return (EINVAL); } /* * The Sabre-APB-combination has a bug where it does not drain * DMA write data for devices behind additional PCI-PCI bridges * underneath the APB PCI-PCI bridge. The workaround is to do * a read on the farest PCI-PCI bridge followed by a read of the * PCI DMA write sync register of the Sabre. * XXX installing the wrapper for an affected device and the * actual workaround in psycho_dmasync() should be moved to * psycho(4)-specific bus_dma_tag_create() and bus_dmamap_sync() * methods, respectively, once DMA tag creation is newbus'ified, * so the workaround isn't only applied for interrupt handlers * but also for polling(4) callbacks. 
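 * As a rough sketch of where this could end up (purely illustrative;
 * the method name and the dt_cookie field used below are assumptions,
 * not existing code), the sync reads would move into a psycho-specific
 * map-sync method along these lines:
 *
 *	static void
 *	psycho_dmamap_sync(bus_dma_tag_t dt, bus_dmamap_t map,
 *	    bus_dmasync_op_t op)
 *	{
 *		struct psycho_dmasync *pds = dt->dt_cookie;
 *
 *		if ((op & BUS_DMASYNC_POSTREAD) != 0) {
 *			(void)PCIB_READ_CONFIG(pds->pds_ppb, pds->pds_bus,
 *			    pds->pds_slot, pds->pds_func, PCIR_VENDOR, 2);
 *			(void)PSYCHO_READ8(pds->pds_sc, PSR_DMA_WRITE_SYNC);
 *		}
 *	}
 *
 * so the flush would happen on every post-read sync rather than only
 * when an interrupt handler runs.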
*/ if (sc->sc_mode == PSYCHO_MODE_SABRE) { pds = malloc(sizeof(*pds), M_DEVBUF, M_NOWAIT | M_ZERO); if (pds == NULL) return (ENOMEM); pcidev = NULL; found.apb = found.ppb = 0; pci_devclass = devclass_find("pci"); for (cdev = child; cdev != dev; cdev = pdev) { pdev = device_get_parent(cdev); if (pcidev == NULL) { if (device_get_devclass(pdev) != pci_devclass) continue; pcidev = cdev; continue; } /* * NB: APB would also match as PCI-PCI bridges. */ if (pci_get_vendor(cdev) == 0x108e && pci_get_device(cdev) == 0x5000) { found.apb = 1; break; } if (pci_get_class(cdev) == PCIC_BRIDGE && pci_get_subclass(cdev) == PCIS_BRIDGE_PCI) found.ppb = 1; } if (found.apb && found.ppb && pcidev != NULL) { pds->pds_sc = sc; pds->pds_arg = arg; pds->pds_ppb = device_get_parent(device_get_parent(pcidev)); pds->pds_bus = pci_get_bus(pcidev); pds->pds_slot = pci_get_slot(pcidev); pds->pds_func = pci_get_function(pcidev); if (bootverbose) device_printf(dev, "installed DMA sync " "workaround for device %d.%d on bus %d\n", pds->pds_slot, pds->pds_func, pds->pds_bus); if (intr == NULL) { pds->pds_handler = filt; error = bus_generic_setup_intr(dev, child, ires, flags, psycho_dmasync, intr, pds, cookiep); } else { pds->pds_handler = (driver_filter_t *)intr; error = bus_generic_setup_intr(dev, child, ires, flags, filt, (driver_intr_t *)psycho_dmasync, pds, cookiep); } } else error = bus_generic_setup_intr(dev, child, ires, flags, filt, intr, arg, cookiep); if (error != 0) { free(pds, M_DEVBUF); return (error); } pds->pds_cookie = *cookiep; *cookiep = pds; return (error); } return (bus_generic_setup_intr(dev, child, ires, flags, filt, intr, arg, cookiep)); } static int psycho_teardown_intr(device_t dev, device_t child, struct resource *vec, void *cookie) { struct psycho_softc *sc; struct psycho_dmasync *pds; int error; sc = device_get_softc(dev); if (sc->sc_mode == PSYCHO_MODE_SABRE) { pds = cookie; error = bus_generic_teardown_intr(dev, child, vec, pds->pds_cookie); if (error == 0) free(pds, M_DEVBUF); return (error); } return (bus_generic_teardown_intr(dev, child, vec, cookie)); } static struct resource * psycho_alloc_resource(device_t bus, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { struct psycho_softc *sc; struct resource *rv; struct rman *rm; bus_space_tag_t bt; bus_space_handle_t bh; int needactivate = flags & RF_ACTIVE; flags &= ~RF_ACTIVE; sc = device_get_softc(bus); if (type == SYS_RES_IRQ) { /* * XXX: Don't accept blank ranges for now, only single * interrupts. The other case should not happen with the * MI PCI code... * XXX: This may return a resource that is out of the * range that was specified. Is this correct...? 
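 * (The range changes because the MI code passes in the bare INO taken
 * from the firmware, while INTMAP_VEC() folds this bridge's interrupt
 * group number (IGN) into it so that the vector handed up to the nexus
 * is unique system-wide; the resulting number therefore need not lie
 * within the INO-sized range the caller asked for.)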
*/ if (start != end) panic("%s: XXX: interrupt range", __func__); start = end = INTMAP_VEC(sc->sc_ign, end); return (BUS_ALLOC_RESOURCE(device_get_parent(bus), child, type, rid, start, end, count, flags)); } switch (type) { case SYS_RES_MEMORY: rm = &sc->sc_pci_mem_rman; bt = sc->sc_pci_memt; bh = sc->sc_pci_bh[OFW_PCI_CS_MEM32]; break; case SYS_RES_IOPORT: rm = &sc->sc_pci_io_rman; bt = sc->sc_pci_iot; bh = sc->sc_pci_bh[OFW_PCI_CS_IO]; break; default: return (NULL); } rv = rman_reserve_resource(rm, start, end, count, flags, child); if (rv == NULL) return (NULL); rman_set_rid(rv, *rid); bh += rman_get_start(rv); rman_set_bustag(rv, bt); rman_set_bushandle(rv, bh); if (needactivate) { if (bus_activate_resource(child, type, *rid, rv)) { rman_release_resource(rv); return (NULL); } } return (rv); } static int psycho_activate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { void *p; int error; if (type == SYS_RES_IRQ) return (BUS_ACTIVATE_RESOURCE(device_get_parent(bus), child, type, rid, r)); if (type == SYS_RES_MEMORY) { /* * Need to memory-map the device space, as some drivers depend * on the virtual address being set and useable. */ error = sparc64_bus_mem_map(rman_get_bustag(r), rman_get_bushandle(r), rman_get_size(r), 0, 0, &p); if (error != 0) return (error); rman_set_virtual(r, p); } return (rman_activate_resource(r)); } static int psycho_deactivate_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { if (type == SYS_RES_IRQ) return (BUS_DEACTIVATE_RESOURCE(device_get_parent(bus), child, type, rid, r)); if (type == SYS_RES_MEMORY) { sparc64_bus_mem_unmap(rman_get_virtual(r), rman_get_size(r)); rman_set_virtual(r, NULL); } return (rman_deactivate_resource(r)); } static int psycho_release_resource(device_t bus, device_t child, int type, int rid, struct resource *r) { int error; if (type == SYS_RES_IRQ) return (BUS_RELEASE_RESOURCE(device_get_parent(bus), child, type, rid, r)); if (rman_get_flags(r) & RF_ACTIVE) { error = bus_deactivate_resource(child, type, rid, r); if (error) return (error); } return (rman_release_resource(r)); } static bus_dma_tag_t psycho_get_dma_tag(device_t bus, device_t child) { struct psycho_softc *sc; sc = device_get_softc(bus); return (sc->sc_pci_dmat); } static int psycho_intr_pending(device_t dev, ofw_pci_intr_t intr) { struct psycho_softc *sc; u_long diag; sc = device_get_softc(dev); if (psycho_find_intrmap(sc, intr, NULL, NULL, &diag) == 0) { device_printf(dev, "%s: mapping not found for %d\n", __func__, intr); return (0); } return (diag != 0); } static phandle_t psycho_get_node(device_t bus, device_t dev) { struct psycho_softc *sc; sc = device_get_softc(bus); /* We only have one child, the PCI bus, which needs our own node. */ return (sc->sc_node); } static int psycho_alloc_busno(device_t dev) { if (psycho_pci_bus_cnt == PCI_BUSMAX) panic("%s: out of PCI bus numbers", __func__); return (psycho_pci_bus_cnt++); } static void psycho_adjust_busrange(device_t dev, u_int subbus) { struct psycho_softc *sc; sc = device_get_softc(dev); /* If necessary, adjust the subordinate bus number register. 
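 * A bridge only forwards configuration cycles to bus numbers no greater
 * than its subordinate bus register, so when a child bridge is assigned
 * a bus number (via psycho_alloc_busno()) beyond the current subordinate
 * value, the register has to be bumped or devices behind that child
 * would become unreachable.  For example, with a secondary/subordinate
 * bus of 2/2, attaching a child bridge that gets bus 3 requires writing
 * 3 to PCSR_SUBBUS.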
*/ if (subbus > sc->sc_pci_subbus) { #ifdef PSYCHO_DEBUG device_printf(dev, "adjusting subordinate bus number from %d to %d\n", sc->sc_pci_subbus, subbus); #endif sc->sc_pci_subbus = subbus; PCIB_WRITE_CONFIG(dev, sc->sc_pci_secbus, PCS_DEVICE, PCS_FUNC, PCSR_SUBBUS, subbus, 1); } } static bus_space_tag_t psycho_alloc_bus_tag(struct psycho_softc *sc, int type) { bus_space_tag_t bt; bt = malloc(sizeof(struct bus_space_tag), M_DEVBUF, M_NOWAIT | M_ZERO); if (bt == NULL) panic("%s: out of memory", __func__); bt->bst_cookie = sc; bt->bst_parent = rman_get_bustag(sc->sc_mem_res); bt->bst_type = type; return (bt); } Index: stable/7/sys/sparc64/sparc64/machdep.c =================================================================== --- stable/7/sys/sparc64/sparc64/machdep.c (revision 177733) +++ stable/7/sys/sparc64/sparc64/machdep.c (revision 177734) @@ -1,912 +1,913 @@ /*- * Copyright (c) 2001 Jake Burkholder. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * from: FreeBSD: src/sys/i386/i386/machdep.c,v 1.477 2001/08/27 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_msgbuf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef int ofw_vec_t(void *); #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif struct tlb_entry *kernel_tlbs; int kernel_tlb_slots; int cold = 1; long Maxmem; long realmem; char pcpu0[PCPU_PAGES * PAGE_SIZE]; struct trapframe frame0; vm_offset_t kstack0; vm_paddr_t kstack0_phys; struct kva_md_info kmi; u_long ofw_vec; u_long ofw_tba; /* * Note: timer quality for CPU's is set low to try and prevent them from * being chosen as the primary timecounter. The CPU counters are not * synchronized among the CPU's so in MP machines this causes problems * when calculating the time. With this value the CPU's should only be * chosen as the primary timecounter as a last resort. */ #define UP_TICK_QUALITY 1000 #define MP_TICK_QUALITY -100 static struct timecounter tick_tc; char sparc64_model[32]; static int cpu_use_vis = 1; cpu_block_copy_t *cpu_block_copy; cpu_block_zero_t *cpu_block_zero; static timecounter_get_t tick_get_timecount; void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec); void sparc64_shutdown_final(void *dummy, int howto); static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); CTASSERT((1 << INT_SHIFT) == sizeof(int)); CTASSERT((1 << PTR_SHIFT) == sizeof(char *)); CTASSERT(sizeof(struct reg) == 256); CTASSERT(sizeof(struct fpreg) == 272); CTASSERT(sizeof(struct __mcontext) == 512); CTASSERT((sizeof(struct pcb) & (64 - 1)) == 0); CTASSERT((offsetof(struct pcb, pcb_kfp) & (64 - 1)) == 0); CTASSERT((offsetof(struct pcb, pcb_ufp) & (64 - 1)) == 0); CTASSERT(sizeof(struct pcb) <= ((KSTACK_PAGES * PAGE_SIZE) / 8)); CTASSERT(sizeof(struct pcpu) <= ((PCPU_PAGES * PAGE_SIZE) / 2)); static void cpu_startup(void *arg) { vm_paddr_t physsz; int i; tick_tc.tc_get_timecount = tick_get_timecount; tick_tc.tc_poll_pps = NULL; tick_tc.tc_counter_mask = ~0u; tick_tc.tc_frequency = tick_freq; tick_tc.tc_name = "tick"; tick_tc.tc_quality = UP_TICK_QUALITY; #ifdef SMP /* * We do not know if each CPU's tick counter is synchronized. 
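 * Since the generic timecounter code never automatically selects a
 * counter with a negative quality, the MP value below effectively means
 * "tick" is only used when chosen explicitly, e.g. via the
 * kern.timecounter.hardware sysctl.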
*/ if (cpu_mp_probe()) tick_tc.tc_quality = MP_TICK_QUALITY; #endif tc_init(&tick_tc); physsz = 0; for (i = 0; i < sparc64_nmemreg; i++) physsz += sparc64_memreg[i].mr_size; printf("real memory = %lu (%lu MB)\n", physsz, physsz / (1024 * 1024)); realmem = (long)physsz / PAGE_SIZE; vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); EVENTHANDLER_REGISTER(shutdown_final, sparc64_shutdown_final, NULL, SHUTDOWN_PRI_LAST); printf("avail memory = %lu (%lu MB)\n", cnt.v_free_count * PAGE_SIZE, cnt.v_free_count / ((1024 * 1024) / PAGE_SIZE)); if (bootverbose) printf("machine: %s\n", sparc64_model); cpu_identify(rdpr(ver), tick_freq, PCPU_GET(cpuid)); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { struct intr_request *ir; int i; pcpu->pc_irtail = &pcpu->pc_irhead; for (i = 0; i < IR_FREE; i++) { ir = &pcpu->pc_irpool[i]; ir->ir_next = pcpu->pc_irfree; pcpu->pc_irfree = ir; } } void spinlock_enter(void) { struct thread *td; register_t pil; td = curthread; if (td->td_md.md_spinlock_count == 0) { pil = rdpr(pil); wrpr(pil, 0, PIL_TICK); td->td_md.md_saved_pil = pil; } td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) wrpr(pil, td->td_md.md_saved_pil, 0); } unsigned tick_get_timecount(struct timecounter *tc) { return ((unsigned)rd(tick)); } void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec) { phandle_t child; phandle_t root; struct pcpu *pc; vm_offset_t end; caddr_t kmdp; u_int clock; char *env; char type[8]; end = 0; kmdp = NULL; /* * Find out what kind of cpu we have first, for anything that changes * behaviour. */ cpu_impl = VER_IMPL(rdpr(ver)); /* * Initialize Open Firmware (needed for console). */ OF_init(vec); /* * Parse metadata if present and fetch parameters. Must be before the * console is inited so cninit gets the right value of boothowto. */ if (mdp != NULL) { preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) { boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); end = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); kernel_tlb_slots = MD_FETCH(kmdp, MODINFOMD_DTLB_SLOTS, int); kernel_tlbs = (void *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_DTLB); } } init_param1(); root = OF_peer(0); for (child = OF_child(root); child != 0; child = OF_peer(child)) { OF_getprop(child, "device_type", type, sizeof(type)); if (strcmp(type, "cpu") == 0) break; } /* * Initialize the tick counter. Must be before the console is inited * in order to provide the low-level console drivers with a working * DELAY(). */ OF_getprop(child, "clock-frequency", &clock, sizeof(clock)); tick_init(clock); /* * Initialize the console before printing anything. */ cninit(); /* * Panic if there is no metadata. Most likely the kernel was booted * directly, instead of through loader(8). */ if (mdp == NULL || kmdp == NULL) { printf("sparc64_init: no loader metadata.\n" "This probably means you are not using loader(8).\n"); panic("sparc64_init"); } /* * Sanity check the kernel end, which is important. 
*/ if (end == 0) { printf("sparc64_init: warning, kernel end not specified.\n" "Attempting to continue anyway.\n"); end = (vm_offset_t)_end; } cache_init(child); uma_set_align(cache.dc_linesize - 1); cpu_block_copy = bcopy; cpu_block_zero = bzero; getenv_int("machdep.use_vis", &cpu_use_vis); if (cpu_use_vis) { switch (cpu_impl) { case CPU_IMPL_SPARC64: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: case CPU_IMPL_ULTRASPARCIIe: cpu_block_copy = spitfire_block_copy; cpu_block_zero = spitfire_block_zero; break; } } #ifdef SMP mp_init(); #endif /* * Initialize virtual memory and calculate physmem. */ pmap_bootstrap(end); /* * Initialize tunables. */ init_param2(physmem); env = getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } /* * Initialize the interrupt tables. */ intr_init1(); /* * Initialize proc0 stuff (p_contested needs to be done early). */ proc_linkup0(&proc0, &thread0); proc0.p_md.md_sigtramp = NULL; proc0.p_md.md_utrap = NULL; thread0.td_kstack = kstack0; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; frame0.tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_PRIV; thread0.td_frame = &frame0; /* * Prime our per-cpu data page for use. Note, we are using it for our * stack, so don't pass the real size (PAGE_SIZE) to pcpu_init or * it'll zero it out from under us. */ pc = (struct pcpu *)(pcpu0 + (PCPU_PAGES * PAGE_SIZE)) - 1; pcpu_init(pc, 0, sizeof(struct pcpu)); pc->pc_curthread = &thread0; pc->pc_curpcb = thread0.td_pcb; pc->pc_mid = UPA_CR_GET_MID(ldxa(0, ASI_UPA_CONFIG_REG)); pc->pc_addr = (vm_offset_t)pcpu0; pc->pc_node = child; pc->pc_tlb_ctx = TLB_CTX_USER_MIN; pc->pc_tlb_ctx_min = TLB_CTX_USER_MIN; pc->pc_tlb_ctx_max = TLB_CTX_USER_MAX; /* * Initialize global registers. */ cpu_setregs(pc); /* * Initialize the message buffer (after setting trap table). */ msgbufinit(msgbufp, MSGBUF_SIZE); mutex_init(); intr_init2(); /* * Finish pmap initialization now that we're ready for mutexes. */ PMAP_LOCK_INIT(kernel_pmap); OF_getprop(root, "name", sparc64_model, sizeof(sparc64_model) - 1); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) - kdb_enter("Boot flags requested debugger"); + kdb_enter_why(KDB_WHY_BOOTFLAGS, + "Boot flags requested debugger"); #endif } void set_openfirm_callback(ofw_vec_t *vec) { ofw_tba = rdpr(tba); ofw_vec = (u_long)vec; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct trapframe *tf; struct sigframe *sfp; struct sigacts *psp; struct sigframe sf; struct thread *td; struct frame *fp; struct proc *p; int oonstack; u_long sp; int sig; oonstack = 0; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; sp = tf->tf_sp + SPOFF; oonstack = sigonstack(sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Make sure we have a signal trampoline to return to. */ if (p->p_md.md_sigtramp == NULL) { /* * No signal trampoline... kill the process. */ CTR0(KTR_SIG, "sendsig: no sigtramp"); printf("sendsig: %s is too old, rebuild it\n", p->p_comm); sigexit(td, sig); /* NOTREACHED */ } /* Save user context. */ bzero(&sf, sizeof(sf)); get_mcontext(td, &sf.sf_uc.uc_mcontext, 0); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? 
SS_ONSTACK : 0) : SS_DISABLE; /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe)); } else sfp = (struct sigframe *)sp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); fp = (struct frame *)sfp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ tf->tf_out[0] = sig; tf->tf_out[2] = (register_t)&sfp->sf_uc; tf->tf_out[4] = (register_t)catcher; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ tf->tf_out[1] = (register_t)&sfp->sf_si; /* Fill in POSIX parts. */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ tf->tf_out[1] = ksi->ksi_code; tf->tf_out[3] = (register_t)ksi->ksi_addr; } /* Copy the sigframe out to the user's stack. */ if (rwindow_save(td) != 0 || copyout(&sf, sfp, sizeof(*sfp)) != 0 || suword(&fp->fr_in[6], tf->tf_out[6]) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); PROC_LOCK(p); sigexit(td, SIGILL); /* NOTREACHED */ } tf->tf_tpc = (u_long)p->p_md.md_sigtramp; tf->tf_tnpc = tf->tf_tpc + 4; tf->tf_sp = (u_long)fp - SPOFF; CTR3(KTR_SIG, "sendsig: return td=%p pc=%#lx sp=%#lx", td, tf->tf_tpc, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif /* * MPSAFE */ int sigreturn(struct thread *td, struct sigreturn_args *uap) { struct proc *p; mcontext_t *mc; ucontext_t uc; int error; p = td->td_proc; if (rwindow_save(td)) { PROC_LOCK(p); sigexit(td, SIGILL); } CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } mc = &uc.uc_mcontext; error = set_mcontext(td, mc); if (error != 0) return (error); PROC_LOCK(p); td->td_sigmask = uc.uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); CTR4(KTR_SIG, "sigreturn: return td=%p pc=%#lx sp=%#lx tstate=%#lx", td, mc->mc_tpc, mc->mc_sp, mc->mc_tstate); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. 
*/ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_pc = tf->tf_tpc; pcb->pcb_sp = tf->tf_sp; } int get_mcontext(struct thread *td, mcontext_t *mc, int flags) { struct trapframe *tf; struct pcb *pcb; tf = td->td_frame; pcb = td->td_pcb; bcopy(tf, mc, sizeof(*tf)); if (flags & GET_MC_CLEAR_RET) { mc->mc_out[0] = 0; mc->mc_out[1] = 0; } mc->mc_flags = _MC_VERSION; critical_enter(); if ((tf->tf_fprs & FPRS_FEF) != 0) { savefpctx(pcb->pcb_ufp); tf->tf_fprs &= ~FPRS_FEF; pcb->pcb_flags |= PCB_FEF; } if ((pcb->pcb_flags & PCB_FEF) != 0) { bcopy(pcb->pcb_ufp, mc->mc_fp, sizeof(mc->mc_fp)); mc->mc_fprs |= FPRS_FEF; } critical_exit(); return (0); } int set_mcontext(struct thread *td, const mcontext_t *mc) { struct trapframe *tf; struct pcb *pcb; uint64_t wstate; if (!TSTATE_SECURE(mc->mc_tstate) || (mc->mc_flags & ((1L << _MC_VERSION_BITS) - 1)) != _MC_VERSION) return (EINVAL); tf = td->td_frame; pcb = td->td_pcb; /* Make sure the windows are spilled first. */ flushw(); wstate = tf->tf_wstate; bcopy(mc, tf, sizeof(*tf)); tf->tf_wstate = wstate; if ((mc->mc_fprs & FPRS_FEF) != 0) { tf->tf_fprs = 0; bcopy(mc->mc_fp, pcb->pcb_ufp, sizeof(pcb->pcb_ufp)); pcb->pcb_flags |= PCB_FEF; } return (0); } /* * Exit the kernel and execute a firmware call that will not return, as * specified by the arguments. */ void cpu_shutdown(void *args) { #ifdef SMP cpu_mp_shutdown(); #endif openfirmware_exit(args); } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } /* * Duplicate OF_exit() with a different firmware call function that restores * the trap table, otherwise a RED state exception is triggered in at least * some firmware versions. */ void cpu_halt(void) { static struct { cell_t name; cell_t nargs; cell_t nreturns; } args = { (cell_t)"exit", 0, 0 }; cpu_shutdown(&args); } void sparc64_shutdown_final(void *dummy, int howto) { static struct { cell_t name; cell_t nargs; cell_t nreturns; } args = { (cell_t)"SUNW,power-off", 0, 0 }; /* Turn the power off? 
*/ if ((howto & RB_POWEROFF) != 0) cpu_shutdown(&args); /* In case of halt, return to the firmware */ if ((howto & RB_HALT) != 0) cpu_halt(); } void cpu_idle(void) { /* Insert code to halt (until next interrupt) for the idle loop */ } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_tpc = addr; td->td_frame->tf_tnpc = addr + 4; return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (0); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (0); } void exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tf; struct pcb *pcb; struct proc *p; u_long sp; /* XXX no cpu_exec */ p = td->td_proc; p->p_md.md_sigtramp = NULL; if (p->p_md.md_utrap != NULL) { utrap_free(p->p_md.md_utrap); p->p_md.md_utrap = NULL; } pcb = td->td_pcb; tf = td->td_frame; sp = rounddown(stack, 16); bzero(pcb, sizeof(*pcb)); bzero(tf, sizeof(*tf)); tf->tf_out[0] = stack; tf->tf_out[3] = p->p_sysent->sv_psstrings; tf->tf_out[6] = sp - SPOFF - sizeof(struct frame); tf->tf_tnpc = entry + 4; tf->tf_tpc = entry; tf->tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_MM_TSO; td->td_retval[0] = tf->tf_out[0]; td->td_retval[1] = tf->tf_out[1]; } int fill_regs(struct thread *td, struct reg *regs) { bcopy(td->td_frame, regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; if (!TSTATE_SECURE(regs->r_tstate)) return (EINVAL); tf = td->td_frame; regs->r_wstate = tf->tf_wstate; bcopy(regs, tf, sizeof(*regs)); return (0); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *tf; struct pcb *pcb; pcb = td->td_pcb; tf = td->td_frame; bcopy(pcb->pcb_ufp, fpregs->fr_regs, sizeof(fpregs->fr_regs)); fpregs->fr_fsr = tf->tf_fsr; fpregs->fr_gsr = tf->tf_gsr; return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *tf; struct pcb *pcb; pcb = td->td_pcb; tf = td->td_frame; tf->tf_fprs &= ~FPRS_FEF; bcopy(fpregs->fr_regs, pcb->pcb_ufp, sizeof(pcb->pcb_ufp)); tf->tf_fsr = fpregs->fr_fsr; tf->tf_gsr = fpregs->fr_gsr; return (0); } struct md_utrap * utrap_alloc(void) { struct md_utrap *ut; ut = malloc(sizeof(struct md_utrap), M_SUBPROC, M_WAITOK | M_ZERO); ut->ut_refcnt = 1; return (ut); } void utrap_free(struct md_utrap *ut) { int refcnt; if (ut == NULL) return; mtx_pool_lock(mtxpool_sleep, ut); ut->ut_refcnt--; refcnt = ut->ut_refcnt; mtx_pool_unlock(mtxpool_sleep, ut); if (refcnt == 0) free(ut, M_SUBPROC); } struct md_utrap * utrap_hold(struct md_utrap *ut) { if (ut == NULL) return (NULL); mtx_pool_lock(mtxpool_sleep, ut); ut->ut_refcnt++; mtx_pool_unlock(mtxpool_sleep, ut); return (ut); } Index: stable/7/sys/sparc64/sparc64/trap.c =================================================================== --- stable/7/sys/sparc64/sparc64/trap.c (revision 177733) +++ stable/7/sys/sparc64/sparc64/trap.c (revision 177734) @@ -1,671 +1,671 @@ /*- * Copyright (c) 2001, Jake Burkholder * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * from: FreeBSD: src/sys/i386/i386/trap.c,v 1.197 2001/07/19 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_ktr.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void trap(struct trapframe *tf); void syscall(struct trapframe *tf); static int trap_pfault(struct thread *td, struct trapframe *tf); extern char copy_fault[]; extern char copy_nofault_begin[]; extern char copy_nofault_end[]; extern char fs_fault[]; extern char fs_nofault_begin[]; extern char fs_nofault_end[]; extern char fs_nofault_intr_begin[]; extern char fs_nofault_intr_end[]; extern char fas_fault[]; extern char fas_nofault_begin[]; extern char fas_nofault_end[]; extern char *syscallnames[]; const char *trap_msg[] = { "reserved", "instruction access exception", "instruction access error", "instruction access protection", "illtrap instruction", "illegal instruction", "privileged opcode", "floating point disabled", "floating point exception ieee 754", "floating point exception other", "tag overflow", "division by zero", "data access exception", "data access error", "data access protection", "memory address not aligned", "privileged action", "async data error", "trap instruction 16", "trap instruction 17", "trap instruction 18", "trap instruction 19", "trap instruction 20", "trap instruction 21", "trap instruction 22", "trap instruction 23", "trap instruction 24", "trap instruction 25", "trap instruction 26", "trap instruction 27", "trap instruction 28", "trap instruction 29", "trap instruction 30", "trap instruction 31", "fast instruction access mmu miss", "fast data access mmu miss", "interrupt", "physical address watchpoint", 
"virtual address watchpoint", "corrected ecc error", "spill", "fill", "fill", "breakpoint", "clean window", "range check", "fix alignment", "integer overflow", "syscall", "restore physical watchpoint", "restore virtual watchpoint", "kernel stack fault", }; static const int trap_sig[] = { SIGILL, /* reserved */ SIGILL, /* instruction access exception */ SIGILL, /* instruction access error */ SIGILL, /* instruction access protection */ SIGILL, /* illtrap instruction */ SIGILL, /* illegal instruction */ SIGBUS, /* privileged opcode */ SIGFPE, /* floating point disabled */ SIGFPE, /* floating point exception ieee 754 */ SIGFPE, /* floating point exception other */ SIGEMT, /* tag overflow */ SIGFPE, /* division by zero */ SIGILL, /* data access exception */ SIGILL, /* data access error */ SIGBUS, /* data access protection */ SIGBUS, /* memory address not aligned */ SIGBUS, /* privileged action */ SIGBUS, /* async data error */ SIGILL, /* trap instruction 16 */ SIGILL, /* trap instruction 17 */ SIGILL, /* trap instruction 18 */ SIGILL, /* trap instruction 19 */ SIGILL, /* trap instruction 20 */ SIGILL, /* trap instruction 21 */ SIGILL, /* trap instruction 22 */ SIGILL, /* trap instruction 23 */ SIGILL, /* trap instruction 24 */ SIGILL, /* trap instruction 25 */ SIGILL, /* trap instruction 26 */ SIGILL, /* trap instruction 27 */ SIGILL, /* trap instruction 28 */ SIGILL, /* trap instruction 29 */ SIGILL, /* trap instruction 30 */ SIGILL, /* trap instruction 31 */ SIGSEGV, /* fast instruction access mmu miss */ SIGSEGV, /* fast data access mmu miss */ -1, /* interrupt */ -1, /* physical address watchpoint */ -1, /* virtual address watchpoint */ -1, /* corrected ecc error */ SIGILL, /* spill */ SIGILL, /* fill */ SIGILL, /* fill */ SIGTRAP, /* breakpoint */ SIGILL, /* clean window */ SIGILL, /* range check */ SIGILL, /* fix alignment */ SIGILL, /* integer overflow */ SIGSYS, /* syscall */ -1, /* restore physical watchpoint */ -1, /* restore virtual watchpoint */ -1, /* kernel stack fault */ }; CTASSERT(sizeof(struct trapframe) == 256); int debugger_on_signal = 0; SYSCTL_INT(_debug, OID_AUTO, debugger_on_signal, CTLFLAG_RW, &debugger_on_signal, 0, ""); void trap(struct trapframe *tf) { struct thread *td; struct proc *p; int error; int sig; register_t addr; ksiginfo_t ksi; td = PCPU_GET(curthread); CTR4(KTR_TRAP, "trap: %p type=%s (%s) pil=%#lx", td, trap_msg[tf->tf_type & ~T_KERNEL], (TRAPF_USERMODE(tf) ? "user" : "kernel"), rdpr(pil)); PCPU_INC(cnt.v_trap); if ((tf->tf_tstate & TSTATE_PRIV) == 0) { KASSERT(td != NULL, ("trap: curthread NULL")); KASSERT(td->td_proc != NULL, ("trap: curproc NULL")); p = td->td_proc; td->td_pticks = 0; td->td_frame = tf; addr = tf->tf_tpc; if (td->td_ucred != p->p_ucred) cred_update_thread(td); switch (tf->tf_type) { case T_DATA_MISS: case T_DATA_PROTECTION: addr = tf->tf_sfar; /* FALLTHROUGH */ case T_INSTRUCTION_MISS: sig = trap_pfault(td, tf); break; case T_FILL: sig = rwindow_load(td, tf, 2); break; case T_FILL_RET: sig = rwindow_load(td, tf, 1); break; case T_SPILL: sig = rwindow_save(td); break; default: if (tf->tf_type < 0 || tf->tf_type >= T_MAX || trap_sig[tf->tf_type] == -1) panic("trap: bad trap type"); sig = trap_sig[tf->tf_type]; break; } if (sig != 0) { /* Translate fault for emulators. 
*/ if (p->p_sysent->sv_transtrap != NULL) { sig = p->p_sysent->sv_transtrap(sig, tf->tf_type); } if (debugger_on_signal && (sig == 4 || sig == 10 || sig == 11)) - kdb_enter("trapsig"); + kdb_enter_why(KDB_WHY_TRAPSIG, "trapsig"); ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = (int)tf->tf_type; /* XXX not POSIX */ ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = (int)tf->tf_type; trapsignal(td, &ksi); } userret(td, tf); mtx_assert(&Giant, MA_NOTOWNED); } else { KASSERT((tf->tf_type & T_KERNEL) != 0, ("trap: kernel trap isn't")); #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif switch (tf->tf_type & ~T_KERNEL) { #ifdef KDB case T_BREAKPOINT: case T_KSTACK_FAULT: error = (kdb_trap(tf->tf_type, 0, tf) == 0); TF_DONE(tf); break; #ifdef notyet case T_PA_WATCHPOINT: case T_VA_WATCHPOINT: error = db_watch_trap(tf); break; #endif #endif case T_DATA_MISS: case T_DATA_PROTECTION: case T_INSTRUCTION_MISS: error = trap_pfault(td, tf); break; case T_DATA_EXCEPTION: case T_MEM_ADDRESS_NOT_ALIGNED: if ((tf->tf_sfsr & MMU_SFSR_FV) != 0 && MMU_SFSR_GET_ASI(tf->tf_sfsr) == ASI_AIUP) { if (tf->tf_tpc >= (u_long)copy_nofault_begin && tf->tf_tpc <= (u_long)copy_nofault_end) { tf->tf_tpc = (u_long)copy_fault; tf->tf_tnpc = tf->tf_tpc + 4; error = 0; break; } if (tf->tf_tpc >= (u_long)fs_nofault_begin && tf->tf_tpc <= (u_long)fs_nofault_end) { tf->tf_tpc = (u_long)fs_fault; tf->tf_tnpc = tf->tf_tpc + 4; error = 0; break; } } error = 1; break; case T_DATA_ERROR: /* * Handle PCI poke/peek as per UltraSPARC IIi * User's Manual 16.2.1, modulo checking the * TPC as USIII CPUs generate a precise trap * instead of a special deferred one. */ if (tf->tf_tpc > (u_long)fas_nofault_begin && tf->tf_tpc < (u_long)fas_nofault_end) { cache_flush(); cache_enable(); tf->tf_tpc = (u_long)fas_fault; tf->tf_tnpc = tf->tf_tpc + 4; error = 0; break; } error = 1; break; default: error = 1; break; } if (error != 0) panic("trap: %s", trap_msg[tf->tf_type & ~T_KERNEL]); } CTR1(KTR_TRAP, "trap: td=%p return", td); } static int trap_pfault(struct thread *td, struct trapframe *tf) { struct vmspace *vm; struct pcb *pcb; struct proc *p; vm_offset_t va; vm_prot_t prot; u_long ctx; int flags; int type; int rv; if (td == NULL) return (-1); KASSERT(td->td_pcb != NULL, ("trap_pfault: pcb NULL")); KASSERT(td->td_proc != NULL, ("trap_pfault: curproc NULL")); KASSERT(td->td_proc->p_vmspace != NULL, ("trap_pfault: vmspace NULL")); p = td->td_proc; rv = KERN_SUCCESS; ctx = TLB_TAR_CTX(tf->tf_tar); pcb = td->td_pcb; type = tf->tf_type & ~T_KERNEL; va = TLB_TAR_VA(tf->tf_tar); CTR4(KTR_TRAP, "trap_pfault: td=%p pm_ctx=%#lx va=%#lx ctx=%#lx", td, p->p_vmspace->vm_pmap.pm_context[PCPU_GET(cpuid)], va, ctx); if (type == T_DATA_PROTECTION) { prot = VM_PROT_WRITE; flags = VM_FAULT_DIRTY; } else { if (type == T_DATA_MISS) prot = VM_PROT_READ; else prot = VM_PROT_READ | VM_PROT_EXECUTE; flags = VM_FAULT_NORMAL; } if (ctx != TLB_CTX_KERNEL) { if ((tf->tf_tstate & TSTATE_PRIV) != 0 && (tf->tf_tpc >= (u_long)fs_nofault_intr_begin && tf->tf_tpc <= (u_long)fs_nofault_intr_end)) { tf->tf_tpc = (u_long)fs_fault; tf->tf_tnpc = tf->tf_tpc + 4; return (0); } /* * This is a fault on non-kernel virtual memory. */ vm = p->p_vmspace; /* * Keep swapout from messing with us during this * critical time. */ PROC_LOCK(p); ++p->p_lock; PROC_UNLOCK(p); /* Fault in the user page. */ rv = vm_fault(&vm->vm_map, va, prot, flags); /* * Now the process can be swapped again. 
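 * (p_lock here is the process hold count normally manipulated via the
 * PHOLD()/PRELE() macros, not a lock in the mutex sense; a non-zero
 * count keeps the process, in particular its kernel stack, from being
 * swapped out while the fault is serviced.)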
*/ PROC_LOCK(p); --p->p_lock; PROC_UNLOCK(p); } else { /* * This is a fault on kernel virtual memory. Attempts to * access kernel memory from user mode cause privileged * action traps, not page fault. */ KASSERT(tf->tf_tstate & TSTATE_PRIV, ("trap_pfault: fault on nucleus context from user mode")); /* * Don't have to worry about process locking or stacks in the * kernel. */ rv = vm_fault(kernel_map, va, prot, VM_FAULT_NORMAL); } CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d", td, va, rv); if (rv == KERN_SUCCESS) return (0); if (ctx != TLB_CTX_KERNEL && (tf->tf_tstate & TSTATE_PRIV) != 0) { if (tf->tf_tpc >= (u_long)fs_nofault_begin && tf->tf_tpc <= (u_long)fs_nofault_end) { tf->tf_tpc = (u_long)fs_fault; tf->tf_tnpc = tf->tf_tpc + 4; return (0); } if (tf->tf_tpc >= (u_long)copy_nofault_begin && tf->tf_tpc <= (u_long)copy_nofault_end) { tf->tf_tpc = (u_long)copy_fault; tf->tf_tnpc = tf->tf_tpc + 4; return (0); } } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } /* Maximum number of arguments that can be passed via the out registers. */ #define REG_MAXARGS 6 /* * Syscall handler. The arguments to the syscall are passed in the o registers * by the caller, and are saved in the trap frame. The syscall number is passed * in %g1 (and also saved in the trap frame). */ void syscall(struct trapframe *tf) { struct sysent *callp; struct thread *td; register_t args[8]; register_t *argp; struct proc *p; u_long code; u_long tpc; int reg; int regcnt; int narg; int error; td = PCPU_GET(curthread); KASSERT(td != NULL, ("trap: curthread NULL")); KASSERT(td->td_proc != NULL, ("trap: curproc NULL")); p = td->td_proc; PCPU_INC(cnt.v_syscall); narg = 0; error = 0; reg = 0; regcnt = REG_MAXARGS; td->td_pticks = 0; td->td_frame = tf; if (td->td_ucred != p->p_ucred) cred_update_thread(td); #ifdef KSE if (p->p_flag & P_SA) thread_user_enter(td); #endif code = tf->tf_global[1]; /* * For syscalls, we don't want to retry the faulting instruction * (usually), instead we need to advance one instruction. */ tpc = tf->tf_tpc; TF_DONE(tf); if (p->p_sysent->sv_prepsyscall) { /* * The prep code is MP aware. 
*/ #if 0 (*p->p_sysent->sv_prepsyscall)(tf, args, &code, ¶ms); #endif } else if (code == SYS_syscall || code == SYS___syscall) { code = tf->tf_out[reg++]; regcnt--; } if (p->p_sysent->sv_mask) code &= p->p_sysent->sv_mask; if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; narg = callp->sy_narg; if (narg <= regcnt) { argp = &tf->tf_out[reg]; error = 0; } else { KASSERT(narg <= sizeof(args) / sizeof(args[0]), ("Too many syscall arguments!")); argp = args; bcopy(&tf->tf_out[reg], args, sizeof(args[0]) * regcnt); error = copyin((void *)(tf->tf_out[6] + SPOFF + offsetof(struct frame, fr_pad[6])), &args[regcnt], (narg - regcnt) * sizeof(args[0])); } CTR5(KTR_SYSC, "syscall: td=%p %s(%#lx, %#lx, %#lx)", td, syscallnames[code], argp[0], argp[1], argp[2]); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) ktrsyscall(code, narg, argp); #endif td->td_syscalls++; if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = 0; STOPEVENT(p, S_SCE, narg); /* MP aware */ PTRACESTOP_SC(p, td, S_PT_SCE); AUDIT_SYSCALL_ENTER(code, td); error = (*callp->sy_call)(td, argp); AUDIT_SYSCALL_EXIT(error, td); CTR5(KTR_SYSC, "syscall: p=%p error=%d %s return %#lx %#lx ", p, error, syscallnames[code], td->td_retval[0], td->td_retval[1]); } /* * MP SAFE (we may or may not have the MP lock at this point) */ switch (error) { case 0: tf->tf_out[0] = td->td_retval[0]; tf->tf_out[1] = td->td_retval[1]; tf->tf_tstate &= ~TSTATE_XCC_C; break; case ERESTART: /* * Undo the tpc advancement we have done above, we want to * reexecute the system call. */ tf->tf_tpc = tpc; tf->tf_tnpc -= 4; break; case EJUSTRETURN: break; default: if (p->p_sysent->sv_errsize) { if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; } tf->tf_out[0] = error; tf->tf_tstate |= TSTATE_XCC_C; break; } /* * Check for misbehavior. */ WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"); KASSERT(td->td_critnest == 0, ("System call %s returning in a critical section", (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???")); KASSERT(td->td_locks == 0, ("System call %s returning with %d locks held", (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???", td->td_locks)); /* * Handle reschedule and other end-of-syscall issues */ userret(td, tf); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(code, error, td->td_retval[0]); #endif /* * This works because errno is findable through the * register set. If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); PTRACESTOP_SC(p, td, S_PT_SCX); } Index: stable/7/sys/sun4v/sun4v/hvcons.c =================================================================== --- stable/7/sys/sun4v/sun4v/hvcons.c (revision 177733) +++ stable/7/sys/sun4v/sun4v/hvcons.c (revision 177734) @@ -1,421 +1,424 @@ /*- * Copyright (C) 2006 Kip Macy * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY Kip Macy ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL Kip Macy BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mdesc_bus_if.h" #include "opt_simulator.h" #include #include #include #define HVCN_POLL_FREQ 10 static d_open_t hvcn_open; static d_close_t hvcn_close; static struct cdevsw hvcn_cdevsw = { .d_version = D_VERSION, .d_open = hvcn_open, .d_close = hvcn_close, .d_name = "hvcn", .d_flags = D_TTY | D_NEEDGIANT, }; #define PCBURST 16 static struct tty *hvcn_tp = NULL; static struct resource *hvcn_irq; static void *hvcn_intrhand; static int bufindex; static int buflen; static u_char buf[PCBURST]; static int polltime; static struct callout_handle hvcn_timeouthandle = CALLOUT_HANDLE_INITIALIZER(&hvcn_timeouthandle); #if defined(KDB) static int alt_break_state; #endif static void hvcn_tty_start(struct tty *); static int hvcn_tty_param(struct tty *, struct termios *); static void hvcn_tty_stop(struct tty *, int); static void hvcn_timeout(void *); static cn_probe_t hvcn_cnprobe; static cn_init_t hvcn_cninit; static cn_getc_t hvcn_cngetc; static cn_putc_t hvcn_cnputc; static cn_term_t hvcn_cnterm; CONSOLE_DRIVER(hvcn); void hv_cnputs(char *p) { int c, error; while ((c = *p++) != '\0') { if (c == '\n') { do { error = hv_cons_putchar('\r'); } while (error == H_EWOULDBLOCK); } do { error = hv_cons_putchar(c); } while (error == H_EWOULDBLOCK); } } static int hvcn_open(struct cdev *dev, int flag, int mode, struct thread *td) { struct tty *tp; int error, setuptimeout; setuptimeout = 0; if (dev->si_tty == NULL) { hvcn_tp = ttyalloc(); dev->si_tty = hvcn_tp; hvcn_tp->t_dev = dev; } tp = dev->si_tty; tp->t_oproc = hvcn_tty_start; tp->t_param = hvcn_tty_param; tp->t_stop = hvcn_tty_stop; if ((tp->t_state & TS_ISOPEN) == 0) { tp->t_state |= TS_CARR_ON; ttyconsolemode(tp, 0); setuptimeout = 1; } else if ((tp->t_state & TS_XCLUDE) && priv_check(td, PRIV_TTY_EXCLUSIVE)) { return (EBUSY); } error = ttyld_open(tp, dev); #if defined(SIMULATOR) || 1 if (error == 0 && setuptimeout) { int polltime; polltime = hz / HVCN_POLL_FREQ; if (polltime < 1) { polltime = 1; } hvcn_timeouthandle = timeout(hvcn_timeout, tp, polltime); } #endif return (error); } static int hvcn_close(struct cdev *dev, int flag, int mode, struct thread *td) { int unit; struct tty *tp; unit = minor(dev); tp = dev->si_tty; if (unit != 0) return (ENXIO); untimeout(hvcn_timeout, tp, hvcn_timeouthandle); ttyld_close(tp, flag); tty_close(tp); return (0); } static void hvcn_cnprobe(struct consdev *cp) { #if 0 char name[64]; node = OF_peer(0); if (node == -1) panic("%s: OF_peer failed.", __func__); for (node = OF_child(node); node > 0; node = OF_peer(node)) { OF_getprop(node, "name", name, sizeof(name)); if (!strcmp(name, "virtual-devices")) break; } if (node == 0) goto done; 
for (node = OF_child(node); node > 0; node = OF_peer(node)) { OF_getprop(node, "name", name, sizeof(name)); if (!strcmp(name, "console")) break; } done: #endif cp->cn_pri = CN_NORMAL; } static void hvcn_cninit(struct consdev *cp) { sprintf(cp->cn_name, "hvcn"); } static int hvcn_cngetc(struct consdev *cp) { unsigned char ch; int l; ch = '\0'; while ((l = hv_cons_getchar(&ch)) != H_EOK) { #if defined(KDB) if (l == H_BREAK || l == H_HUP) - kdb_enter("Break sequence on console"); + kdb_enter_why(KDB_WHY_BREAK, + "Break sequence on console"); if (kdb_alt_break(ch, &alt_break_state)) - kdb_enter("Break sequence on console"); + kdb_enter_why(KDB_WHY_BREAK, "Break sequence on console"); #endif if (l != -2 && l != 0) { return (-1); } } return (ch); } static int hvcn_cncheckc(struct consdev *cp) { unsigned char ch; int l; if ((l = hv_cons_getchar(&ch)) == H_EOK) { #if defined(KDB) if (l == H_BREAK || l == H_HUP) - kdb_enter("Break sequence on console"); + kdb_enter_why(KDB_WHY_BREAK, + "Break sequence on console"); if (kdb_alt_break(ch, &alt_break_state)) - kdb_enter("Break sequence on console"); + kdb_enter_why(KDB_WHY_BREAK, + "Break sequence on console"); #endif return (ch); } return (-1); } static void hvcn_cnterm(struct consdev *cp) { ; } static void hvcn_cnputc(struct consdev *cp, int c) { int error; error = 0; do { if (c == '\n') error = hv_cons_putchar('\r'); } while (error == H_EWOULDBLOCK); do { error = hv_cons_putchar(c); } while (error == H_EWOULDBLOCK); } static int hvcn_tty_param(struct tty *tp, struct termios *t) { tp->t_ispeed = t->c_ispeed; tp->t_ospeed = t->c_ospeed; tp->t_cflag = t->c_cflag; return (0); } static void hvcn_tty_start(struct tty *tp) { if (!(tp->t_state & (TS_TIMEOUT | TS_BUSY | TS_TTSTOP))) { tp->t_state |= TS_BUSY; do { if (buflen == 0) { buflen = q_to_b(&tp->t_outq, buf, PCBURST); bufindex = 0; } while (buflen) { if (hv_cons_putchar(buf[bufindex]) == H_EWOULDBLOCK) goto done; bufindex++; buflen--; } } while (tp->t_outq.c_cc != 0); done: tp->t_state &= ~TS_BUSY; ttwwakeup(tp); } } static void hvcn_tty_stop(struct tty *tp, int flag) { if ((tp->t_state & TS_BUSY) && !(tp->t_state & TS_TTSTOP)) tp->t_state |= TS_FLUSH; } static void hvcn_intr(void *v) { struct tty *tp; int c; tp = (struct tty *)v; while ((c = hvcn_cncheckc(NULL)) != -1) if (tp->t_state & TS_ISOPEN) ttyld_rint(tp, c); if (tp->t_outq.c_cc != 0 || buflen != 0) hvcn_tty_start(tp); } static void hvcn_timeout(void *v) { hvcn_intr(v); hvcn_timeouthandle = timeout(hvcn_timeout, v, polltime); } static int hvcn_dev_probe(device_t dev) { if (strcmp(mdesc_bus_get_name(dev), "console")) return (ENXIO); device_set_desc(dev, "sun4v virtual console"); return (0); } static int hvcn_dev_attach(device_t dev) { struct cdev *cdev; int error, rid; /* belongs in attach - but attach is getting called multiple times * for reasons I have not delved into */ if (hvcn_consdev.cn_pri == CN_DEAD || hvcn_consdev.cn_name[0] == '\0') return (ENXIO); cdev = make_dev(&hvcn_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, "ttyv%r", 1); make_dev_alias(cdev, "hvcn"); rid = 0; if ((hvcn_irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_SHAREABLE | RF_ACTIVE)) == NULL) { device_printf(dev, "couldn't map interrupt\n"); error = ENXIO; goto fail; } error = bus_setup_intr(dev, hvcn_irq, INTR_TYPE_TTY, NULL, hvcn_intr, hvcn_tp, hvcn_intrhand); if (error) device_printf(dev, "couldn't set up irq\n"); fail: return (error); } static device_method_t hvcn_methods[] = { DEVMETHOD(device_probe, hvcn_dev_probe), DEVMETHOD(device_attach, hvcn_dev_attach), {0, 
0} }; static driver_t hvcn_driver = { "hvcn", hvcn_methods, 0, }; static devclass_t hvcn_devclass; DRIVER_MODULE(hvcn, vnex, hvcn_driver, hvcn_devclass, 0, 0); Index: stable/7/sys/sun4v/sun4v/machdep.c =================================================================== --- stable/7/sys/sun4v/sun4v/machdep.c (revision 177733) +++ stable/7/sys/sun4v/sun4v/machdep.c (revision 177734) @@ -1,1001 +1,1002 @@ /*- * Copyright (c) 2001 Jake Burkholder. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * from: FreeBSD: src/sys/i386/i386/machdep.c,v 1.477 2001/08/27 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_msgbuf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX move this to a header */ extern void mdesc_init(void); typedef int ofw_vec_t(void *); #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif struct tlb_entry *kernel_tlbs; int kernel_tlb_slots; int cold = 1; long Maxmem; long realmem; char pcpu0[PCPU_PAGES * PAGE_SIZE]; struct trapframe frame0; int trap_conversion[256]; vm_paddr_t mmu_fault_status_area; vm_offset_t kstack0; vm_paddr_t kstack0_phys; struct kva_md_info kmi; u_long ofw_vec; u_long ofw_tba; /* * Note: timer quality for CPU's is set low to try and prevent them from * being chosen as the primary timecounter. 
The CPU counters are not * synchronized among the CPU's so in MP machines this causes problems * when calculating the time. With this value the CPU's should only be * chosen as the primary timecounter as a last resort. */ #define UP_TICK_QUALITY 1000 #ifdef SUN4V #define MP_TICK_QUALITY 1000 #else #define MP_TICK_QUALITY -100 #endif static struct timecounter tick_tc; char sparc64_model[32]; cpu_block_copy_t *cpu_block_copy; cpu_block_zero_t *cpu_block_zero; static timecounter_get_t tick_get_timecount; void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec); void sparc64_shutdown_final(void *dummy, int howto); static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); CTASSERT((1 << INT_SHIFT) == sizeof(int)); CTASSERT((1 << PTR_SHIFT) == sizeof(char *)); CTASSERT(sizeof(struct reg) == 256); CTASSERT(sizeof(struct fpreg) == 272); CTASSERT(sizeof(struct __mcontext) == 512); CTASSERT((sizeof(struct pcb) & (64 - 1)) == 0); CTASSERT((offsetof(struct pcb, pcb_kfp) & (64 - 1)) == 0); CTASSERT((offsetof(struct pcb, pcb_ufp) & (64 - 1)) == 0); CTASSERT(sizeof(struct pcb) <= ((KSTACK_PAGES * PAGE_SIZE) / 8)); CTASSERT(sizeof(struct pcpu) <= ((PCPU_PAGES * PAGE_SIZE) / 2)); CTASSERT((sizeof(struct pcpu) & ((1<<6)-1)) == 0); #define BVPRINTF(x) \ if (bootverbose) \ printf(x); static void cpu_startup(void *arg) { vm_paddr_t physsz; int i; tick_tc.tc_get_timecount = tick_get_timecount; tick_tc.tc_poll_pps = NULL; tick_tc.tc_counter_mask = ~0u; tick_tc.tc_frequency = tick_freq; tick_tc.tc_name = "tick"; tick_tc.tc_quality = UP_TICK_QUALITY; #ifdef SMP /* * We do not know if each CPU's tick counter is synchronized. */ if (cpu_mp_probe()) tick_tc.tc_quality = MP_TICK_QUALITY; #endif tc_init(&tick_tc); physsz = 0; for (i = 0; i < sparc64_nmemreg; i++) physsz += sparc64_memreg[i].mr_size; printf("real memory = %lu (%lu MB)\n", physsz, physsz / (1024 * 1024)); realmem = (long)physsz; vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); EVENTHANDLER_REGISTER(shutdown_final, sparc64_shutdown_final, NULL, SHUTDOWN_PRI_LAST); printf("avail memory = %lu (%lu MB)\n", cnt.v_free_count * PAGE_SIZE, cnt.v_free_count / ((1024 * 1024) / PAGE_SIZE)); if (bootverbose) printf("machine: %s\n", sparc64_model); #ifdef notyet cpu_identify(rdpr(ver), tick_freq, PCPU_GET(cpuid)); #endif } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { struct intr_request *ir; int i; pcpu->pc_irtail = &pcpu->pc_irhead; for (i = 0; i < IR_FREE; i++) { ir = &pcpu->pc_irpool[i]; ir->ir_next = pcpu->pc_irfree; pcpu->pc_irfree = ir; } } void spinlock_enter(void) { struct thread *td; register_t pil; td = curthread; if (td->td_md.md_spinlock_count == 0) { pil = intr_disable(); td->td_md.md_saved_pil = pil; } td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) { intr_restore(td->td_md.md_saved_pil); } } unsigned tick_get_timecount(struct timecounter *tc) { return ((unsigned)rd(tick)); } void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec) { phandle_t child; phandle_t root; struct pcpu *pc; vm_offset_t end; caddr_t kmdp; u_int clock; char *env; char type[8]; vm_paddr_t mmfsa; int i; end = 0; kmdp = NULL; /* * Initialize Open Firmware (needed for console). */ OF_init(vec); /* * XXX */ bootverbose = 1; /* * Parse metadata if present and fetch parameters. 
Must be before the * console is inited so cninit gets the right value of boothowto. */ if (mdp != NULL) { preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) { boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); end = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); kernel_tlb_slots = MD_FETCH(kmdp, MODINFOMD_DTLB_SLOTS, int); kernel_tlbs = (void *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_DTLB); } } if (boothowto & RB_VERBOSE) bootverbose = 1; init_param1(); root = OF_peer(0); for (child = OF_child(root); child != 0; child = OF_peer(child)) { OF_getprop(child, "device_type", type, sizeof(type)); if (strcmp(type, "cpu") == 0) break; } OF_getprop(child, "clock-frequency", &clock, sizeof(clock)); /* * Initialize the console before printing anything. * console uses the pcpu area for serialization */ pc = (struct pcpu *)(pcpu0 + (PCPU_PAGES * PAGE_SIZE)) - 1; cpu_setregs(pc); /* * Initialize proc0 stuff (p_contested needs to be done early). */ proc_linkup0(&proc0, &thread0); proc0.p_md.md_sigtramp = NULL; proc0.p_md.md_utrap = NULL; frame0.tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_PRIV; thread0.td_frame = &frame0; if ((u_long)thread0.td_frame & 0x3f) { panic("unaligned frame0"); } /* * Prime our per-cpu data page for use. Note, we are using it for our * stack, so don't pass the real size (PAGE_SIZE) to pcpu_init or * it'll zero it out from under us. */ pc = (struct pcpu *)(pcpu0 + (PCPU_PAGES * PAGE_SIZE)) - 1; pcpu_init(pc, 0, sizeof(struct pcpu)); pc->pc_curthread = &thread0; pc->pc_addr = (vm_offset_t)pcpu0; cninit(); tick_init(clock); printf("cpu0: UltraSparc T1 Processor (%d.%02d MHz CPU)\n", (clock + 4999) / 1000000, ((clock + 4999) / 10000) % 100); /* * Panic is there is no metadata. Most likely the kernel was booted * directly, instead of through loader(8). */ if (mdp == NULL || kmdp == NULL) { printf("sparc64_init: no loader metadata.\n" "This probably means you are not using loader(8).\n"); panic("sparc64_init"); } /* * Sanity check the kernel end, which is important. */ if (end == 0) { printf("sparc64_init: warning, kernel end not specified.\n" "Attempting to continue anyway.\n"); end = (vm_offset_t)_end; } cpu_block_copy = bcopy; cpu_block_zero = bzero; #ifdef SMP mp_tramp = mp_tramp_alloc(); #endif env = getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } /* * Initialize global registers. * needed for curthread to work */ cpu_setregs(pc); /* * Initialize virtual memory and calculate physmem. */ pmap_bootstrap(end); thread0.td_kstack = kstack0; thread0.td_md.md_saved_pil = 0; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; thread0.td_pcb->pcb_kstack = (uint64_t)(((char *)thread0.td_pcb) - (CCFSZ + SPOFF)); thread0.td_pcb = (struct pcb *)TLB_PHYS_TO_DIRECT(vtophys((vm_offset_t)thread0.td_pcb)); pc->pc_curpcb = thread0.td_pcb; if (((thread0.td_pcb->pcb_kstack + SPOFF) & 0x3f) != 0) { printf("unaligned stack pcb_kstack & 0x3f == 0x%lx\n", ((thread0.td_pcb->pcb_kstack + SPOFF) & 0x3f)); } /* * Update PCPU_REG to point to direct address * to support easy phys <-> virt translation in trap handler */ pc = (struct pcpu *)TLB_PHYS_TO_DIRECT(vtophys(pc)); BVPRINTF("initializing cpu regs\n"); cpu_setregs(pc); /* * Initialize tunables. 
*/ BVPRINTF("initialize tunables\n"); init_param2(physmem); /* * setup trap table and fault status area */ BVPRINTF("initialize trap tables\n"); mmfsa = mmu_fault_status_area + MMFSA_SIZE; BVPRINTF("setwstate\n"); set_wstate(WSTATE_KERN); BVPRINTF("set_mmfsa_scratchpad\n"); set_mmfsa_scratchpad(mmfsa); BVPRINTF("init_mondo_queue\n"); init_mondo_queue(); BVPRINTF("set_mmfsa_traptable\n"); set_mmfsa_traptable(&tl0_base, mmfsa); BVPRINTF("trap conversion\n"); for (i = 0; i < 256; i++) trap_conversion[i] = 0; trap_conversion[TT_INSTRUCTION_EXCEPTION] = T_INSTRUCTION_EXCEPTION; trap_conversion[TT_INSTRUCTION_MISS] = T_INSTRUCTION_MISS; trap_conversion[TT_ILLEGAL_INSTRUCTION] = T_ILLEGAL_INSTRUCTION; trap_conversion[TT_PRIVILEGED_OPCODE] = T_PRIVILEGED_OPCODE; trap_conversion[TT_FP_EXCEPTION_IEEE_754] = T_FP_EXCEPTION_IEEE_754; trap_conversion[TT_TAG_OVERFLOW] = T_TAG_OVERFLOW; trap_conversion[TT_DIVISION_BY_ZERO] = T_DIVISION_BY_ZERO; trap_conversion[TT_DATA_EXCEPTION] = T_DATA_EXCEPTION; trap_conversion[TT_DATA_MISS] = T_DATA_MISS; trap_conversion[TT_ALIGNMENT] = T_ALIGNMENT; trap_conversion[TT_DATA_PROTECTION] = T_DATA_PROTECTION; /* * Initialize the message buffer (after setting trap table). */ BVPRINTF("initialize msgbuf\n"); msgbufinit(msgbufp, MSGBUF_SIZE); BVPRINTF("initialize mutexes\n"); mutex_init(); BVPRINTF("initialize machine descriptor table\n"); mdesc_init(); BVPRINTF("initialize get model name\n"); OF_getprop(root, "name", sparc64_model, sizeof(sparc64_model) - 1); BVPRINTF("initialize kdb\n"); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) - kdb_enter("Boot flags requested debugger"); + kdb_enter_why(KDB_WHY_BOOTFLAGS, + "Boot flags requested debugger"); #endif BVPRINTF("sparc64_init done\n"); } void set_openfirm_callback(ofw_vec_t *vec) { ofw_tba = rdpr(tba); ofw_vec = (u_long)vec; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct trapframe *tf; struct sigframe *sfp; struct sigacts *psp; struct sigframe sf; struct thread *td; struct frame *fp; struct proc *p; int oonstack; u_long sp; int sig; int code; oonstack = 0; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; sp = tf->tf_sp + SPOFF; oonstack = sigonstack(sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Make sure we have a signal trampoline to return to. */ if (p->p_md.md_sigtramp == NULL) { /* * No signal tramoline... kill the process. */ CTR0(KTR_SIG, "sendsig: no sigtramp"); printf("sendsig: %s is too old, rebuild it\n", p->p_comm); sigexit(td, sig); /* NOTREACHED */ } /* Save user context. */ bzero(&sf, sizeof(sf)); get_mcontext(td, &sf.sf_uc.uc_mcontext, 0); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe)); } else sfp = (struct sigframe *)sp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); fp = (struct frame *)sfp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. 
*/ tf->tf_out[0] = sig; tf->tf_out[2] = (register_t)&sfp->sf_uc; tf->tf_out[4] = (register_t)catcher; /* Fill siginfo structure. */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_addr = (void *)tf->tf_tpc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ tf->tf_out[1] = (register_t)&sfp->sf_si; /* Fill in POSIX parts. */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ tf->tf_out[1] = ksi->ksi_code; tf->tf_out[3] = (register_t)ksi->ksi_addr; } /* Copy the sigframe out to the user's stack. */ if (rwindow_save(td) != 0 || copyout(&sf, sfp, sizeof(*sfp)) != 0 || suword(&fp->fr_in[6], tf->tf_out[6]) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); PROC_LOCK(p); sigexit(td, SIGILL); /* NOTREACHED */ } tf->tf_tpc = (u_long)p->p_md.md_sigtramp; tf->tf_tnpc = tf->tf_tpc + 4; tf->tf_sp = (u_long)fp - SPOFF; CTR3(KTR_SIG, "sendsig: return td=%p pc=%#lx sp=%#lx", td, tf->tf_tpc, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif /* * MPSAFE */ int sigreturn(struct thread *td, struct sigreturn_args *uap) { struct proc *p; mcontext_t *mc; ucontext_t uc; int error; p = td->td_proc; if (rwindow_save(td)) { PROC_LOCK(p); sigexit(td, SIGILL); } CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } mc = &uc.uc_mcontext; error = set_mcontext(td, mc); if (error != 0) return (error); PROC_LOCK(p); td->td_sigmask = uc.uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); CTR4(KTR_SIG, "sigreturn: return td=%p pc=%#lx sp=%#lx tstate=%#lx", td, mc->mc_tpc, mc->mc_sp, mc->mc_tstate); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_pc = tf->tf_tpc; pcb->pcb_sp = tf->tf_sp; } int get_mcontext(struct thread *td, mcontext_t *mc, int flags) { struct trapframe *tf; struct pcb *pcb; tf = td->td_frame; pcb = td->td_pcb; bcopy(tf, mc, sizeof(*tf)); if (flags & GET_MC_CLEAR_RET) { mc->mc_out[0] = 0; mc->mc_out[1] = 0; } mc->mc_flags = _MC_VERSION; critical_enter(); if ((tf->tf_fprs & FPRS_FEF) != 0) { savefpctx(pcb->pcb_ufp); pcb->pcb_flags |= PCB_FEF; tf->tf_fprs &= ~FPRS_FEF; } if ((pcb->pcb_flags & PCB_FEF) != 0) { bcopy(pcb->pcb_ufp, mc->mc_fp, sizeof(mc->mc_fp)); mc->mc_fprs |= FPRS_FEF; } critical_exit(); return (0); } int set_mcontext(struct thread *td, const mcontext_t *mc) { struct trapframe *tf; struct pcb *pcb; uint64_t wstate; if (!TSTATE_SECURE(mc->mc_tstate) || (mc->mc_flags & ((1L << _MC_VERSION_BITS) - 1)) != _MC_VERSION) return (EINVAL); tf = td->td_frame; pcb = td->td_pcb; /* Make sure the windows are spilled first. 
*/ flushw(); wstate = tf->tf_wstate; bcopy(mc, tf, sizeof(*tf)); tf->tf_wstate = wstate; if ((mc->mc_fprs & FPRS_FEF) != 0) { tf->tf_fprs = 0; bcopy(mc->mc_fp, pcb->pcb_ufp, sizeof(pcb->pcb_ufp)); pcb->pcb_flags |= PCB_FEF; } return (0); } /* * Exit the kernel and execute a firmware call that will not return, as * specified by the arguments. */ void cpu_shutdown(void *args) { #ifdef SMP cpu_mp_shutdown(); #endif hv_mach_exit(0); } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } /* * Duplicate OF_exit() with a different firmware call function that restores * the trap table, otherwise a RED state exception is triggered in at least * some firmware versions. */ void cpu_halt(void) { static struct { cell_t name; cell_t nargs; cell_t nreturns; } args = { (cell_t)"exit", 0, 0 }; cpu_shutdown(&args); } void sparc64_shutdown_final(void *dummy, int howto) { static struct { cell_t name; cell_t nargs; cell_t nreturns; } args = { (cell_t)"SUNW,power-off", 0, 0 }; /* Turn the power off? */ if ((howto & RB_POWEROFF) != 0) cpu_shutdown(&args); /* In case of halt, return to the firmware */ if ((howto & RB_HALT) != 0) cpu_halt(); } void cpu_idle(void) { if (rdpr(pil) != 0) panic("pil in cpu_idle not 0 - %ld", rdpr(pil)); if (rdpr(pstate) != 0x16) panic("interrupts disabled in cpu_idle 0x%lx", rdpr(pstate)); /* XXX heinous hack begin*/ cpu_yield(); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_tpc = addr; td->td_frame->tf_tnpc = addr + 4; return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (0); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (0); } void exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tf; struct pcb *pcb; struct proc *p; uint64_t kstack; u_long sp; /* XXX no cpu_exec */ p = td->td_proc; p->p_md.md_sigtramp = NULL; if (p->p_md.md_utrap != NULL) { utrap_free(p->p_md.md_utrap); p->p_md.md_utrap = NULL; } pcb = td->td_pcb; kstack = pcb->pcb_kstack; tf = td->td_frame; sp = rounddown(stack, 16); bzero(pcb, sizeof(*pcb)); bzero(tf, sizeof(*tf)); pcb->pcb_kstack = kstack; tf->tf_out[0] = stack; tf->tf_out[3] = p->p_sysent->sv_psstrings; tf->tf_out[6] = sp - SPOFF - sizeof(struct frame); tf->tf_tnpc = entry + 4; tf->tf_tpc = entry; tf->tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_MM_TSO; td->td_retval[0] = tf->tf_out[0]; td->td_retval[1] = tf->tf_out[1]; } int fill_regs(struct thread *td, struct reg *regs) { bcopy(td->td_frame, regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; if (!TSTATE_SECURE(regs->r_tstate)) return (EINVAL); tf = td->td_frame; regs->r_wstate = tf->tf_wstate; bcopy(regs, tf, sizeof(*regs)); return (0); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *tf; struct pcb *pcb; pcb = td->td_pcb; tf = td->td_frame; tf->tf_fprs = ~FPRS_FEF; bcopy(pcb->pcb_ufp, fpregs->fr_regs, sizeof(fpregs->fr_regs)); fpregs->fr_fsr = tf->tf_fsr; fpregs->fr_gsr = tf->tf_gsr; return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *tf; struct pcb *pcb; pcb = td->td_pcb; tf = td->td_frame; tf->tf_fprs &= ~FPRS_FEF; bcopy(fpregs->fr_regs, pcb->pcb_ufp, sizeof(pcb->pcb_ufp)); tf->tf_fsr = fpregs->fr_fsr; tf->tf_gsr = fpregs->fr_gsr; return 
(0); } struct md_utrap * utrap_alloc(void) { struct md_utrap *ut; ut = malloc(sizeof(struct md_utrap), M_SUBPROC, M_WAITOK | M_ZERO); ut->ut_refcnt = 1; return (ut); } void utrap_free(struct md_utrap *ut) { int refcnt; if (ut == NULL) return; mtx_pool_lock(mtxpool_sleep, ut); ut->ut_refcnt--; refcnt = ut->ut_refcnt; mtx_pool_unlock(mtxpool_sleep, ut); if (refcnt == 0) free(ut, M_SUBPROC); } struct md_utrap * utrap_hold(struct md_utrap *ut) { if (ut == NULL) return (NULL); mtx_pool_lock(mtxpool_sleep, ut); ut->ut_refcnt++; mtx_pool_unlock(mtxpool_sleep, ut); return (ut); } void cpu_yield(void) { if (rdpr(pil) < PIL_TICK) hv_cpu_yield(); } Index: stable/7/sys/sun4v/sun4v/trap.c =================================================================== --- stable/7/sys/sun4v/sun4v/trap.c (revision 177733) +++ stable/7/sys/sun4v/sun4v/trap.c (revision 177734) @@ -1,742 +1,742 @@ /*- * Copyright (c) 2001, Jake Burkholder * Copyright (C) 1994, David Greenman * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the University of Utah, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * from: @(#)trap.c 7.4 (Berkeley) 5/13/91 * from: FreeBSD: src/sys/i386/i386/trap.c,v 1.197 2001/07/19 * $FreeBSD$ */ #include "opt_ddb.h" #include "opt_ktr.h" #include "opt_ktrace.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include void trap(struct trapframe *tf, int64_t type, uint64_t data); void syscall(struct trapframe *tf); extern vm_paddr_t mmu_fault_status_area; static int trap_pfault(struct thread *td, struct trapframe *tf, int64_t type, uint64_t data); extern char copy_fault[]; extern char copy_nofault_begin[]; extern char copy_nofault_end[]; extern char fs_fault[]; extern char fs_nofault_begin[]; extern char fs_nofault_end[]; extern char fs_nofault_intr_begin[]; extern char fs_nofault_intr_end[]; extern char fas_fault[]; extern char fas_nofault_begin[]; extern char fas_nofault_end[]; extern char *syscallnames[]; const char *trap_msg[] = { "reserved", "instruction access exception", "instruction access error", "instruction access protection", "illtrap instruction", "illegal instruction", "privileged opcode", "floating point disabled", "floating point exception ieee 754", "floating point exception other", "tag overflow", "division by zero", "data access exception", "data access error", "data access protection", "memory address not aligned", "privileged action", "async data error", "trap instruction 16", "trap instruction 17", "trap instruction 18", "trap instruction 19", "trap instruction 20", "trap instruction 21", "trap instruction 22", "trap instruction 23", "trap instruction 24", "trap instruction 25", "trap instruction 26", "trap instruction 27", "trap instruction 28", "trap instruction 29", "trap instruction 30", "trap instruction 31", "fast instruction access mmu miss", "fast data access mmu miss", "interrupt", "physical address watchpoint", "virtual address watchpoint", "corrected ecc error", "spill", "fill", "fill", "breakpoint", "clean window", "range check", "fix alignment", "integer overflow", "syscall", "restore physical watchpoint", "restore virtual watchpoint", "kernel stack fault", "resumable error", "nonresumable error" }; const int trap_sig[] = { SIGILL, /* reserved */ SIGILL, /* instruction access exception */ SIGILL, /* instruction access error */ SIGILL, /* instruction access protection */ SIGILL, /* illtrap instruction */ SIGILL, /* illegal instruction */ SIGBUS, /* privileged opcode */ SIGFPE, /* floating point disabled */ SIGFPE, /* floating point exception ieee 754 */ SIGFPE, /* floating point exception other */ SIGEMT, /* tag overflow */ SIGFPE, /* division by zero */ SIGILL, /* data access exception */ SIGILL, /* data access error */ SIGBUS, /* data access protection */ SIGBUS, /* memory address not aligned */ SIGBUS, /* privileged action */ SIGBUS, /* async data error */ SIGILL, /* trap instruction 16 */ SIGILL, /* trap instruction 17 */ SIGILL, /* trap instruction 18 */ SIGILL, /* trap instruction 19 */ SIGILL, /* trap instruction 20 */ SIGILL, /* trap instruction 21 */ SIGILL, /* trap instruction 22 */ SIGILL, /* trap instruction 23 */ SIGILL, /* trap instruction 24 */ SIGILL, /* trap instruction 25 */ SIGILL, /* trap instruction 26 */ SIGILL, /* trap instruction 27 */ SIGILL, /* trap 
instruction 28 */ SIGILL, /* trap instruction 29 */ SIGILL, /* trap instruction 30 */ SIGILL, /* trap instruction 31 */ SIGFPE, /* floating point error */ SIGSEGV, /* fast data access mmu miss */ -1, /* interrupt */ -1, /* physical address watchpoint */ -1, /* virtual address watchpoint */ -1, /* corrected ecc error */ SIGILL, /* spill */ SIGILL, /* fill */ SIGILL, /* fill */ SIGTRAP, /* breakpoint */ SIGILL, /* clean window */ SIGILL, /* range check */ SIGILL, /* fix alignment */ SIGILL, /* integer overflow */ SIGSYS, /* syscall */ -1, /* restore physical watchpoint */ -1, /* restore virtual watchpoint */ -1, /* kernel stack fault */ }; CTASSERT(sizeof(struct trapframe) == 256); int debugger_on_signal = 0; #ifdef DEBUG SYSCTL_INT(_debug, OID_AUTO, debugger_on_signal, CTLFLAG_RW, &debugger_on_signal, 0, ""); #endif /* * This interface allows the client to safely take over the %tba by * the prom's service. The prom will take care of the quiescence of * interrupts and handle any pending soft interrupts. * This call also sets the MMU fault status area for the CPU. */ void set_mmfsa_traptable(void *tba_addr, uint64_t mmfsa_ra) { static struct { cell_t name; cell_t nargs; cell_t nreturns; cell_t tba_addr; cell_t mmfsa_ra; } args = { (cell_t)"SUNW,set-trap-table", 2, }; args.tba_addr = (cell_t)tba_addr; args.mmfsa_ra = mmfsa_ra; openfirmware(&args); } void trap_init(void) { vm_paddr_t mmfsa; mmfsa = mmu_fault_status_area + (MMFSA_SIZE*curcpu); set_wstate(WSTATE_KERN); set_mmfsa_scratchpad(mmfsa); init_mondo_queue(); set_mmfsa_traptable(&tl0_base, mmfsa); } void trap(struct trapframe *tf, int64_t type, uint64_t data) { struct thread *td; struct proc *p; int error, sig, ctx; uint64_t trapno; register_t addr; ksiginfo_t ksi; td = curthread; CTR4(KTR_TRAP, "trap: %p type=%s (%s) pil=%#lx", td, trap_msg[trapno], (TRAPF_USERMODE(tf) ? "user" : "kernel"), rdpr(pil)); PCPU_INC(cnt.v_trap); trapno = (type & TRAP_MASK); ctx = (type >> TRAP_CTX_SHIFT); if (((tf->tf_tstate & TSTATE_PRIV) == 0) || (ctx != 0)) { KASSERT(td != NULL, ("trap: curthread NULL")); KASSERT(td->td_proc != NULL, ("trap: curproc NULL")); p = td->td_proc; td->td_pticks = 0; td->td_frame = tf; addr = tf->tf_tpc; if (td->td_ucred != p->p_ucred) cred_update_thread(td); switch (trapno) { case T_DATA_MISS: case T_DATA_PROTECTION: addr = TLB_TAR_VA(data); case T_INSTRUCTION_MISS: sig = trap_pfault(td, tf, trapno, data); break; case T_FILL: sig = rwindow_load(td, tf, 2); break; case T_FILL_RET: sig = rwindow_load(td, tf, 1); break; case T_SPILL: sig = rwindow_save(td); break; case T_DATA_EXCEPTION: case T_DATA_ERROR: case T_MEM_ADDRESS_NOT_ALIGNED: printf("bad trap trapno=%ld data=0x%lx pc=0x%lx\n", trapno, data, tf->tf_tpc); if (tf->tf_tpc >= (u_long)copy_nofault_begin && tf->tf_tpc <= (u_long)copy_nofault_end) { tf->tf_tpc = (u_long)copy_fault; tf->tf_tnpc = tf->tf_tpc + 4; sig = 0; break; } if (tf->tf_tpc >= (u_long)fs_nofault_begin && tf->tf_tpc <= (u_long)fs_nofault_end) { tf->tf_tpc = (u_long)fs_fault; tf->tf_tnpc = tf->tf_tpc + 4; sig = 0; break; } addr = data; sig = trap_sig[trapno]; break; default: if (trapno < 0 || trapno >= T_MAX || trap_sig[trapno] == -1) panic("trap: bad trap type"); sig = trap_sig[trapno]; break; } if (sig != 0) { /* Translate fault for emulators. 
*/ if (p->p_sysent->sv_transtrap != NULL) { sig = p->p_sysent->sv_transtrap(sig, trapno); } if (debugger_on_signal && (sig == 4 || sig == 10 || sig == 11)) - kdb_enter("trapsig"); + kdb_enter_why(KDB_WHY_TRAPSIG, "trapsig"); #ifdef VERBOSE if (sig == 4 || sig == 10 || sig == 11) printf("trap: %ld:%s: 0x%lx at 0x%lx on cpu=%d sig=%d proc=%s\n", trapno, trap_msg[trapno], data, tf->tf_tpc, curcpu, sig, curthread->td_proc->p_comm); #endif /* XXX I've renumbered the traps to largely reflect what the hardware uses * so this will need to be re-visited */ ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = (int)trapno; /* XXX not POSIX */ ksi.ksi_addr = (void *)addr; ksi.ksi_trapno = (int)trapno; trapsignal(td, &ksi); } userret(td, tf); mtx_assert(&Giant, MA_NOTOWNED); } else { KASSERT((type & T_KERNEL) != 0, ("trap: kernel trap isn't - trap: %ld:%s: 0x%lx at 0x%lx on cpu=%d\n", trapno, trap_msg[trapno], data, tf->tf_tpc, curcpu)); #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif switch (trapno) { #ifdef KDB case T_BREAKPOINT: case T_KSTACK_FAULT: error = (kdb_trap(trapno, 0, tf) == 0); TF_DONE(tf); break; #endif case T_DATA_MISS: case T_DATA_PROTECTION: case T_INSTRUCTION_MISS: error = trap_pfault(td, tf, trapno, data); break; case T_DATA_EXCEPTION: printf("data exception on 0x%lx at 0x%lx\n", data, tf->tf_tpc); printf("trap: %ld=%s: 0x%lx at 0x%lx:0x%lx\n", trapno, trap_msg[trapno], data, tf->tf_tpc, tf->tf_tnpc); case T_ILLEGAL_INSTRUCTION: if (tf->tf_tpc > KERNBASE) { printf("illinstr: 0x%lx\n", tf->tf_tpc); printf("illinstr: 0x%x\n", *((uint32_t *)tf->tf_tpc)); } case T_DATA_ERROR: case T_ALIGNMENT: if (tf->tf_asi == ASI_AIUS) { if (tf->tf_tpc >= (u_long)copy_nofault_begin && tf->tf_tpc <= (u_long)copy_nofault_end) { tf->tf_tpc = (u_long)copy_fault; tf->tf_tnpc = tf->tf_tpc + 4; error = 0; break; } printf("ASI_AIUS but bad tpc\n"); } if (tf->tf_tpc >= (u_long)fs_nofault_begin && tf->tf_tpc <= (u_long)fs_nofault_end) { tf->tf_tpc = (u_long)fs_fault; tf->tf_tnpc = tf->tf_tpc + 4; error = 0; break; } printf("asi=0x%lx\n", tf->tf_asi); error = 1; break; default: printf("unchecked trap 0x%lx asi=0x%lx\n", trapno, tf->tf_asi); error = 1; break; } if (error != 0) panic("trap: %ld=%s: 0x%lx at 0x%lx:0x%lx error=%d asi=0x%lx", trapno, trap_msg[trapno], data, tf->tf_tpc, tf->tf_tnpc, error, tf->tf_asi); } CTR1(KTR_TRAP, "trap: td=%p return", td); } static int trap_pfault(struct thread *td, struct trapframe *tf, int64_t type, uint64_t data) { struct vmspace *vm; struct pcb *pcb; struct proc *p; vm_offset_t va; vm_prot_t prot; u_long ctx; int flags; int rv; if (td == NULL) return (-1); p = td->td_proc; rv = KERN_SUCCESS; ctx = TLB_TAR_CTX(data); pcb = td->td_pcb; type = type & ~T_KERNEL; va = TLB_TAR_VA(data); if (data > VM_MIN_DIRECT_ADDRESS) printf("trap_pfault(type=%ld, data=0x%lx, tpc=0x%lx, ctx=0x%lx)\n", type, data, tf->tf_tpc, ctx); #if 0 CTR4(KTR_TRAP, "trap_pfault: td=%p pm_ctx=%#lx va=%#lx ctx=%#lx", td, p->p_vmspace->vm_pmap.pm_context, va, ctx); #endif KASSERT(td->td_pcb != NULL, ("trap_pfault: pcb NULL")); KASSERT(td->td_proc != NULL, ("trap_pfault: curproc NULL")); KASSERT(td->td_proc->p_vmspace != NULL, ("trap_pfault: vmspace NULL")); if (type == T_DATA_PROTECTION) { prot = VM_PROT_WRITE; flags = VM_FAULT_DIRTY; } else { if (type == T_DATA_MISS) prot = VM_PROT_READ; else prot = VM_PROT_READ | VM_PROT_EXECUTE; flags = VM_FAULT_NORMAL; } if (ctx != TLB_CTX_KERNEL) { if ((tf->tf_tstate & TSTATE_PRIV) != 0 && (tf->tf_tpc >= (u_long)fs_nofault_intr_begin && tf->tf_tpc <= 
(u_long)fs_nofault_intr_end)) { tf->tf_tpc = (u_long)fs_fault; tf->tf_tnpc = tf->tf_tpc + 4; return (0); } /* * This is a fault on non-kernel virtual memory. */ vm = p->p_vmspace; /* * Keep swapout from messing with us during this * critical time. */ PROC_LOCK(p); ++p->p_lock; PROC_UNLOCK(p); /* Fault in the user page. */ rv = vm_fault(&vm->vm_map, va, prot, flags); /* * Now the process can be swapped again. */ PROC_LOCK(p); --p->p_lock; PROC_UNLOCK(p); } else { /* * This is a fault on kernel virtual memory. Attempts to * access kernel memory from user mode cause privileged * action traps, not page fault. */ KASSERT(tf->tf_tstate & TSTATE_PRIV, ("trap_pfault: fault on nucleus context from user mode")); /* * Don't have to worry about process locking or stacks in the * kernel. */ rv = vm_fault(kernel_map, va, prot, VM_FAULT_NORMAL); } CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d", td, va, rv); if (rv == KERN_SUCCESS) return (0); if (ctx != TLB_CTX_KERNEL && (tf->tf_tstate & TSTATE_PRIV) != 0) { if (tf->tf_tpc >= (u_long)fs_nofault_begin && tf->tf_tpc <= (u_long)fs_nofault_end) { tf->tf_tpc = (u_long)fs_fault; tf->tf_tnpc = tf->tf_tpc + 4; return (0); } if (tf->tf_tpc >= (u_long)copy_nofault_begin && tf->tf_tpc <= (u_long)copy_nofault_end) { tf->tf_tpc = (u_long)copy_fault; tf->tf_tnpc = tf->tf_tpc + 4; return (0); } } return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); } /* Maximum number of arguments that can be passed via the out registers. */ #define REG_MAXARGS 6 /* * Syscall handler. The arguments to the syscall are passed in the o registers * by the caller, and are saved in the trap frame. The syscall number is passed * in %g1 (and also saved in the trap frame). */ void syscall(struct trapframe *tf) { struct sysent *callp; struct thread *td; register_t args[8]; register_t *argp; struct proc *p; u_long code; u_long tpc; int reg; int regcnt; int narg; int error; td = curthread; KASSERT(td != NULL, ("trap: curthread NULL")); KASSERT(td->td_proc != NULL, ("trap: curproc NULL")); p = td->td_proc; PCPU_INC(cnt.v_syscall); narg = 0; error = 0; reg = 0; regcnt = REG_MAXARGS; td->td_pticks = 0; td->td_frame = tf; if (td->td_ucred != p->p_ucred) cred_update_thread(td); code = tf->tf_global[1]; /* * For syscalls, we don't want to retry the faulting instruction * (usually), instead we need to advance one instruction. */ tpc = tf->tf_tpc; TF_DONE(tf); if (p->p_sysent->sv_prepsyscall) { /* * The prep code is MP aware. */ #if 0 (*p->p_sysent->sv_prepsyscall)(tf, args, &code, ¶ms); #endif } else if (code == SYS_syscall || code == SYS___syscall) { code = tf->tf_out[reg++]; regcnt--; } if (p->p_sysent->sv_mask) code &= p->p_sysent->sv_mask; if (code >= p->p_sysent->sv_size) callp = &p->p_sysent->sv_table[0]; else callp = &p->p_sysent->sv_table[code]; narg = callp->sy_narg; if (narg <= regcnt) { argp = &tf->tf_out[reg]; error = 0; } else { KASSERT(narg <= sizeof(args) / sizeof(args[0]), ("Too many syscall arguments!")); argp = args; bcopy(&tf->tf_out[reg], args, sizeof(args[0]) * regcnt); error = copyin((void *)(tf->tf_out[6] + SPOFF + offsetof(struct frame, fr_pad[6])), &args[regcnt], (narg - regcnt) * sizeof(args[0])); } CTR5(KTR_SYSC, "syscall: td=%p %s(%#lx, %#lx, %#lx)", td, syscallnames[code], argp[0], argp[1], argp[2]); /* * Try to run the syscall without the MP lock if the syscall * is MP safe. 
*/ #ifdef KTRACE if (KTRPOINT(td, KTR_SYSCALL)) ktrsyscall(code, narg, argp); #endif if (error == 0) { td->td_retval[0] = 0; td->td_retval[1] = 0; STOPEVENT(p, S_SCE, narg); /* MP aware */ PTRACESTOP_SC(p, td, S_PT_SCE); AUDIT_SYSCALL_ENTER(code, td); error = (*callp->sy_call)(td, argp); AUDIT_SYSCALL_EXIT(error, td); CTR5(KTR_SYSC, "syscall: p=%p error=%d %s return %#lx %#lx ", p, error, syscallnames[code], td->td_retval[0], td->td_retval[1]); } /* * MP SAFE (we may or may not have the MP lock at this point) */ switch (error) { case 0: tf->tf_out[0] = td->td_retval[0]; tf->tf_out[1] = td->td_retval[1]; tf->tf_tstate &= ~TSTATE_XCC_C; break; case ERESTART: /* * Undo the tpc advancement we have done above, we want to * reexecute the system call. */ tf->tf_tpc = tpc; tf->tf_tnpc -= 4; break; case EJUSTRETURN: break; default: if (p->p_sysent->sv_errsize) { if (error >= p->p_sysent->sv_errsize) error = -1; /* XXX */ else error = p->p_sysent->sv_errtbl[error]; } tf->tf_out[0] = error; tf->tf_tstate |= TSTATE_XCC_C; break; } /* * Handle reschedule and other end-of-syscall issues */ userret(td, tf); #ifdef KTRACE if (KTRPOINT(td, KTR_SYSRET)) ktrsysret(code, error, td->td_retval[0]); #endif /* * This works because errno is findable through the * register set. If we ever support an emulation where this * is not the case, this code will need to be revisited. */ STOPEVENT(p, S_SCX, code); PTRACESTOP_SC(p, td, S_PT_SCX); WITNESS_WARN(WARN_PANIC, NULL, "System call %s returning", (code >= 0 && code < SYS_MAXSYSCALL) ? syscallnames[code] : "???"); mtx_assert(&Giant, MA_NOTOWNED); } Index: stable/7/sys/sys/kdb.h =================================================================== --- stable/7/sys/sys/kdb.h (revision 177733) +++ stable/7/sys/sys/kdb.h (revision 177734) @@ -1,81 +1,109 @@ /*- * Copyright (c) 2004 Marcel Moolenaar * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 *
 * $FreeBSD$
 */

#ifndef _SYS_KDB_H_
#define _SYS_KDB_H_

#include

typedef int	dbbe_init_f(void);
typedef void	dbbe_trace_f(void);
typedef int	dbbe_trap_f(int, int);

struct kdb_dbbe {
	const char	*dbbe_name;
	dbbe_init_f	*dbbe_init;
	dbbe_trace_f	*dbbe_trace;
	dbbe_trap_f	*dbbe_trap;
	int		dbbe_active;
};

#define KDB_BACKEND(name, init, trace, trap)		\
	static struct kdb_dbbe name##_dbbe = {		\
		.dbbe_name = #name,			\
		.dbbe_init = init,			\
		.dbbe_trace = trace,			\
		.dbbe_trap = trap			\
	};						\
	DATA_SET(kdb_dbbe_set, name##_dbbe)

struct pcb;
struct thread;
struct trapframe;

extern int kdb_active;			/* Non-zero while in debugger. */
extern int debugger_on_panic;		/* enter the debugger on panic. */
extern struct kdb_dbbe *kdb_dbbe;	/* Default debugger backend or NULL. */
extern struct trapframe *kdb_frame;	/* Frame to kdb_trap(). */
extern struct pcb *kdb_thrctx;		/* Current context. */
extern struct thread *kdb_thread;	/* Current thread. */

int	kdb_alt_break(int, int *);
void	kdb_backtrace(void);
int	kdb_dbbe_select(const char *);
void	kdb_enter(const char *);
+void	kdb_enter_why(const char *, const char *);
void	kdb_init(void);
void *	kdb_jmpbuf(jmp_buf);
void	kdb_reenter(void);
struct pcb *kdb_thr_ctx(struct thread *);
struct thread *kdb_thr_first(void);
struct thread *kdb_thr_from_pid(pid_t);
struct thread *kdb_thr_lookup(lwpid_t);
struct thread *kdb_thr_next(struct thread *);
int	kdb_thr_select(struct thread *);
int	kdb_trap(int, int, struct trapframe *);
+
+/*
+ * KDB enters the debugger via breakpoint(), which leaves the debugger without
+ * a lot of information about why it was entered.  This simple enumerated set
+ * captures some basic information.
+ *
+ * It is recommended that values here be short (<16 character) alpha-numeric
+ * strings, as they will be used to construct DDB(4) script names.
+ */
+extern const char * volatile kdb_why;
+#define KDB_WHY_UNSET		NULL		/* No reason set. */
+#define KDB_WHY_PANIC		"panic"		/* panic() was called. */
+#define KDB_WHY_SYSCTL		"sysctl"	/* Sysctl entered debugger. */
+#define KDB_WHY_BOOTFLAGS	"bootflags"	/* Boot flags were set. */
+#define KDB_WHY_WITNESS		"witness"	/* Witness entered debugger. */
+#define KDB_WHY_VFSLOCK		"vfslock"	/* VFS detected lock problem. */
+#define KDB_WHY_NETGRAPH	"netgraph"	/* Netgraph entered debugger. */
+#define KDB_WHY_BREAK		"break"		/* Console or serial break. */
+#define KDB_WHY_WATCHDOG	"watchdog"	/* Watchdog entered debugger. */
+#define KDB_WHY_CAM		"cam"		/* CAM has entered debugger. */
+#define KDB_WHY_NDIS		"ndis"		/* NDIS entered debugger. */
+#define KDB_WHY_ACPI		"acpi"		/* ACPI entered debugger. */
+#define KDB_WHY_TRAPSIG		"trapsig"	/* Sun4v/Sparc fault. */
+#define KDB_WHY_POWERFAIL	"powerfail"	/* Powerfail NMI. */
+#define KDB_WHY_MAC		"mac"		/* MAC Framework. */
+#define KDB_WHY_POWERPC		"powerpc"	/* Unhandled powerpc intr. */
+#define KDB_WHY_UNIONFS		"unionfs"	/* Unionfs bug. */

#endif /* !_SYS_KDB_H_ */
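
The kern/subr_kdb.c half of this revision is not included in the hunks above, so the
following is only an illustrative sketch of how the new interface fits together.  The
body of kdb_enter_why(), the example caller name, and the DDB(4) script-name format
mentioned in the comments are assumptions based on the declarations added to sys/kdb.h
and on the converted sun4v call sites, not the committed implementation.

/*
 * Sketch only: assumes kdb_enter_why() simply records the reason in
 * kdb_why around the debugger entry.  The real definition lives in
 * sys/kern/subr_kdb.c, which is not part of this diff.
 */
#include <sys/param.h>
#include <sys/kdb.h>
#include <sys/reboot.h>

const char * volatile kdb_why = KDB_WHY_UNSET;

void
kdb_enter_why(const char *why, const char *msg)
{

	/* Remember why the debugger was entered (e.g. "bootflags"). */
	kdb_why = (why != NULL) ? why : KDB_WHY_UNSET;
	kdb_enter(msg);
	kdb_why = KDB_WHY_UNSET;
}

/*
 * Hypothetical caller, mirroring the sparc64_init() hunk above: pass a
 * KDB_WHY_* constant instead of calling kdb_enter() directly, so DDB(4)
 * can later pick a script named after the reason (for instance something
 * like "kdb.enter.bootflags"; the exact naming scheme is an assumption).
 */
static void
example_enter_debugger_from_bootflags(int howto)
{

	if (howto & RB_KDB)
		kdb_enter_why(KDB_WHY_BOOTFLAGS,
		    "Boot flags requested debugger");
}

For the two sun4v files touched above the behaviour on debugger entry is unchanged;
the only difference is that a machine-readable reason string is now available for
DDB scripts and post-mortem tooling to key on.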