diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index eb1e72247307..6993dea8e584 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -1,2203 +1,2213 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atalk.h" #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_perfmon.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #include #ifdef SMP #include #endif #ifdef DEV_ATPIC #include #else #include #endif #include #include /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern u_int64_t hammer_time(u_int64_t, u_int64_t); extern void printcpuinfo(void); /* XXX header file */ extern void identify_cpu(void); extern void panicifcpuunsupported(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup(void *); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; int cold = 1; long Maxmem = 0; long realmem = 0; /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct region_descriptor r_gdt, r_idt; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; struct mtx dt_lock; /* lock for GDT and LDT */ static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook", 7) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif sysenv = getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10); freeenv(sysenv); } else memsize = 0; if (memsize > 0) printf("real memory = %ju (%ju MB)\n", memsize << 10, memsize >> 10); else printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), ptoa((uintmax_t)Maxmem) / 1048576); realmem = Maxmem; /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); mca_init(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase; /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_rsp - sizeof(struct sigframe) - 128; /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned long)sp & ~0xFul); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */ if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ } else { /* Old FreeBSD-style arguments. */ regs->tf_rsi = ksi->ksi_code; /* arg 2 in %rsi */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; long rflags; int cs, error, ret; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) { printf("sigreturn (pid %d): copyin failed\n", p->p_pid); return (error); } ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { printf("sigreturn (pid %d): mc_flags %x\n", p->p_pid, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; rflags = ucp->uc_mcontext.mc_rflags; /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_rflags for faults. Debuggers * should sometimes set it there too. tf_rflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { printf("sigreturn (pid %d): rflags = 0x%lx\n", p->p_pid, rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn (pid %d): cs = 0x%x\n", p->p_pid, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) { printf("sigreturn (pid %d): set_fpcontext\n", p->p_pid); return (ret); } bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); td->td_pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; td->td_pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; PROC_LOCK(p); #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); td->td_pcb->pcb_flags |= PCB_FULLCTX; return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* Not applicable */ +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { register_t reg; uint64_t tsc1, tsc2; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); /* If we're booting, trust the rate calibrated moments ago. */ if (cold) { *rate = tsc_freq; return (0); } #ifdef SMP /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); #ifdef SMP thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); #endif /* * Calculate the difference in readings, convert to Mhz, and * subtract 0.5% of the total. Empirical testing has shown that * overhead in DELAY() works out to approximately this value. */ tsc2 -= tsc1; *rate = tsc2 * 1000 - tsc2 * 5; return (0); } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */ static void cpu_idle_hlt(int busy) { /* * we must absolutely guarentee that hlt is the next instruction * after sti or we introduce a timing window. */ disable_intr(); if (sched_runnable()) enable_intr(); else __asm __volatile("sti; hlt"); } static void cpu_idle_acpi(int busy) { disable_intr(); if (sched_runnable()) enable_intr(); else if (cpu_idle_hook) cpu_idle_hook(); else __asm __volatile("sti; hlt"); } static void cpu_idle_spin(int busy) { return; } void (*cpu_idle_fn)(int) = cpu_idle_acpi; void cpu_idle(int busy) { #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif cpu_idle_fn(busy); } /* * mwait cpu power states. Lower 4 bits are sub-states. */ #define MWAIT_C0 0xf0 #define MWAIT_C1 0x00 #define MWAIT_C2 0x10 #define MWAIT_C3 0x20 #define MWAIT_C4 0x30 #define MWAIT_DISABLED 0x0 #define MWAIT_WOKEN 0x1 #define MWAIT_WAITING 0x2 static void cpu_idle_mwait(int busy) { int *mwait; mwait = (int *)PCPU_PTR(monitorbuf); *mwait = MWAIT_WAITING; if (sched_runnable()) return; cpu_monitor(mwait, 0, 0); if (*mwait == MWAIT_WAITING) cpu_mwait(0, MWAIT_C1); } static void cpu_idle_mwait_hlt(int busy) { int *mwait; mwait = (int *)PCPU_PTR(monitorbuf); if (busy == 0) { *mwait = MWAIT_DISABLED; cpu_idle_hlt(busy); return; } *mwait = MWAIT_WAITING; if (sched_runnable()) return; cpu_monitor(mwait, 0, 0); if (*mwait == MWAIT_WAITING) cpu_mwait(0, MWAIT_C1); } int cpu_idle_wakeup(int cpu) { struct pcpu *pcpu; int *mwait; if (cpu_idle_fn == cpu_idle_spin) return (1); if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt) return (0); pcpu = pcpu_find(cpu); mwait = (int *)pcpu->pc_monitorbuf; /* * This doesn't need to be atomic since missing the race will * simply result in unnecessary IPIs. */ if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED) return (0); *mwait = MWAIT_WOKEN; return (1); } /* * Ordered by speed/power consumption. */ struct { void *id_fn; char *id_name; } idle_tbl[] = { { cpu_idle_spin, "spin" }, { cpu_idle_mwait, "mwait" }, { cpu_idle_mwait_hlt, "mwait_hlt" }, { cpu_idle_hlt, "hlt" }, { cpu_idle_acpi, "acpi" }, { NULL, NULL } }; static int idle_sysctl_available(SYSCTL_HANDLER_ARGS) { char *avail, *p; int error; int i; avail = malloc(256, M_TEMP, M_WAITOK); p = avail; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; p += sprintf(p, "%s, ", idle_tbl[i].id_name); } error = sysctl_handle_string(oidp, avail, 0, req); free(avail, M_TEMP); return (error); } static int idle_sysctl(SYSCTL_HANDLER_ARGS) { char buf[16]; int error; char *p; int i; p = "unknown"; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (idle_tbl[i].id_fn == cpu_idle_fn) { p = idle_tbl[i].id_name; break; } } strncpy(buf, p, sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; if (strcmp(idle_tbl[i].id_name, buf)) continue; cpu_idle_fn = idle_tbl[i].id_fn; return (0); } return (EINVAL); } SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, idle_sysctl_available, "A", "list of available idle functions"); SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, idle_sysctl, "A", "currently selected idle function"); /* * Reset registers to default values on exec. */ void exec_setregs(td, entry, stack, ps_strings) struct thread *td; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock(&dt_lock); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; pcb->pcb_flags &= ~(PCB_32BIT | PCB_GS32BIT); pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = entry; regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; regs->tf_rdi = stack; /* argv */ regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == PCPU_GET(curpcb)) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } void cpu_setregs(void) { register_t cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the * BSP. See the comments there about why we set them. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); } /* * Initialize amd64 and configure to run kernel */ /* * Initialize segments & interrupt table */ struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[PAGE_SIZE] __aligned(16); static char nmi0_stack[PAGE_SIZE] __aligned(16); CTASSERT(sizeof(struct nmi_pcpu) == 16); struct amd64tss common_tss[MAXCPU]; /* * Software prototypes -- in more palatable form. * * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same * slots as corresponding segments for i386 kernel. */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GNULL2_SEL 1 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUFS32_SEL 2 32 bit %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS32_SEL 3 32 bit %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GUCODE32_SEL 6 32 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 32/64 bit Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 8 64 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct amd64tss) + IOPAGES * PAGE_SIZE - 1, .ssd_type = SDT_SYSTSS, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Actually, the TSS is a system descriptor which is double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 LDT Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 12 LDT Descriptor, double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; void setidt(idx, func, typ, dpl, ist) int idx; inthand_t *func; int typ; int dpl; int ist; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (uintptr_t)func; ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); ip->gd_ist = ist; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((uintptr_t)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), IDTVEC(fast_syscall), IDTVEC(fast_syscall32); void sdtossd(sd, ssd) struct user_segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_long = sd->sd_long; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void ssdtosd(ssd, sd) struct soft_segment_descriptor *ssd; struct user_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_long = ssd->ssd_long; sd->sd_def32 = ssd->ssd_def32; sd->sd_gran = ssd->ssd_gran; } void ssdtosyssd(ssd, sd) struct soft_segment_descriptor *ssd; struct system_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_gran = ssd->ssd_gran; } #if !defined(DEV_ATPIC) && defined(DEV_ISA) #include #include /* * Return a bitmap of the current interrupt requests. This is 8259-specific * and is only suitable for use at probe time. * This is only here to pacify sio. It is NOT FATAL if this doesn't work. * It shouldn't be here. There should probably be an APIC centric * implementation in the apic driver code, if at all. */ intrmask_t isa_irq_pending(void) { u_char irr1; u_char irr2; irr1 = inb(IO_ICU1); irr2 = inb(IO_ICU2); return ((irr2 << 8) | irr1); } #endif u_int basemem; /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(caddr_t kmdp, u_int64_t first) { int i, off, physmap_idx, pa_indx, da_indx; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; u_long physmem_tunable; pt_entry_t *pte; struct bios_smap *smapbase, *smap, *smapend; u_int32_t smapsize; quad_t dcons_addr, dcons_size; bzero(physmap, sizeof(physmap)); basemem = 0; physmap_idx = 0; /* * get memory map from INT 15:E820, kindly supplied by the loader. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes smap. */ smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) panic("No BIOS smap info from loader!"); smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016lx len=%016lx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) continue; if (smap->length == 0) continue; for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-monotonic memory region, ignoring second region\n"); continue; } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; continue; } physmap_idx += 2; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); break; } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; } /* * Find the 'base memory' segment for SMP */ basemem = 0; for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0) panic("BIOS smap did not include a basemem segment!"); #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1] / 1024); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * Don't allow MAXMEM or hw.physmem to extend the amount of memory * in the system. */ if (Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(&first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= 0x100000 && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } u_int64_t hammer_time(u_int64_t modulep, u_int64_t physfree) { caddr_t kmdp; int gsel_tss, x; struct pcpu *pc; struct nmi_pcpu *np; u_int64_t msr; char *env; thread0.td_kstack = physfree + KERNBASE; bzero((void *)thread0.td_kstack, KSTACK_PAGES * PAGE_SIZE); physfree += KSTACK_PAGES * PAGE_SIZE; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE); preload_bootstrap_relocate(KERNBASE); kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *) + KERNBASE; #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); #endif /* Init basic tunables, hz etc */ init_param1(); /* * make gdt memory segments */ for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) ssdtosd(&gdt_segs[x], &gdt[x]); } gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (long) gdt; lgdt(&r_gdt); pc = &__pcpu[0]; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); PCPU_SET(tssp, &common_tss[0]); PCPU_SET(commontssp, &common_tss[0]); PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); PCPU_SET(fs32p, &gdt[GUFS32_SEL]); PCPU_SET(gs32p, &gdt[GUGS32_SEL]); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; lidt(&r_idt); /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). */ i8254_init(); /* * Initialize the console before we print anything out. */ cninit(); #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); #endif #else #error "have you forgotten the isa device?"; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif identify_cpu(); /* Final stage of CPU initialization */ initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ common_tss[0].tss_rsp0 = thread0.td_kstack + \ KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb); /* Ensure the stack is aligned to 16 bytes */ common_tss[0].tss_rsp0 &= ~0xFul; PCPU_SET(rsp0, common_tss[0].tss_rsp0); /* doublefault stack space, runs on ist1 */ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; /* * NMI stack, runs on ist2. The pcpu pointer is stored just * above the start of the ist2 stack. */ np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1; np->np_pcpu = (register_t) pc; common_tss[0].tss_ist2 = (long) np; /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPAGES * PAGE_SIZE; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); /* Set up the fast syscall stuff */ msr = rdmsr(MSR_EFER) | EFER_SCE; wrmsr(MSR_EFER, msr); wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); getmemsize(kmdp, physfree); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, MSGBUF_SIZE); fpuinit(); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); _ufssel = GSEL(GUFS32_SEL, SEL_UPL); _ugssel = GSEL(GUGS32_SEL, SEL_UPL); load_ds(_udatasel); load_es(_udatasel); load_fs(_ufssel); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_cr3 = KPML4phys; thread0.td_frame = &proc0_tf; env = getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); #ifdef XENHVM if (inw(0x10) == 0x49d2) { if (bootverbose) printf("Xen detected: disabling emulated block and network devices\n"); outw(0x10, 3); } #endif /* Location of kernel stack for locore */ return ((u_int64_t)thread0.td_pcb); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_flags = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(td->td_md.md_saved_flags); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_r12 = tf->tf_r12; pcb->pcb_r13 = tf->tf_r13; pcb->pcb_r14 = tf->tf_r14; pcb->pcb_r15 = tf->tf_r15; pcb->pcb_rbp = tf->tf_rbp; pcb->pcb_rbx = tf->tf_rbx; pcb->pcb_rip = tf->tf_rip; pcb->pcb_rsp = (ISPL(tf->tf_cs)) ? tf->tf_rsp : (long)(tf + 1) - 8; } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_rip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_rflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_rflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; tp = td->td_frame; regs->r_r15 = tp->tf_r15; regs->r_r14 = tp->tf_r14; regs->r_r13 = tp->tf_r13; regs->r_r12 = tp->tf_r12; regs->r_r11 = tp->tf_r11; regs->r_r10 = tp->tf_r10; regs->r_r9 = tp->tf_r9; regs->r_r8 = tp->tf_r8; regs->r_rdi = tp->tf_rdi; regs->r_rsi = tp->tf_rsi; regs->r_rbp = tp->tf_rbp; regs->r_rbx = tp->tf_rbx; regs->r_rdx = tp->tf_rdx; regs->r_rcx = tp->tf_rcx; regs->r_rax = tp->tf_rax; regs->r_rip = tp->tf_rip; regs->r_cs = tp->tf_cs; regs->r_rflags = tp->tf_rflags; regs->r_rsp = tp->tf_rsp; regs->r_ss = tp->tf_ss; if (tp->tf_flags & TF_HASSEGS) { regs->r_ds = tp->tf_ds; regs->r_es = tp->tf_es; regs->r_fs = tp->tf_fs; regs->r_gs = tp->tf_gs; } else { regs->r_ds = 0; regs->r_es = 0; regs->r_fs = 0; regs->r_gs = 0; } return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; register_t rflags; tp = td->td_frame; rflags = regs->r_rflags & 0xffffffff; if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_r15 = regs->r_r15; tp->tf_r14 = regs->r_r14; tp->tf_r13 = regs->r_r13; tp->tf_r12 = regs->r_r12; tp->tf_r11 = regs->r_r11; tp->tf_r10 = regs->r_r10; tp->tf_r9 = regs->r_r9; tp->tf_r8 = regs->r_r8; tp->tf_rdi = regs->r_rdi; tp->tf_rsi = regs->r_rsi; tp->tf_rbp = regs->r_rbp; tp->tf_rbx = regs->r_rbx; tp->tf_rdx = regs->r_rdx; tp->tf_rcx = regs->r_rcx; tp->tf_rax = regs->r_rax; tp->tf_rip = regs->r_rip; tp->tf_cs = regs->r_cs; tp->tf_rflags = rflags; tp->tf_rsp = regs->r_rsp; tp->tf_ss = regs->r_ss; if (0) { /* XXXKIB */ tp->tf_ds = regs->r_ds; tp->tf_es = regs->r_es; tp->tf_fs = regs->r_fs; tp->tf_gs = regs->r_gs; tp->tf_flags = TF_HASSEGS; } td->td_pcb->pcb_flags |= PCB_FULLCTX; return (0); } /* XXX check all this stuff! */ /* externalize from sv_xmm */ static void fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs) { struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* pcb -> fpregs */ bzero(fpregs, sizeof(*fpregs)); /* FPU control/status */ penv_fpreg->en_cw = penv_xmm->en_cw; penv_fpreg->en_sw = penv_xmm->en_sw; penv_fpreg->en_tw = penv_xmm->en_tw; penv_fpreg->en_opcode = penv_xmm->en_opcode; penv_fpreg->en_rip = penv_xmm->en_rip; penv_fpreg->en_rdp = penv_xmm->en_rdp; penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr; penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16); } /* internalize from fpregs into sv_xmm */ static void set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm) { struct envxmm *penv_xmm = &sv_xmm->sv_env; struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; int i; /* fpregs -> pcb */ /* FPU control/status */ penv_xmm->en_cw = penv_fpreg->en_cw; penv_xmm->en_sw = penv_fpreg->en_sw; penv_xmm->en_tw = penv_fpreg->en_tw; penv_xmm->en_opcode = penv_fpreg->en_opcode; penv_xmm->en_rip = penv_fpreg->en_rip; penv_xmm->en_rdp = penv_fpreg->en_rdp; penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr; penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16); } /* externalize from td->pcb */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { fill_fpregs_xmm(&td->td_pcb->pcb_save, fpregs); return (0); } /* internalize to td->pcb */ int set_fpregs(struct thread *td, struct fpreg *fpregs) { set_fpregs_xmm(fpregs, &td->td_pcb->pcb_save); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_rsp); PROC_UNLOCK(curthread->td_proc); mcp->mc_r15 = tp->tf_r15; mcp->mc_r14 = tp->tf_r14; mcp->mc_r13 = tp->tf_r13; mcp->mc_r12 = tp->tf_r12; mcp->mc_r11 = tp->tf_r11; mcp->mc_r10 = tp->tf_r10; mcp->mc_r9 = tp->tf_r9; mcp->mc_r8 = tp->tf_r8; mcp->mc_rdi = tp->tf_rdi; mcp->mc_rsi = tp->tf_rsi; mcp->mc_rbp = tp->tf_rbp; mcp->mc_rbx = tp->tf_rbx; mcp->mc_rcx = tp->tf_rcx; mcp->mc_rflags = tp->tf_rflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_rax = 0; mcp->mc_rdx = 0; mcp->mc_rflags &= ~PSL_C; } else { mcp->mc_rax = tp->tf_rax; mcp->mc_rdx = tp->tf_rdx; } mcp->mc_rip = tp->tf_rip; mcp->mc_cs = tp->tf_cs; mcp->mc_rsp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_ds = tp->tf_ds; mcp->mc_es = tp->tf_es; mcp->mc_fs = tp->tf_fs; mcp->mc_gs = tp->tf_gs; mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); mcp->mc_fsbase = td->td_pcb->pcb_fsbase; mcp->mc_gsbase = td->td_pcb->pcb_gsbase; return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; long rflags; int ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); rflags = (mcp->mc_rflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); ret = set_fpcontext(td, mcp); if (ret != 0) return (ret); tp->tf_r15 = mcp->mc_r15; tp->tf_r14 = mcp->mc_r14; tp->tf_r13 = mcp->mc_r13; tp->tf_r12 = mcp->mc_r12; tp->tf_r11 = mcp->mc_r11; tp->tf_r10 = mcp->mc_r10; tp->tf_r9 = mcp->mc_r9; tp->tf_r8 = mcp->mc_r8; tp->tf_rdi = mcp->mc_rdi; tp->tf_rsi = mcp->mc_rsi; tp->tf_rbp = mcp->mc_rbp; tp->tf_rbx = mcp->mc_rbx; tp->tf_rdx = mcp->mc_rdx; tp->tf_rcx = mcp->mc_rcx; tp->tf_rax = mcp->mc_rax; tp->tf_rip = mcp->mc_rip; tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_rsp; tp->tf_ss = mcp->mc_ss; tp->tf_flags = mcp->mc_flags; if (tp->tf_flags & TF_HASSEGS) { tp->tf_ds = mcp->mc_ds; tp->tf_es = mcp->mc_es; tp->tf_fs = mcp->mc_fs; tp->tf_gs = mcp->mc_gs; } if (mcp->mc_flags & _MC_HASBASES) { td->td_pcb->pcb_fsbase = mcp->mc_fsbase; td->td_pcb->pcb_gsbase = mcp->mc_gsbase; } td->td_pcb->pcb_flags |= PCB_FULLCTX; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { mcp->mc_ownedfp = fpugetregs(td, (struct savefpu *)&mcp->mc_fpstate); mcp->mc_fpformat = fpuformat(); } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { struct savefpu *fpstate; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* * XXX we violate the dubious requirement that fpusetregs() * be called with interrupts disabled. * XXX obsolete on trap-16 systems? */ fpstate = (struct savefpu *)&mcp->mc_fpstate; fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; fpusetregs(td, fpstate); } else return (EINVAL); return (0); } void fpstate_drop(struct thread *td) { register_t s; s = intr_disable(); if (PCPU_GET(fpcurthread) == td) fpudrop(); /* * XXX force a full drop of the fpu. The above only drops it if we * owned it. * * XXX I don't much like fpugetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of fpugetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~PCB_FPUINITDONE; intr_restore(s); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[8] = 0; dbregs->dr[9] = 0; dbregs->dr[10] = 0; dbregs->dr[11] = 0; dbregs->dr[12] = 0; dbregs->dr[13] = 0; dbregs->dr[14] = 0; dbregs->dr[15] = 0; return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP or a general protection fault right here. * Upper bits of dr6 and dr7 must not be set */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (td->td_frame->tf_cs == _ucode32sel && DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8) return (EINVAL); } if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 || (dbregs->dr[7] & 0xffffffff00000000ul) != 0) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } void reset_dbregs(void) { load_dr7(0); /* Turn off the control bits first */ load_dr0(0); load_dr1(0); load_dr2(0); load_dr3(0); load_dr6(0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int64_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int64_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ diff --git a/sys/arm/arm/machdep.c b/sys/arm/arm/machdep.c index 0f574c1e46ae..597cdf541063 100644 --- a/sys/arm/arm/machdep.c +++ b/sys/arm/arm/machdep.c @@ -1,696 +1,708 @@ /* $NetBSD: arm32_machdep.c,v 1.44 2004/03/24 15:34:47 atatat Exp $ */ /*- * Copyright (c) 2004 Olivier Houchard * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Mark Brinicombe * for the NetBSD Project. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Machine dependant functions for kernel setup * * Created : 17/09/94 * Updated : 18/04/01 updated for new wscons */ #include "opt_compat.h" #include "opt_ddb.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include uint32_t cpu_reset_address = 0; int cold = 1; vm_offset_t vector_page; long realmem = 0; int (*_arm_memcpy)(void *, void *, int, int) = NULL; int (*_arm_bzero)(void *, int, int) = NULL; int _min_memcpy_size = 0; int _min_bzero_size = 0; extern int *end; #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif void sendsig(catcher, ksi, mask) sig_t catcher; ksiginfo_t *ksi; sigset_t *mask; { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; int onstack; int sig; int code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_usr_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_flags & TDP_ALTSTACK) != 0 && !(onstack) && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct sigframe *)td->td_frame->tf_usr_sp; /* make room on the stack */ fp--; /* make the stack aligned */ fp = (struct sigframe *)STACKALIGN(fp); /* Populate the siginfo frame. */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK ) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* * Build context to run handler in. We invoke the handler * directly, only returning via the trampoline. Note the * trampoline version numbers are coordinated with machine- * dependent code in libc. */ tf->tf_r0 = sig; tf->tf_r1 = (register_t)&fp->sf_si; tf->tf_r2 = (register_t)&fp->sf_uc; /* the trampoline uses r5 as the uc address */ tf->tf_r5 = (register_t)&fp->sf_uc; tf->tf_pc = (register_t)catcher; tf->tf_usr_sp = (register_t)fp; tf->tf_usr_lr = (register_t)(PS_STRINGS - *(p->p_sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_usr_lr, tf->tf_usr_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } struct kva_md_info kmi; /* * arm32_vector_init: * * Initialize the vector page, and select whether or not to * relocate the vectors. * * NOTE: We expect the vector page to be mapped at its expected * destination. */ extern unsigned int page0[], page0_data[]; void arm_vector_init(vm_offset_t va, int which) { unsigned int *vectors = (int *) va; unsigned int *vectors_data = vectors + (page0_data - page0); int vec; /* * Loop through the vectors we're taking over, and copy the * vector's insn and data word. */ for (vec = 0; vec < ARM_NVEC; vec++) { if ((which & (1 << vec)) == 0) { /* Don't want to take over this vector. */ continue; } vectors[vec] = page0[vec]; vectors_data[vec] = page0_data[vec]; } /* Now sync the vectors. */ cpu_icache_sync_range(va, (ARM_NVEC * 2) * sizeof(u_int)); vector_page = va; if (va == ARM_VECTORS_HIGH) { /* * Assume the MD caller knows what it's doing here, and * really does want the vector page relocated. * * Note: This has to be done here (and not just in * cpu_setup()) because the vector page needs to be * accessible *before* cpu_startup() is called. * Think ddb(9) ... * * NOTE: If the CPU control register is not readable, * this will totally fail! We'll just assume that * any system that has high vector support has a * readable CPU control register, for now. If we * ever encounter one that does not, we'll have to * rethink this. */ cpu_control(CPU_CONTROL_VECRELOC, CPU_CONTROL_VECRELOC); } } static void cpu_startup(void *dummy) { struct pcb *pcb = thread0.td_pcb; #ifndef ARM_CACHE_LOCK_ENABLE vm_page_t m; #endif cpu_setup(""); identify_arm_cpu(); printf("real memory = %ju (%ju MB)\n", (uintmax_t)ptoa(physmem), (uintmax_t)ptoa(physmem) / 1048576); realmem = physmem; /* * Display the RAM layout. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf("%#08jx - %#08jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", (uintmax_t)ptoa(cnt.v_free_count), (uintmax_t)ptoa(cnt.v_free_count) / 1048576); bufinit(); vm_pager_bufferinit(); pcb->un_32.pcb32_und_sp = (u_int)thread0.td_kstack + USPACE_UNDEF_STACK_TOP; pcb->un_32.pcb32_sp = (u_int)thread0.td_kstack + USPACE_SVC_STACK_TOP; vector_page_setprot(VM_PROT_READ); pmap_set_pcb_pagedir(pmap_kernel(), pcb); pmap_postinit(); #ifdef ARM_CACHE_LOCK_ENABLE pmap_kenter_user(ARM_TP_ADDRESS, ARM_TP_ADDRESS); arm_lock_cache_line(ARM_TP_ADDRESS); #else m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_ZERO); pmap_kenter_user(ARM_TP_ADDRESS, VM_PAGE_TO_PHYS(m)); #endif } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + + cpu_dcache_wb_range((uintptr_t)ptr, len); + cpu_l2cache_wb_range((uintptr_t)ptr, len); +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } void cpu_idle(int busy) { cpu_sleep(0); } int cpu_idle_wakeup(int cpu) { return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(&tf->tf_r0, regs->r, sizeof(regs->r)); regs->r_sp = tf->tf_usr_sp; regs->r_lr = tf->tf_usr_lr; regs->r_pc = tf->tf_pc; regs->r_cpsr = tf->tf_spsr; return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { bzero(regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(regs->r, &tf->tf_r0, sizeof(regs->r)); tf->tf_usr_sp = regs->r_sp; tf->tf_usr_lr = regs->r_lr; tf->tf_pc = regs->r_pc; tf->tf_spsr &= ~PSR_FLAGS; tf->tf_spsr |= regs->r_cpsr & PSR_FLAGS; return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { return (0); } int set_dbregs(struct thread *td, struct dbreg *regs) { return (0); } static int ptrace_read_int(struct thread *td, vm_offset_t addr, u_int32_t *v) { struct iovec iov; struct uio uio; PROC_LOCK_ASSERT(td->td_proc, MA_NOTOWNED); iov.iov_base = (caddr_t) v; iov.iov_len = sizeof(u_int32_t); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)addr; uio.uio_resid = sizeof(u_int32_t); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; return proc_rwmem(td->td_proc, &uio); } static int ptrace_write_int(struct thread *td, vm_offset_t addr, u_int32_t v) { struct iovec iov; struct uio uio; PROC_LOCK_ASSERT(td->td_proc, MA_NOTOWNED); iov.iov_base = (caddr_t) &v; iov.iov_len = sizeof(u_int32_t); uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = (off_t)addr; uio.uio_resid = sizeof(u_int32_t); uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_td = td; return proc_rwmem(td->td_proc, &uio); } int ptrace_single_step(struct thread *td) { struct proc *p; int error; KASSERT(td->td_md.md_ptrace_instr == 0, ("Didn't clear single step")); p = td->td_proc; PROC_UNLOCK(p); error = ptrace_read_int(td, td->td_frame->tf_pc + 4, &td->td_md.md_ptrace_instr); if (error) goto out; error = ptrace_write_int(td, td->td_frame->tf_pc + 4, PTRACE_BREAKPOINT); if (error) td->td_md.md_ptrace_instr = 0; td->td_md.md_ptrace_addr = td->td_frame->tf_pc + 4; out: PROC_LOCK(p); return (error); } int ptrace_clear_single_step(struct thread *td) { struct proc *p; if (td->td_md.md_ptrace_instr) { p = td->td_proc; PROC_UNLOCK(p); ptrace_write_int(td, td->td_md.md_ptrace_addr, td->td_md.md_ptrace_instr); PROC_LOCK(p); td->td_md.md_ptrace_instr = 0; } return (0); } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_pc = addr; return (0); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_cspr = disable_interrupts(I32_bit | F32_bit); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) restore_interrupts(td->td_md.md_saved_cspr); } /* * Clear registers on exec */ void exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(*tf)); tf->tf_usr_sp = stack; tf->tf_usr_lr = entry; tf->tf_svc_lr = 0x77777777; tf->tf_pc = entry; tf->tf_spsr = PSR_USR32_MODE; } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; __greg_t *gr = mcp->__gregs; if (clear_ret & GET_MC_CLEAR_RET) gr[_REG_R0] = 0; else gr[_REG_R0] = tf->tf_r0; gr[_REG_R1] = tf->tf_r1; gr[_REG_R2] = tf->tf_r2; gr[_REG_R3] = tf->tf_r3; gr[_REG_R4] = tf->tf_r4; gr[_REG_R5] = tf->tf_r5; gr[_REG_R6] = tf->tf_r6; gr[_REG_R7] = tf->tf_r7; gr[_REG_R8] = tf->tf_r8; gr[_REG_R9] = tf->tf_r9; gr[_REG_R10] = tf->tf_r10; gr[_REG_R11] = tf->tf_r11; gr[_REG_R12] = tf->tf_r12; gr[_REG_SP] = tf->tf_usr_sp; gr[_REG_LR] = tf->tf_usr_lr; gr[_REG_PC] = tf->tf_pc; gr[_REG_CPSR] = tf->tf_spsr; return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tf = td->td_frame; const __greg_t *gr = mcp->__gregs; tf->tf_r0 = gr[_REG_R0]; tf->tf_r1 = gr[_REG_R1]; tf->tf_r2 = gr[_REG_R2]; tf->tf_r3 = gr[_REG_R3]; tf->tf_r4 = gr[_REG_R4]; tf->tf_r5 = gr[_REG_R5]; tf->tf_r6 = gr[_REG_R6]; tf->tf_r7 = gr[_REG_R7]; tf->tf_r8 = gr[_REG_R8]; tf->tf_r9 = gr[_REG_R9]; tf->tf_r10 = gr[_REG_R10]; tf->tf_r11 = gr[_REG_R11]; tf->tf_r12 = gr[_REG_R12]; tf->tf_usr_sp = gr[_REG_SP]; tf->tf_usr_lr = gr[_REG_LR]; tf->tf_pc = gr[_REG_PC]; tf->tf_spsr = gr[_REG_CPSR]; return (0); } /* * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { struct proc *p = td->td_proc; struct sigframe sf; struct trapframe *tf; int spsr; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &sf, sizeof(sf))) return (EFAULT); /* * Make sure the processor mode has not been tampered with and * interrupts have not been disabled. */ spsr = sf.sf_uc.uc_mcontext.__gregs[_REG_CPSR]; if ((spsr & PSR_MODE) != PSR_USR32_MODE || (spsr & (I32_bit | F32_bit)) != 0) return (EINVAL); /* Restore register context. */ tf = td->td_frame; set_mcontext(td, &sf.sf_uc.uc_mcontext); /* Restore signal mask. */ PROC_LOCK(p); td->td_sigmask = sf.sf_uc.uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->un_32.pcb32_r8 = tf->tf_r8; pcb->un_32.pcb32_r9 = tf->tf_r9; pcb->un_32.pcb32_r10 = tf->tf_r10; pcb->un_32.pcb32_r11 = tf->tf_r11; pcb->un_32.pcb32_r12 = tf->tf_r12; pcb->un_32.pcb32_pc = tf->tf_pc; pcb->un_32.pcb32_lr = tf->tf_usr_lr; pcb->un_32.pcb32_sp = tf->tf_usr_sp; } /* * Fake up a boot descriptor table */ vm_offset_t fake_preload_metadata(void) { #ifdef DDB vm_offset_t zstart = 0, zend = 0; #endif vm_offset_t lastaddr; int i = 0; static uint32_t fake_preload[35]; fake_preload[i++] = MODINFO_NAME; fake_preload[i++] = strlen("elf kernel") + 1; strcpy((char*)&fake_preload[i++], "elf kernel"); i += 2; fake_preload[i++] = MODINFO_TYPE; fake_preload[i++] = strlen("elf kernel") + 1; strcpy((char*)&fake_preload[i++], "elf kernel"); i += 2; fake_preload[i++] = MODINFO_ADDR; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = KERNVIRTADDR; fake_preload[i++] = MODINFO_SIZE; fake_preload[i++] = sizeof(uint32_t); fake_preload[i++] = (uint32_t)&end - KERNVIRTADDR; #ifdef DDB if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) { fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4); fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8); lastaddr = *(uint32_t *)(KERNVIRTADDR + 8); zend = lastaddr; zstart = *(uint32_t *)(KERNVIRTADDR + 4); ksym_start = zstart; ksym_end = zend; } else #endif lastaddr = (vm_offset_t)&end; fake_preload[i++] = 0; fake_preload[i] = 0; preload_metadata = (void *)fake_preload; return (lastaddr); } diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index 48d48fdeec0d..a03b0784a26e 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -1,1318 +1,1321 @@ /*- * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * * $FreeBSD$ * */ /*- * The following functions are based in the vn(4) driver: mdstart_swap(), * mdstart_vnode(), mdcreate_swap(), mdcreate_vnode() and mddestroy(), * and as such under the following copyright: * * Copyright (c) 1988 University of Utah. * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: Utah Hdr: vn.c 1.13 94/04/02 * * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 * From: src/sys/dev/vn/vn.c,v 1.122 2000/12/16 16:06:03 */ #include "opt_geom.h" #include "opt_md.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MD_MODVER 1 #define MD_SHUTDOWN 0x10000 /* Tell worker thread to terminate. */ #define MD_EXITING 0x20000 /* Worker thread is exiting. */ #ifndef MD_NSECT #define MD_NSECT (10000 * 2) #endif static MALLOC_DEFINE(M_MD, "md_disk", "Memory Disk"); static MALLOC_DEFINE(M_MDSECT, "md_sectors", "Memory Disk Sectors"); static int md_debug; SYSCTL_INT(_debug, OID_AUTO, mddebug, CTLFLAG_RW, &md_debug, 0, ""); #if defined(MD_ROOT) && defined(MD_ROOT_SIZE) /* * Preloaded image gets put here. * Applications that patch the object with the image can determine * the size looking at the start and end markers (strings), * so we want them contiguous. */ static struct { u_char start[MD_ROOT_SIZE*1024]; u_char end[128]; } mfs_root = { .start = "MFS Filesystem goes here", .end = "MFS Filesystem had better STOP here", }; #endif static g_init_t g_md_init; static g_fini_t g_md_fini; static g_start_t g_md_start; static g_access_t g_md_access; static void g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp); static int mdunits; static struct cdev *status_dev = 0; static struct sx md_sx; static d_ioctl_t mdctlioctl; static struct cdevsw mdctl_cdevsw = { .d_version = D_VERSION, .d_ioctl = mdctlioctl, .d_name = MD_NAME, }; struct g_class g_md_class = { .name = "MD", .version = G_VERSION, .init = g_md_init, .fini = g_md_fini, .start = g_md_start, .access = g_md_access, .dumpconf = g_md_dumpconf, }; DECLARE_GEOM_CLASS(g_md_class, g_md); static LIST_HEAD(, md_s) md_softc_list = LIST_HEAD_INITIALIZER(&md_softc_list); #define NINDIR (PAGE_SIZE / sizeof(uintptr_t)) #define NMASK (NINDIR-1) static int nshift; struct indir { uintptr_t *array; u_int total; u_int used; u_int shift; }; struct md_s { int unit; LIST_ENTRY(md_s) list; struct bio_queue_head bio_queue; struct mtx queue_mtx; struct cdev *dev; enum md_types type; off_t mediasize; unsigned sectorsize; unsigned opencount; unsigned fwheads; unsigned fwsectors; unsigned flags; char name[20]; struct proc *procp; struct g_geom *gp; struct g_provider *pp; int (*start)(struct md_s *sc, struct bio *bp); struct devstat *devstat; /* MD_MALLOC related fields */ struct indir *indir; uma_zone_t uma; /* MD_PRELOAD related fields */ u_char *pl_ptr; size_t pl_len; /* MD_VNODE related fields */ struct vnode *vnode; char file[PATH_MAX]; struct ucred *cred; /* MD_SWAP related fields */ vm_object_t object; }; static struct indir * new_indir(u_int shift) { struct indir *ip; ip = malloc(sizeof *ip, M_MD, M_NOWAIT | M_ZERO); if (ip == NULL) return (NULL); ip->array = malloc(sizeof(uintptr_t) * NINDIR, M_MDSECT, M_NOWAIT | M_ZERO); if (ip->array == NULL) { free(ip, M_MD); return (NULL); } ip->total = NINDIR; ip->shift = shift; return (ip); } static void del_indir(struct indir *ip) { free(ip->array, M_MDSECT); free(ip, M_MD); } static void destroy_indir(struct md_s *sc, struct indir *ip) { int i; for (i = 0; i < NINDIR; i++) { if (!ip->array[i]) continue; if (ip->shift) destroy_indir(sc, (struct indir*)(ip->array[i])); else if (ip->array[i] > 255) uma_zfree(sc->uma, (void *)(ip->array[i])); } del_indir(ip); } /* * This function does the math and allocates the top level "indir" structure * for a device of "size" sectors. */ static struct indir * dimension(off_t size) { off_t rcnt; struct indir *ip; int i, layer; rcnt = size; layer = 0; while (rcnt > NINDIR) { rcnt /= NINDIR; layer++; } /* figure out log2(NINDIR) */ for (i = NINDIR, nshift = -1; i; nshift++) i >>= 1; /* * XXX: the top layer is probably not fully populated, so we allocate * too much space for ip->array in here. */ ip = malloc(sizeof *ip, M_MD, M_WAITOK | M_ZERO); ip->array = malloc(sizeof(uintptr_t) * NINDIR, M_MDSECT, M_WAITOK | M_ZERO); ip->total = NINDIR; ip->shift = layer * nshift; return (ip); } /* * Read a given sector */ static uintptr_t s_read(struct indir *ip, off_t offset) { struct indir *cip; int idx; uintptr_t up; if (md_debug > 1) printf("s_read(%jd)\n", (intmax_t)offset); up = 0; for (cip = ip; cip != NULL;) { if (cip->shift) { idx = (offset >> cip->shift) & NMASK; up = cip->array[idx]; cip = (struct indir *)up; continue; } idx = offset & NMASK; return (cip->array[idx]); } return (0); } /* * Write a given sector, prune the tree if the value is 0 */ static int s_write(struct indir *ip, off_t offset, uintptr_t ptr) { struct indir *cip, *lip[10]; int idx, li; uintptr_t up; if (md_debug > 1) printf("s_write(%jd, %p)\n", (intmax_t)offset, (void *)ptr); up = 0; li = 0; cip = ip; for (;;) { lip[li++] = cip; if (cip->shift) { idx = (offset >> cip->shift) & NMASK; up = cip->array[idx]; if (up != 0) { cip = (struct indir *)up; continue; } /* Allocate branch */ cip->array[idx] = (uintptr_t)new_indir(cip->shift - nshift); if (cip->array[idx] == 0) return (ENOSPC); cip->used++; up = cip->array[idx]; cip = (struct indir *)up; continue; } /* leafnode */ idx = offset & NMASK; up = cip->array[idx]; if (up != 0) cip->used--; cip->array[idx] = ptr; if (ptr != 0) cip->used++; break; } if (cip->used != 0 || li == 1) return (0); li--; while (cip->used == 0 && cip != ip) { li--; idx = (offset >> lip[li]->shift) & NMASK; up = lip[li]->array[idx]; KASSERT(up == (uintptr_t)cip, ("md screwed up")); del_indir(cip); lip[li]->array[idx] = 0; lip[li]->used--; cip = lip[li]; } return (0); } static int g_md_access(struct g_provider *pp, int r, int w, int e) { struct md_s *sc; sc = pp->geom->softc; if (sc == NULL) { if (r <= 0 && w <= 0 && e <= 0) return (0); return (ENXIO); } r += pp->acr; w += pp->acw; e += pp->ace; if ((sc->flags & MD_READONLY) != 0 && w > 0) return (EROFS); if ((pp->acr + pp->acw + pp->ace) == 0 && (r + w + e) > 0) { sc->opencount = 1; } else if ((pp->acr + pp->acw + pp->ace) > 0 && (r + w + e) == 0) { sc->opencount = 0; } return (0); } static void g_md_start(struct bio *bp) { struct md_s *sc; sc = bp->bio_to->geom->softc; if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) devstat_start_transaction_bio(sc->devstat, bp); mtx_lock(&sc->queue_mtx); bioq_disksort(&sc->bio_queue, bp); mtx_unlock(&sc->queue_mtx); wakeup(sc); } static int mdstart_malloc(struct md_s *sc, struct bio *bp) { int i, error; u_char *dst; off_t secno, nsec, uc; uintptr_t sp, osp; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; default: return (EOPNOTSUPP); } nsec = bp->bio_length / sc->sectorsize; secno = bp->bio_offset / sc->sectorsize; dst = bp->bio_data; error = 0; while (nsec--) { osp = s_read(sc->indir, secno); if (bp->bio_cmd == BIO_DELETE) { if (osp != 0) error = s_write(sc->indir, secno, 0); } else if (bp->bio_cmd == BIO_READ) { if (osp == 0) bzero(dst, sc->sectorsize); else if (osp <= 255) - for (i = 0; i < sc->sectorsize; i++) - dst[i] = osp; - else + memset(dst, osp, sc->sectorsize); + else { bcopy((void *)osp, dst, sc->sectorsize); + cpu_flush_dcache(dst, sc->sectorsize); + } osp = 0; } else if (bp->bio_cmd == BIO_WRITE) { if (sc->flags & MD_COMPRESS) { uc = dst[0]; for (i = 1; i < sc->sectorsize; i++) if (dst[i] != uc) break; } else { i = 0; uc = 0; } if (i == sc->sectorsize) { if (osp != uc) error = s_write(sc->indir, secno, uc); } else { if (osp <= 255) { sp = (uintptr_t)uma_zalloc(sc->uma, M_NOWAIT); if (sp == 0) { error = ENOSPC; break; } bcopy(dst, (void *)sp, sc->sectorsize); error = s_write(sc->indir, secno, sp); } else { bcopy(dst, (void *)osp, sc->sectorsize); osp = 0; } } } else { error = EOPNOTSUPP; } if (osp > 255) uma_zfree(sc->uma, (void*)osp); if (error != 0) break; secno++; dst += sc->sectorsize; } bp->bio_resid = 0; return (error); } static int mdstart_preload(struct md_s *sc, struct bio *bp) { switch (bp->bio_cmd) { case BIO_READ: bcopy(sc->pl_ptr + bp->bio_offset, bp->bio_data, bp->bio_length); + cpu_flush_dcache(bp->bio_data, bp->bio_length); break; case BIO_WRITE: bcopy(bp->bio_data, sc->pl_ptr + bp->bio_offset, bp->bio_length); break; } bp->bio_resid = 0; return (0); } static int mdstart_vnode(struct md_s *sc, struct bio *bp) { int error, vfslocked; struct uio auio; struct iovec aiov; struct mount *mp; struct vnode *vp; struct thread *td; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_FLUSH: break; default: return (EOPNOTSUPP); } td = curthread; vp = sc->vnode; /* * VNODE I/O * * If an error occurs, we set BIO_ERROR but we do not set * B_INVAL because (for a write anyway), the buffer is * still valid. */ if (bp->bio_cmd == BIO_FLUSH) { vfslocked = VFS_LOCK_GIANT(vp->v_mount); (void) vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_FSYNC(vp, MNT_WAIT, td); VOP_UNLOCK(vp, 0); vn_finished_write(mp); VFS_UNLOCK_GIANT(vfslocked); return (error); } bzero(&auio, sizeof(auio)); aiov.iov_base = bp->bio_data; aiov.iov_len = bp->bio_length; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = (vm_ooffset_t)bp->bio_offset; auio.uio_segflg = UIO_SYSSPACE; if (bp->bio_cmd == BIO_READ) auio.uio_rw = UIO_READ; else if (bp->bio_cmd == BIO_WRITE) auio.uio_rw = UIO_WRITE; else panic("wrong BIO_OP in mdstart_vnode"); auio.uio_resid = bp->bio_length; auio.uio_td = td; /* * When reading set IO_DIRECT to try to avoid double-caching * the data. When writing IO_DIRECT is not optimal. */ vfslocked = VFS_LOCK_GIANT(vp->v_mount); if (bp->bio_cmd == BIO_READ) { vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_READ(vp, &auio, IO_DIRECT, sc->cred); VOP_UNLOCK(vp, 0); } else { (void) vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); error = VOP_WRITE(vp, &auio, sc->flags & MD_ASYNC ? 0 : IO_SYNC, sc->cred); VOP_UNLOCK(vp, 0); vn_finished_write(mp); } VFS_UNLOCK_GIANT(vfslocked); bp->bio_resid = auio.uio_resid; return (error); } static int mdstart_swap(struct md_s *sc, struct bio *bp) { struct sf_buf *sf; int rv, offs, len, lastend; vm_pindex_t i, lastp; vm_page_t m; u_char *p; switch (bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: break; default: return (EOPNOTSUPP); } p = bp->bio_data; /* * offs is the offset at which to start operating on the * next (ie, first) page. lastp is the last page on * which we're going to operate. lastend is the ending * position within that last page (ie, PAGE_SIZE if * we're operating on complete aligned pages). */ offs = bp->bio_offset % PAGE_SIZE; lastp = (bp->bio_offset + bp->bio_length - 1) / PAGE_SIZE; lastend = (bp->bio_offset + bp->bio_length - 1) % PAGE_SIZE + 1; rv = VM_PAGER_OK; VM_OBJECT_LOCK(sc->object); vm_object_pip_add(sc->object, 1); for (i = bp->bio_offset / PAGE_SIZE; i <= lastp; i++) { len = ((i == lastp) ? lastend : PAGE_SIZE) - offs; m = vm_page_grab(sc->object, i, VM_ALLOC_NORMAL|VM_ALLOC_RETRY); VM_OBJECT_UNLOCK(sc->object); sched_pin(); sf = sf_buf_alloc(m, SFB_CPUPRIVATE); VM_OBJECT_LOCK(sc->object); if (bp->bio_cmd == BIO_READ) { if (m->valid != VM_PAGE_BITS_ALL) rv = vm_pager_get_pages(sc->object, &m, 1, 0); if (rv == VM_PAGER_ERROR) { sf_buf_free(sf); sched_unpin(); vm_page_wakeup(m); break; } bcopy((void *)(sf_buf_kva(sf) + offs), p, len); + cpu_flush_dcache(p, len); } else if (bp->bio_cmd == BIO_WRITE) { if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL) rv = vm_pager_get_pages(sc->object, &m, 1, 0); if (rv == VM_PAGER_ERROR) { sf_buf_free(sf); sched_unpin(); vm_page_wakeup(m); break; } bcopy(p, (void *)(sf_buf_kva(sf) + offs), len); m->valid = VM_PAGE_BITS_ALL; #if 0 } else if (bp->bio_cmd == BIO_DELETE) { if (len != PAGE_SIZE && m->valid != VM_PAGE_BITS_ALL) rv = vm_pager_get_pages(sc->object, &m, 1, 0); if (rv == VM_PAGER_ERROR) { sf_buf_free(sf); sched_unpin(); vm_page_wakeup(m); break; } bzero((void *)(sf_buf_kva(sf) + offs), len); vm_page_dirty(m); m->valid = VM_PAGE_BITS_ALL; #endif } sf_buf_free(sf); sched_unpin(); vm_page_wakeup(m); vm_page_lock_queues(); vm_page_activate(m); if (bp->bio_cmd == BIO_WRITE) vm_page_dirty(m); vm_page_unlock_queues(); /* Actions on further pages start at offset 0 */ p += PAGE_SIZE - offs; offs = 0; #if 0 if (bootverbose || bp->bio_offset / PAGE_SIZE < 17) printf("wire_count %d busy %d flags %x hold_count %d act_count %d queue %d valid %d dirty %d @ %d\n", m->wire_count, m->busy, m->flags, m->hold_count, m->act_count, m->queue, m->valid, m->dirty, i); #endif } vm_object_pip_subtract(sc->object, 1); vm_object_set_writeable_dirty(sc->object); VM_OBJECT_UNLOCK(sc->object); return (rv != VM_PAGER_ERROR ? 0 : ENOSPC); } static void md_kthread(void *arg) { struct md_s *sc; struct bio *bp; int error; sc = arg; thread_lock(curthread); sched_prio(curthread, PRIBIO); thread_unlock(curthread); if (sc->type == MD_VNODE) curthread->td_pflags |= TDP_NORUNNINGBUF; for (;;) { mtx_lock(&sc->queue_mtx); if (sc->flags & MD_SHUTDOWN) { sc->flags |= MD_EXITING; mtx_unlock(&sc->queue_mtx); kproc_exit(0); } bp = bioq_takefirst(&sc->bio_queue); if (!bp) { msleep(sc, &sc->queue_mtx, PRIBIO | PDROP, "mdwait", 0); continue; } mtx_unlock(&sc->queue_mtx); if (bp->bio_cmd == BIO_GETATTR) { if (sc->fwsectors && sc->fwheads && (g_handleattr_int(bp, "GEOM::fwsectors", sc->fwsectors) || g_handleattr_int(bp, "GEOM::fwheads", sc->fwheads))) error = -1; else error = EOPNOTSUPP; } else { error = sc->start(sc, bp); } if (error != -1) { bp->bio_completed = bp->bio_length; g_io_deliver(bp, error); if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) devstat_end_transaction_bio(sc->devstat, bp); } } } static struct md_s * mdfind(int unit) { struct md_s *sc; LIST_FOREACH(sc, &md_softc_list, list) { if (sc->unit == unit) break; } return (sc); } static struct md_s * mdnew(int unit, int *errp, enum md_types type) { struct md_s *sc, *sc2; int error, max = -1; *errp = 0; LIST_FOREACH(sc2, &md_softc_list, list) { if (unit == sc2->unit) { *errp = EBUSY; return (NULL); } if (unit == -1 && sc2->unit > max) max = sc2->unit; } if (unit == -1) unit = max + 1; sc = (struct md_s *)malloc(sizeof *sc, M_MD, M_WAITOK | M_ZERO); sc->type = type; bioq_init(&sc->bio_queue); mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF); sc->unit = unit; sprintf(sc->name, "md%d", unit); LIST_INSERT_HEAD(&md_softc_list, sc, list); error = kproc_create(md_kthread, sc, &sc->procp, 0, 0,"%s", sc->name); if (error == 0) return (sc); LIST_REMOVE(sc, list); mtx_destroy(&sc->queue_mtx); free(sc, M_MD); *errp = error; return (NULL); } static void mdinit(struct md_s *sc) { struct g_geom *gp; struct g_provider *pp; g_topology_lock(); gp = g_new_geomf(&g_md_class, "md%d", sc->unit); gp->softc = sc; pp = g_new_providerf(gp, "md%d", sc->unit); pp->mediasize = sc->mediasize; pp->sectorsize = sc->sectorsize; sc->gp = gp; sc->pp = pp; g_error_provider(pp, 0); g_topology_unlock(); sc->devstat = devstat_new_entry("md", sc->unit, sc->sectorsize, DEVSTAT_ALL_SUPPORTED, DEVSTAT_TYPE_DIRECT, DEVSTAT_PRIORITY_MAX); } /* * XXX: we should check that the range they feed us is mapped. * XXX: we should implement read-only. */ static int mdcreate_preload(struct md_s *sc, struct md_ioctl *mdio) { if (mdio->md_options & ~(MD_AUTOUNIT | MD_FORCE)) return (EINVAL); sc->flags = mdio->md_options & MD_FORCE; /* Cast to pointer size, then to pointer to avoid warning */ sc->pl_ptr = (u_char *)(uintptr_t)mdio->md_base; sc->pl_len = (size_t)sc->mediasize; return (0); } static int mdcreate_malloc(struct md_s *sc, struct md_ioctl *mdio) { uintptr_t sp; int error; off_t u; error = 0; if (mdio->md_options & ~(MD_AUTOUNIT | MD_COMPRESS | MD_RESERVE)) return (EINVAL); if (mdio->md_sectorsize != 0 && !powerof2(mdio->md_sectorsize)) return (EINVAL); /* Compression doesn't make sense if we have reserved space */ if (mdio->md_options & MD_RESERVE) mdio->md_options &= ~MD_COMPRESS; if (mdio->md_fwsectors != 0) sc->fwsectors = mdio->md_fwsectors; if (mdio->md_fwheads != 0) sc->fwheads = mdio->md_fwheads; sc->flags = mdio->md_options & (MD_COMPRESS | MD_FORCE); sc->indir = dimension(sc->mediasize / sc->sectorsize); sc->uma = uma_zcreate(sc->name, sc->sectorsize, NULL, NULL, NULL, NULL, 0x1ff, 0); if (mdio->md_options & MD_RESERVE) { off_t nsectors; nsectors = sc->mediasize / sc->sectorsize; for (u = 0; u < nsectors; u++) { sp = (uintptr_t)uma_zalloc(sc->uma, M_NOWAIT | M_ZERO); if (sp != 0) error = s_write(sc->indir, u, sp); else error = ENOMEM; if (error != 0) break; } } return (error); } static int mdsetcred(struct md_s *sc, struct ucred *cred) { char *tmpbuf; int error = 0; /* * Set credits in our softc */ if (sc->cred) crfree(sc->cred); sc->cred = crhold(cred); /* * Horrible kludge to establish credentials for NFS XXX. */ if (sc->vnode) { struct uio auio; struct iovec aiov; tmpbuf = malloc(sc->sectorsize, M_TEMP, M_WAITOK); bzero(&auio, sizeof(auio)); aiov.iov_base = tmpbuf; aiov.iov_len = sc->sectorsize; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_resid = aiov.iov_len; vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); error = VOP_READ(sc->vnode, &auio, 0, sc->cred); VOP_UNLOCK(sc->vnode, 0); free(tmpbuf, M_TEMP); } return (error); } static int mdcreate_vnode(struct md_s *sc, struct md_ioctl *mdio, struct thread *td) { struct vattr vattr; struct nameidata nd; int error, flags, vfslocked; error = copyinstr(mdio->md_file, sc->file, sizeof(sc->file), NULL); if (error != 0) return (error); flags = FREAD|FWRITE; /* * If the user specified that this is a read only device, unset the * FWRITE mask before trying to open the backing store. */ if ((mdio->md_options & MD_READONLY) != 0) flags &= ~FWRITE; NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, sc->file, td); error = vn_open(&nd, &flags, 0, NULL); if (error != 0) return (error); vfslocked = NDHASGIANT(&nd); NDFREE(&nd, NDF_ONLY_PNBUF); if (nd.ni_vp->v_type != VREG) { error = EINVAL; goto bad; } error = VOP_GETATTR(nd.ni_vp, &vattr, td->td_ucred); if (error != 0) goto bad; if (VOP_ISLOCKED(nd.ni_vp) != LK_EXCLUSIVE) { vn_lock(nd.ni_vp, LK_UPGRADE | LK_RETRY); if (nd.ni_vp->v_iflag & VI_DOOMED) { /* Forced unmount. */ error = EBADF; goto bad; } } nd.ni_vp->v_vflag |= VV_MD; VOP_UNLOCK(nd.ni_vp, 0); if (mdio->md_fwsectors != 0) sc->fwsectors = mdio->md_fwsectors; if (mdio->md_fwheads != 0) sc->fwheads = mdio->md_fwheads; sc->flags = mdio->md_options & (MD_FORCE | MD_ASYNC); if (!(flags & FWRITE)) sc->flags |= MD_READONLY; sc->vnode = nd.ni_vp; error = mdsetcred(sc, td->td_ucred); if (error != 0) { sc->vnode = NULL; vn_lock(nd.ni_vp, LK_EXCLUSIVE | LK_RETRY); nd.ni_vp->v_vflag &= ~VV_MD; goto bad; } VFS_UNLOCK_GIANT(vfslocked); return (0); bad: VOP_UNLOCK(nd.ni_vp, 0); (void)vn_close(nd.ni_vp, flags, td->td_ucred, td); VFS_UNLOCK_GIANT(vfslocked); return (error); } static int mddestroy(struct md_s *sc, struct thread *td) { int vfslocked; if (sc->gp) { sc->gp->softc = NULL; g_topology_lock(); g_wither_geom(sc->gp, ENXIO); g_topology_unlock(); sc->gp = NULL; sc->pp = NULL; } if (sc->devstat) { devstat_remove_entry(sc->devstat); sc->devstat = NULL; } mtx_lock(&sc->queue_mtx); sc->flags |= MD_SHUTDOWN; wakeup(sc); while (!(sc->flags & MD_EXITING)) msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10); mtx_unlock(&sc->queue_mtx); mtx_destroy(&sc->queue_mtx); if (sc->vnode != NULL) { vfslocked = VFS_LOCK_GIANT(sc->vnode->v_mount); vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); sc->vnode->v_vflag &= ~VV_MD; VOP_UNLOCK(sc->vnode, 0); (void)vn_close(sc->vnode, sc->flags & MD_READONLY ? FREAD : (FREAD|FWRITE), sc->cred, td); VFS_UNLOCK_GIANT(vfslocked); } if (sc->cred != NULL) crfree(sc->cred); if (sc->object != NULL) vm_object_deallocate(sc->object); if (sc->indir) destroy_indir(sc, sc->indir); if (sc->uma) uma_zdestroy(sc->uma); LIST_REMOVE(sc, list); free(sc, M_MD); return (0); } static int mdcreate_swap(struct md_s *sc, struct md_ioctl *mdio, struct thread *td) { vm_ooffset_t npage; int error; /* * Range check. Disallow negative sizes or any size less then the * size of a page. Then round to a page. */ if (sc->mediasize == 0 || (sc->mediasize % PAGE_SIZE) != 0) return (EDOM); /* * Allocate an OBJT_SWAP object. * * Note the truncation. */ npage = mdio->md_mediasize / PAGE_SIZE; if (mdio->md_fwsectors != 0) sc->fwsectors = mdio->md_fwsectors; if (mdio->md_fwheads != 0) sc->fwheads = mdio->md_fwheads; sc->object = vm_pager_allocate(OBJT_SWAP, NULL, PAGE_SIZE * npage, VM_PROT_DEFAULT, 0); if (sc->object == NULL) return (ENOMEM); sc->flags = mdio->md_options & MD_FORCE; if (mdio->md_options & MD_RESERVE) { if (swap_pager_reserve(sc->object, 0, npage) < 0) { vm_object_deallocate(sc->object); sc->object = NULL; return (EDOM); } } error = mdsetcred(sc, td->td_ucred); if (error != 0) { vm_object_deallocate(sc->object); sc->object = NULL; } return (error); } static int xmdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { struct md_ioctl *mdio; struct md_s *sc; int error, i; if (md_debug) printf("mdctlioctl(%s %lx %p %x %p)\n", devtoname(dev), cmd, addr, flags, td); mdio = (struct md_ioctl *)addr; if (mdio->md_version != MDIOVERSION) return (EINVAL); /* * We assert the version number in the individual ioctl * handlers instead of out here because (a) it is possible we * may add another ioctl in the future which doesn't read an * mdio, and (b) the correct return value for an unknown ioctl * is ENOIOCTL, not EINVAL. */ error = 0; switch (cmd) { case MDIOCATTACH: switch (mdio->md_type) { case MD_MALLOC: case MD_PRELOAD: case MD_VNODE: case MD_SWAP: break; default: return (EINVAL); } if (mdio->md_options & MD_AUTOUNIT) sc = mdnew(-1, &error, mdio->md_type); else sc = mdnew(mdio->md_unit, &error, mdio->md_type); if (sc == NULL) return (error); if (mdio->md_options & MD_AUTOUNIT) mdio->md_unit = sc->unit; sc->mediasize = mdio->md_mediasize; if (mdio->md_sectorsize == 0) sc->sectorsize = DEV_BSIZE; else sc->sectorsize = mdio->md_sectorsize; error = EDOOFUS; switch (sc->type) { case MD_MALLOC: sc->start = mdstart_malloc; error = mdcreate_malloc(sc, mdio); break; case MD_PRELOAD: sc->start = mdstart_preload; error = mdcreate_preload(sc, mdio); break; case MD_VNODE: sc->start = mdstart_vnode; error = mdcreate_vnode(sc, mdio, td); break; case MD_SWAP: sc->start = mdstart_swap; error = mdcreate_swap(sc, mdio, td); break; } if (error != 0) { mddestroy(sc, td); return (error); } /* Prune off any residual fractional sector */ i = sc->mediasize % sc->sectorsize; sc->mediasize -= i; mdinit(sc); return (0); case MDIOCDETACH: if (mdio->md_mediasize != 0 || (mdio->md_options & ~MD_FORCE) != 0) return (EINVAL); sc = mdfind(mdio->md_unit); if (sc == NULL) return (ENOENT); if (sc->opencount != 0 && !(sc->flags & MD_FORCE) && !(mdio->md_options & MD_FORCE)) return (EBUSY); return (mddestroy(sc, td)); case MDIOCQUERY: sc = mdfind(mdio->md_unit); if (sc == NULL) return (ENOENT); mdio->md_type = sc->type; mdio->md_options = sc->flags; mdio->md_mediasize = sc->mediasize; mdio->md_sectorsize = sc->sectorsize; if (sc->type == MD_VNODE) error = copyout(sc->file, mdio->md_file, strlen(sc->file) + 1); return (error); case MDIOCLIST: i = 1; LIST_FOREACH(sc, &md_softc_list, list) { if (i == MDNPAD - 1) mdio->md_pad[i] = -1; else mdio->md_pad[i++] = sc->unit; } mdio->md_pad[0] = i - 1; return (0); default: return (ENOIOCTL); }; } static int mdctlioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct thread *td) { int error; sx_xlock(&md_sx); error = xmdctlioctl(dev, cmd, addr, flags, td); sx_xunlock(&md_sx); return (error); } static void md_preloaded(u_char *image, size_t length) { struct md_s *sc; int error; sc = mdnew(-1, &error, MD_PRELOAD); if (sc == NULL) return; sc->mediasize = length; sc->sectorsize = DEV_BSIZE; sc->pl_ptr = image; sc->pl_len = length; sc->start = mdstart_preload; #ifdef MD_ROOT if (sc->unit == 0) rootdevnames[0] = "ufs:/dev/md0"; #endif mdinit(sc); } static void g_md_init(struct g_class *mp __unused) { caddr_t mod; caddr_t c; u_char *ptr, *name, *type; unsigned len; mod = NULL; sx_init(&md_sx, "MD config lock"); g_topology_unlock(); #ifdef MD_ROOT_SIZE sx_xlock(&md_sx); md_preloaded(mfs_root.start, sizeof(mfs_root.start)); sx_xunlock(&md_sx); #endif /* XXX: are preload_* static or do they need Giant ? */ while ((mod = preload_search_next_name(mod)) != NULL) { name = (char *)preload_search_info(mod, MODINFO_NAME); if (name == NULL) continue; type = (char *)preload_search_info(mod, MODINFO_TYPE); if (type == NULL) continue; if (strcmp(type, "md_image") && strcmp(type, "mfs_root")) continue; c = preload_search_info(mod, MODINFO_ADDR); ptr = *(u_char **)c; c = preload_search_info(mod, MODINFO_SIZE); len = *(size_t *)c; printf("%s%d: Preloaded image <%s> %d bytes at %p\n", MD_NAME, mdunits, name, len, ptr); sx_xlock(&md_sx); md_preloaded(ptr, len); sx_xunlock(&md_sx); } status_dev = make_dev(&mdctl_cdevsw, INT_MAX, UID_ROOT, GID_WHEEL, 0600, MDCTL_NAME); g_topology_lock(); } static void g_md_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, struct g_consumer *cp __unused, struct g_provider *pp) { struct md_s *mp; char *type; mp = gp->softc; if (mp == NULL) return; switch (mp->type) { case MD_MALLOC: type = "malloc"; break; case MD_PRELOAD: type = "preload"; break; case MD_VNODE: type = "vnode"; break; case MD_SWAP: type = "swap"; break; default: type = "unknown"; break; } if (pp != NULL) { if (indent == NULL) { sbuf_printf(sb, " u %d", mp->unit); sbuf_printf(sb, " s %ju", (uintmax_t) mp->sectorsize); sbuf_printf(sb, " f %ju", (uintmax_t) mp->fwheads); sbuf_printf(sb, " fs %ju", (uintmax_t) mp->fwsectors); sbuf_printf(sb, " l %ju", (uintmax_t) mp->mediasize); sbuf_printf(sb, " t %s", type); if (mp->type == MD_VNODE && mp->vnode != NULL) sbuf_printf(sb, " file %s", mp->file); } else { sbuf_printf(sb, "%s%d\n", indent, mp->unit); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t) mp->sectorsize); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t) mp->fwheads); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t) mp->fwsectors); sbuf_printf(sb, "%s%ju\n", indent, (uintmax_t) mp->mediasize); sbuf_printf(sb, "%s%s\n", indent, type); if (mp->type == MD_VNODE && mp->vnode != NULL) sbuf_printf(sb, "%s%s\n", indent, mp->file); } } } static void g_md_fini(struct g_class *mp __unused) { sx_destroy(&md_sx); if (status_dev != NULL) destroy_dev(status_dev); } diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index a74db11cdc2c..e64bcd2b66c9 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1,3580 +1,3590 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #endif #ifdef DEV_ISA #include #endif #ifdef XBOX #include int arch_i386_is_xbox = 0; uint32_t arch_i386_xbox_memsize = 0; #endif #ifdef XEN /* XEN includes */ #include #include #include #include #include #include void Xhypervisor_callback(void); void failsafe_callback(void); extern trap_info_t trap_table[]; struct proc_ldt default_proc_ldt; extern int init_first; int running_xen = 1; extern unsigned long physfree; #endif /* XEN */ /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern void init386(int first); extern void dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel; u_int basemem; int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; #ifdef PAE FEATURE(pae, "Physical Address Extensions"); #endif /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook", 7) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif sysenv = getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10); freeenv(sysenv); } else memsize = 0; if (memsize > 0) printf("real memory = %ju (%ju MB)\n", memsize << 10, memsize >> 10); else printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), ptoa((uintmax_t)Maxmem) / 1048576); realmem = Maxmem; /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); #ifndef XEN cpu_setregs(); #endif mca_init(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; struct segment_descriptor *sdp; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* * Unconditionally fill the fsbase and gsbase into the mcontext. */ sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_fsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; struct proc *p = td->td_proc; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; PROC_LOCK(p); #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif SIGSETOLD(td->td_sigmask, scp->sc_mask); SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct proc *p = td->td_proc; struct trapframe *regs; const struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("freebsd4_sigreturn: cs = 0x%x\n", cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; int cs, eflags, error, ret; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* Not applicable */ +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { register_t reg; uint64_t tsc1, tsc2; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); if (!tsc_present) return (EOPNOTSUPP); /* If we're booting, trust the rate calibrated moments ago. */ if (cold) { *rate = tsc_freq; return (0); } #ifdef SMP /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); #ifdef SMP thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); #endif /* * Calculate the difference in readings, convert to Mhz, and * subtract 0.5% of the total. Empirical testing has shown that * overhead in DELAY() works out to approximately this value. */ tsc2 -= tsc1; *rate = tsc2 * 1000 - tsc2 * 5; return (0); } void (*cpu_idle_hook)(void) = NULL; /* ACPI idle hook. */ #ifdef XEN void cpu_halt(void) { HYPERVISOR_shutdown(SHUTDOWN_poweroff); } int scheduler_running; static void cpu_idle_hlt(int busy) { scheduler_running = 1; enable_intr(); idle_block(); } #else /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } static void cpu_idle_hlt(int busy) { /* * we must absolutely guarentee that hlt is the next instruction * after sti or we introduce a timing window. */ disable_intr(); if (sched_runnable()) enable_intr(); else __asm __volatile("sti; hlt"); } #endif static void cpu_idle_acpi(int busy) { disable_intr(); if (sched_runnable()) enable_intr(); else if (cpu_idle_hook) cpu_idle_hook(); else __asm __volatile("sti; hlt"); } static void cpu_idle_spin(int busy) { return; } #ifdef XEN void (*cpu_idle_fn)(int) = cpu_idle_hlt; #else void (*cpu_idle_fn)(int) = cpu_idle_acpi; #endif void cpu_idle(int busy) { #if defined(SMP) && !defined(XEN) if (mp_grab_cpu_hlt()) return; #endif cpu_idle_fn(busy); } /* * mwait cpu power states. Lower 4 bits are sub-states. */ #define MWAIT_C0 0xf0 #define MWAIT_C1 0x00 #define MWAIT_C2 0x10 #define MWAIT_C3 0x20 #define MWAIT_C4 0x30 #define MWAIT_DISABLED 0x0 #define MWAIT_WOKEN 0x1 #define MWAIT_WAITING 0x2 static void cpu_idle_mwait(int busy) { int *mwait; mwait = (int *)PCPU_PTR(monitorbuf); *mwait = MWAIT_WAITING; if (sched_runnable()) return; cpu_monitor(mwait, 0, 0); if (*mwait == MWAIT_WAITING) cpu_mwait(0, MWAIT_C1); } static void cpu_idle_mwait_hlt(int busy) { int *mwait; mwait = (int *)PCPU_PTR(monitorbuf); if (busy == 0) { *mwait = MWAIT_DISABLED; cpu_idle_hlt(busy); return; } *mwait = MWAIT_WAITING; if (sched_runnable()) return; cpu_monitor(mwait, 0, 0); if (*mwait == MWAIT_WAITING) cpu_mwait(0, MWAIT_C1); } int cpu_idle_wakeup(int cpu) { struct pcpu *pcpu; int *mwait; if (cpu_idle_fn == cpu_idle_spin) return (1); if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt) return (0); pcpu = pcpu_find(cpu); mwait = (int *)pcpu->pc_monitorbuf; /* * This doesn't need to be atomic since missing the race will * simply result in unnecessary IPIs. */ if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED) return (0); *mwait = MWAIT_WOKEN; return (1); } /* * Ordered by speed/power consumption. */ struct { void *id_fn; char *id_name; } idle_tbl[] = { { cpu_idle_spin, "spin" }, { cpu_idle_mwait, "mwait" }, { cpu_idle_mwait_hlt, "mwait_hlt" }, { cpu_idle_hlt, "hlt" }, { cpu_idle_acpi, "acpi" }, { NULL, NULL } }; static int idle_sysctl_available(SYSCTL_HANDLER_ARGS) { char *avail, *p; int error; int i; avail = malloc(256, M_TEMP, M_WAITOK); p = avail; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; p += sprintf(p, "%s, ", idle_tbl[i].id_name); } error = sysctl_handle_string(oidp, avail, 0, req); free(avail, M_TEMP); return (error); } static int idle_sysctl(SYSCTL_HANDLER_ARGS) { char buf[16]; int error; char *p; int i; p = "unknown"; for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (idle_tbl[i].id_fn == cpu_idle_fn) { p = idle_tbl[i].id_name; break; } } strncpy(buf, p, sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); for (i = 0; idle_tbl[i].id_name != NULL; i++) { if (strstr(idle_tbl[i].id_name, "mwait") && (cpu_feature2 & CPUID2_MON) == 0) continue; if (strcmp(idle_tbl[i].id_name, buf)) continue; cpu_idle_fn = idle_tbl[i].id_fn; return (0); } return (EINVAL); } SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, idle_sysctl_available, "A", "list of available idle functions"); SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, idle_sysctl, "A", "currently selected idle function"); /* * Reset registers to default values on exec. */ void exec_setregs(td, entry, stack, ps_strings) struct thread *td; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == PCPU_GET(curpcb)) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ td->td_pcb->pcb_flags &= ~FP_SOFTFP; pcb->pcb_initial_npxcw = __INITIAL_NPXCW__; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate. * * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; #ifdef XEN union descriptor *gdt; union descriptor *ldt; #else union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ #endif static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ struct mtx dt_lock; /* lock for GDT and LDT */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. * * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = SEL_KPL, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUFS_SEL 2 %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS_SEL 3 %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 6 Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { .ssd_base = 0x400, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, #ifndef XEN /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GLDT_SEL 10 LDT Descriptor */ { .ssd_base = (int) ldt, .ssd_limit = sizeof(ldt)-1, .ssd_type = SDT_SYSLDT, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { .ssd_base = (int) ldt, .ssd_limit = (512 * sizeof(union descriptor)-1), .ssd_type = SDT_SYSLDT, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { .ssd_base = (int) &dblfault_tss, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GNDIS_SEL 18 NDIS Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, #endif /* !XEN */ }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. */ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { int i, physmap_idx; physmap_idx = *physmap_idxp; if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); if (smap->length == 0) return (1); #ifndef PAE if (smap->base >= 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(smap->length / 1024)); return (1); } #endif for (i = 0; i <= physmap_idx; i += 2) { if (smap->base < physmap[i + 1]) { if (boothowto & RB_VERBOSE) printf( "Overlapping or non-monotonic memory region, ignoring second region\n"); return (1); } } if (smap->base == physmap[physmap_idx + 1]) { physmap[physmap_idx + 1] += smap->length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } physmap[physmap_idx] = smap->base; physmap[physmap_idx + 1] = smap->base + smap->length; return (1); } /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int i, off, physmap_idx, pa_indx, da_indx; int hasbrokenint12, has_smap; u_long physmem_tunable; u_int extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; struct bios_smap *smap, *smapbase, *smapend; u_int32_t smapsize; quad_t dcons_addr, dcons_size; caddr_t kmdp; has_smap = 0; #ifdef XBOX if (arch_i386_is_xbox) { /* * We queried the memory size before, so chop off 4MB for * the framebuffer and inform the OS of this. */ physmap[0] = 0; physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE; physmap_idx = 0; goto physmap_done; } #endif #if defined(XEN) has_smap = 0; Maxmem = xen_start_info->nr_pages - init_first; physmem = Maxmem; basemem = 0; physmap[0] = init_first << PAGE_SHIFT; physmap[1] = ptoa(Maxmem) - round_page(MSGBUF_SIZE); physmap_idx = 0; goto physmap_done; #endif hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); bzero(&vmf, sizeof(vmf)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Some newer BIOSes has broken INT 12H implementation which cause * kernel panic immediately. In this case, we need to scan SMAP * with INT 15:E820 first, then determine base memory size. */ if (hasbrokenint12) { goto int15e820; } /* * Perform "base memory" related probes & setup */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * Map pages between basemem and ISA_HOLE_START, if any, r/w into * the vm86 page table so that vm86 can scribble on them using * the vm86 map too. XXX: why 2 ways for this and only 1 way for * page 0, at least as initialized here? */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; int15e820: /* * Fetch the memory map with INT 15:E820. First, check to see * if the loader supplied it and use that if so. Otherwise, * use vm86 to invoke the BIOS call directly. */ physmap_idx = 0; smapbase = NULL; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); if (kmdp != NULL) smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase != NULL) { /* subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes smap. */ smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); has_smap = 1; for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } else { /* * map page 1 R/W into the kernel page table so we can use it * as a buffer. The kernel will unmap this page later. */ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; has_smap = 1; if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } while (vmf.vmf_ebx != 0); } /* * Perform "base memory" related probes & setup based on SMAP */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } /* * XXX this function is horribly organized and has to the same * things that it does above here. */ if (basemem == 0) basemem = 640; if (basemem > 640) { printf( "Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * Let vm86 scribble on pages between basemem and * ISA_HOLE_START, as above. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } if (physmap[1] != 0) goto physmap_done; /* * If we failed above, try memory map with INT 15:E801 */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #elif !defined(XEN) /* * Prefer the RTC value for extended memory. */ extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend * the amount of memory in the system. */ if (has_smap && Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; #ifndef XEN /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); #else phys_avail[0] = physfree; phys_avail[1] = xen_start_info->nr_pages*PAGE_SIZE; #endif /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); PT_UPDATES_FLUSH(); } #ifdef XEN #define MTOPSIZE (1<<(14 + PAGE_SHIFT)) void init386(first) int first; { unsigned long gdtmachpfn; int error, gsel_tss, metadata_missing, x; struct pcpu *pc; struct callback_register event = { .type = CALLBACKTYPE_event, .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback }, }; struct callback_register failsafe = { .type = CALLBACKTYPE_failsafe, .address = {GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback }, }; thread0.td_kstack = proc0kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); metadata_missing = 0; if (xen_start_info->mod_start) { preload_metadata = (caddr_t)xen_start_info->mod_start; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if ((caddr_t)xen_start_info->cmd_line) kern_envp = xen_setbootenv((caddr_t)xen_start_info->cmd_line); boothowto |= xen_boothowto(kern_envp); /* Init basic tunables, hz etc */ init_param1(); /* * XEN occupies a portion of the upper virtual address space * At its base it manages an array mapping machine page frames * to physical page frames - hence we need to be able to * access 4GB - (64MB - 4MB + 64k) */ gdt_segs[GPRIV_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GUFS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GUGS_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GUCODE_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GUDATA_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); gdt_segs[GBIOSLOWMEM_SEL].ssd_limit = atop(HYPERVISOR_VIRT_START + MTOPSIZE); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V | PG_RW); bzero(gdt, PAGE_SIZE); for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); gdtmachpfn = vtomach(gdt) >> PAGE_SHIFT; PT_SET_MA(gdt, xpmap_ptom(VTOP(gdt)) | PG_V); PANIC_IF(HYPERVISOR_set_gdt(&gdtmachpfn, 512) != 0); lgdt(&r_gdt); gdtset = 1; if ((error = HYPERVISOR_set_trap_table(trap_table)) != 0) { panic("set_trap_table failed - error %d\n", error); } error = HYPERVISOR_callback_op(CALLBACKOP_register, &event); if (error == 0) error = HYPERVISOR_callback_op(CALLBACKOP_register, &failsafe); #if CONFIG_XEN_COMPAT <= 0x030002 if (error == -ENOXENSYS) HYPERVISOR_set_callbacks(GSEL(GCODE_SEL, SEL_KPL), (unsigned long)Xhypervisor_callback, GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); #endif pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ PT_SET_MA(ldt, xpmap_ptom(VTOP(ldt)) | PG_V | PG_RW); bzero(ldt, PAGE_SIZE); ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); default_proc_ldt.ldt_base = (caddr_t)ldt; default_proc_ldt.ldt_len = 6; _default_ldt = (int)&default_proc_ldt; PCPU_SET(currentldt, _default_ldt) PT_SET_MA(ldt, *vtopte((unsigned long)ldt) & ~PG_RW); xen_set_ldt((unsigned long) ldt, (sizeof ldt_segs / sizeof ldt_segs[0])); #if defined(XEN_PRIVILEGED) /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). */ i8254_init(); #endif /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA elcr_probe(); atpic_startup(); #endif #ifdef DDB ksym_start = bootinfo.bi_symtab; ksym_end = bootinfo.bi_esymtab; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), PCPU_GET(common_tss.tss_esp0)); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #ifdef PAE dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, MSGBUF_SIZE); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; #ifdef PAE thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0]; thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1]; } #else void init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x; struct pcpu *pc; thread0.td_kstack = proc0kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); #ifdef XBOX /* * The following code queries the PCI ID of 0:0:0. For the XBOX, * This should be 0x10de / 0x02a5. * * This is exactly what Linux does. */ outl(0xcf8, 0x80000000); if (inl(0xcfc) == 0x02a510de) { arch_i386_is_xbox = 1; pic16l_setled(XBOX_LED_GREEN); /* * We are an XBOX, but we may have either 64MB or 128MB of * memory. The PCI host bridge should be programmed for this, * so we just query it. */ outl(0xcf8, 0x80000084); arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; } #endif /* XBOX */ /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). */ i8254_init(); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA elcr_probe(); atpic_startup(); #endif #ifdef DDB ksym_start = bootinfo.bi_symtab; ksym_end = bootinfo.bi_esymtab; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #ifdef PAE dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ /* XXX yes! */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; #ifdef PAE thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } #endif void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_flags = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(td->td_md.md_saved_flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); /* Put the problematic entry (#6) at the end of the lower page. */ new_idt = (struct gate_descriptor*) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8; } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_eflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; bzero(sv_87, sizeof(*sv_87)); /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, &td->td_pcb->pcb_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; struct segment_descriptor *sdp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); sdp = &td->td_pcb->pcb_gsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_fsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if ((ret = set_fpcontext(td, mcp)) == 0) { tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; ret = 0; } return (ret); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; #else union savefpu *addr; /* * XXX mc_fpstate might be misaligned, since its declaration is not * unportabilized using __attribute__((aligned(16))) like the * declaration of struct savemm, and anyway, alignment doesn't work * for auto variables since we don't use gcc's pessimal stack * alignment. Work around this by abusing the spare fields after * mcp->mc_fpstate. * * XXX unpessimize most cases by only aligning when fxsave might be * called, although this requires knowing too much about * npxgetregs()'s internals. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } mcp->mc_ownedfp = npxgetregs(td, addr); if (addr != (union savefpu *)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); } mcp->mc_fpformat = npxformat(); #endif } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { union savefpu *addr; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* XXX align as above. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); } #ifdef DEV_NPX #ifdef CPU_ENABLE_SSE if (cpu_fxsr) addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; #endif /* * XXX we violate the dubious requirement that npxsetregs() * be called with interrupts disabled. */ npxsetregs(td, addr); #endif /* * Don't bother putting things back where they were in the * misaligned case, since we know that the caller won't use * them again. */ } else return (EINVAL); return (0); } static void fpstate_drop(struct thread *td) { register_t s; s = intr_disable(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; intr_restore(s); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifndef DEV_APIC #include /* * Provide stub functions so that the MADT APIC enumerator in the acpi * kernel module will link against a kernel without 'device apic'. * * XXX - This is a gross hack. */ void apic_register_enumerator(struct apic_enumerator *enumerator) { } void * ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase) { return (NULL); } int ioapic_disable_pin(void *cookie, u_int pin) { return (ENXIO); } int ioapic_get_vector(void *cookie, u_int pin) { return (-1); } void ioapic_register(void *cookie) { } int ioapic_remap_vector(void *cookie, u_int pin, int vector) { return (ENXIO); } int ioapic_set_extint(void *cookie, u_int pin) { return (ENXIO); } int ioapic_set_nmi(void *cookie, u_int pin) { return (ENXIO); } int ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol) { return (ENXIO); } int ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger) { return (ENXIO); } void lapic_create(u_int apic_id, int boot_cpu) { } void lapic_init(vm_paddr_t addr) { } int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode) { return (ENXIO); } int lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol) { return (ENXIO); } int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger) { return (ENXIO); } #endif #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ diff --git a/sys/ia64/ia64/machdep.c b/sys/ia64/ia64/machdep.c index 3c6e61d9872b..8ffdcf23ff1f 100644 --- a/sys/ia64/ia64/machdep.c +++ b/sys/ia64/ia64/machdep.c @@ -1,1540 +1,1555 @@ /*- * Copyright (c) 2003,2004 Marcel Moolenaar * Copyright (c) 2000,2001 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_msgbuf.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include u_int64_t processor_frequency; u_int64_t bus_frequency; u_int64_t itc_frequency; int cold = 1; u_int64_t pa_bootinfo; struct bootinfo bootinfo; struct pcpu pcpu0; extern u_int64_t kernel_text[], _end[]; extern u_int64_t ia64_gateway_page[]; extern u_int64_t break_sigtramp[]; extern u_int64_t epc_sigtramp[]; struct fpswa_iface *fpswa_iface; u_int64_t ia64_pal_base; u_int64_t ia64_port_base; static int ia64_inval_icache_needed; char machine[] = MACHINE; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, ""); static char cpu_model[64]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "The CPU model name"); static char cpu_family[64]; SYSCTL_STRING(_hw, OID_AUTO, family, CTLFLAG_RD, cpu_family, 0, "The CPU family name"); #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); struct msgbuf *msgbufp = NULL; /* Other subsystems (e.g., ACPI) can hook this later. */ void (*cpu_idle_hook)(void) = NULL; long Maxmem = 0; long realmem = 0; #define PHYSMAP_SIZE (2 * VM_PHYSSEG_MAX) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(vm_offset_t)) - 2) struct kva_md_info kmi; #define Mhz 1000000L #define Ghz (1000L*Mhz) static void identifycpu(void) { char vendor[17]; char *family_name, *model_name; u_int64_t features, tmp; int number, revision, model, family, archrev; /* * Assumes little-endian. */ *(u_int64_t *) &vendor[0] = ia64_get_cpuid(0); *(u_int64_t *) &vendor[8] = ia64_get_cpuid(1); vendor[16] = '\0'; tmp = ia64_get_cpuid(3); number = (tmp >> 0) & 0xff; revision = (tmp >> 8) & 0xff; model = (tmp >> 16) & 0xff; family = (tmp >> 24) & 0xff; archrev = (tmp >> 32) & 0xff; family_name = model_name = "unknown"; switch (family) { case 0x07: family_name = "Itanium"; model_name = "Merced"; break; case 0x1f: family_name = "Itanium 2"; switch (model) { case 0x00: model_name = "McKinley"; break; case 0x01: /* * Deerfield is a low-voltage variant based on the * Madison core. We need circumstantial evidence * (i.e. the clock frequency) to identify those. * Allow for roughly 1% error margin. */ tmp = processor_frequency >> 7; if ((processor_frequency - tmp) < 1*Ghz && (processor_frequency + tmp) >= 1*Ghz) model_name = "Deerfield"; else model_name = "Madison"; break; case 0x02: model_name = "Madison II"; break; } break; case 0x20: ia64_inval_icache_needed = 1; family_name = "Itanium 2"; switch (model) { case 0x00: model_name = "Montecito"; break; } break; } snprintf(cpu_family, sizeof(cpu_family), "%s", family_name); snprintf(cpu_model, sizeof(cpu_model), "%s", model_name); features = ia64_get_cpuid(4); printf("CPU: %s (", model_name); if (processor_frequency) { printf("%ld.%02ld-Mhz ", (processor_frequency + 4999) / Mhz, ((processor_frequency + 4999) / (Mhz/100)) % 100); } printf("%s)\n", family_name); printf(" Origin = \"%s\" Revision = %d\n", vendor, revision); printf(" Features = 0x%b\n", (u_int32_t) features, "\020" "\001LB" /* long branch (brl) instruction. */ "\002SD" /* Spontaneous deferral. */ "\003AO" /* 16-byte atomic operations (ld, st, cmpxchg). */ ); } static void cpu_startup(dummy) void *dummy; { /* * Good {morning,afternoon,evening,night}. */ identifycpu(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ld (%ld MB)\n", ia64_ptob(Maxmem), ia64_ptob(Maxmem) / 1048576); realmem = Maxmem; /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { long size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08lx - 0x%08lx, %ld bytes (%ld pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size1, size1 >> PAGE_SHIFT); } } vm_ksubmap_init(&kmi); printf("avail memory = %ld (%ld MB)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1048576); if (fpswa_iface == NULL) printf("Warning: no FPSWA package supplied\n"); else printf("FPSWA Revision = 0x%lx, Entry = %p\n", (long)fpswa_iface->if_rev, (void *)fpswa_iface->if_fpswa); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); /* * Traverse the MADT to discover IOSAPIC and Local SAPIC * information. */ ia64_probe_sapics(); ia64_mca_init(); } void cpu_boot(int howto) { efi_reset_system(); } +void +cpu_flush_dcache(void *ptr, size_t len) +{ + vm_offset_t lim, va; + + va = (uintptr_t)ptr & ~31; + lim = (uintptr_t)ptr + len; + while (va < lim) { + ia64_fc(va); + va += 32; + } + + ia64_srlz_d(); +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); *rate = processor_frequency; return (0); } void cpu_halt() { efi_reset_system(); } void cpu_idle(int busy) { struct ia64_pal_result res; if (cpu_idle_hook != NULL) (*cpu_idle_hook)(); else res = ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0); } int cpu_idle_wakeup(int cpu) { return (0); } void cpu_reset() { cpu_boot(0); } void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx) { struct pcb *oldpcb, *newpcb; oldpcb = old->td_pcb; #ifdef COMPAT_IA32 ia32_savectx(oldpcb); #endif if (PCPU_GET(fpcurthread) == old) old->td_frame->tf_special.psr |= IA64_PSR_DFH; if (!savectx(oldpcb)) { old->td_lock = mtx; #if defined(SCHED_ULE) && defined(SMP) /* td_lock is volatile */ while (new->td_lock == &blocked_lock) ; #endif newpcb = new->td_pcb; oldpcb->pcb_current_pmap = pmap_switch(newpcb->pcb_current_pmap); PCPU_SET(curthread, new); #ifdef COMPAT_IA32 ia32_restorectx(newpcb); #endif if (PCPU_GET(fpcurthread) == new) new->td_frame->tf_special.psr &= ~IA64_PSR_DFH; restorectx(newpcb); /* We should not get here. */ panic("cpu_switch: restorectx() returned"); /* NOTREACHED */ } } void cpu_throw(struct thread *old __unused, struct thread *new) { struct pcb *newpcb; newpcb = new->td_pcb; (void)pmap_switch(newpcb->pcb_current_pmap); PCPU_SET(curthread, new); #ifdef COMPAT_IA32 ia32_restorectx(newpcb); #endif restorectx(newpcb); /* We should not get here. */ panic("cpu_throw: restorectx() returned"); /* NOTREACHED */ } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = cpuid; } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_intr = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(td->td_md.md_saved_intr); } void map_vhpt(uintptr_t vhpt) { pt_entry_t pte; uint64_t psr; pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY | PTE_PL_KERN | PTE_AR_RW; pte |= vhpt & PTE_PPN_MASK; __asm __volatile("ptr.d %0,%1" :: "r"(vhpt), "r"(IA64_ID_PAGE_SHIFT<<2)); __asm __volatile("mov %0=psr" : "=r"(psr)); __asm __volatile("rsm psr.ic|psr.i"); ia64_srlz_i(); ia64_set_ifa(vhpt); ia64_set_itir(IA64_ID_PAGE_SHIFT << 2); ia64_srlz_d(); __asm __volatile("itr.d dtr[%0]=%1" :: "r"(2), "r"(pte)); __asm __volatile("mov psr.l=%0" :: "r" (psr)); ia64_srlz_i(); } void map_pal_code(void) { pt_entry_t pte; uint64_t psr; if (ia64_pal_base == 0) return; pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY | PTE_PL_KERN | PTE_AR_RWX; pte |= ia64_pal_base & PTE_PPN_MASK; __asm __volatile("ptr.d %0,%1; ptr.i %0,%1" :: "r"(IA64_PHYS_TO_RR7(ia64_pal_base)), "r"(IA64_ID_PAGE_SHIFT<<2)); __asm __volatile("mov %0=psr" : "=r"(psr)); __asm __volatile("rsm psr.ic|psr.i"); ia64_srlz_i(); ia64_set_ifa(IA64_PHYS_TO_RR7(ia64_pal_base)); ia64_set_itir(IA64_ID_PAGE_SHIFT << 2); ia64_srlz_d(); __asm __volatile("itr.d dtr[%0]=%1" :: "r"(1), "r"(pte)); ia64_srlz_d(); __asm __volatile("itr.i itr[%0]=%1" :: "r"(1), "r"(pte)); __asm __volatile("mov psr.l=%0" :: "r" (psr)); ia64_srlz_i(); } void map_gateway_page(void) { pt_entry_t pte; uint64_t psr; pte = PTE_PRESENT | PTE_MA_WB | PTE_ACCESSED | PTE_DIRTY | PTE_PL_KERN | PTE_AR_X_RX; pte |= (uint64_t)ia64_gateway_page & PTE_PPN_MASK; __asm __volatile("ptr.d %0,%1; ptr.i %0,%1" :: "r"(VM_MAX_ADDRESS), "r"(PAGE_SHIFT << 2)); __asm __volatile("mov %0=psr" : "=r"(psr)); __asm __volatile("rsm psr.ic|psr.i"); ia64_srlz_i(); ia64_set_ifa(VM_MAX_ADDRESS); ia64_set_itir(PAGE_SHIFT << 2); ia64_srlz_d(); __asm __volatile("itr.d dtr[%0]=%1" :: "r"(3), "r"(pte)); ia64_srlz_d(); __asm __volatile("itr.i itr[%0]=%1" :: "r"(3), "r"(pte)); __asm __volatile("mov psr.l=%0" :: "r" (psr)); ia64_srlz_i(); /* Expose the mapping to userland in ar.k5 */ ia64_set_k5(VM_MAX_ADDRESS); } static void calculate_frequencies(void) { struct ia64_sal_result sal; struct ia64_pal_result pal; sal = ia64_sal_entry(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0); pal = ia64_call_pal_static(PAL_FREQ_RATIOS, 0, 0, 0); if (sal.sal_status == 0 && pal.pal_status == 0) { if (bootverbose) { printf("Platform clock frequency %ld Hz\n", sal.sal_result[0]); printf("Processor ratio %ld/%ld, Bus ratio %ld/%ld, " "ITC ratio %ld/%ld\n", pal.pal_result[0] >> 32, pal.pal_result[0] & ((1L << 32) - 1), pal.pal_result[1] >> 32, pal.pal_result[1] & ((1L << 32) - 1), pal.pal_result[2] >> 32, pal.pal_result[2] & ((1L << 32) - 1)); } processor_frequency = sal.sal_result[0] * (pal.pal_result[0] >> 32) / (pal.pal_result[0] & ((1L << 32) - 1)); bus_frequency = sal.sal_result[0] * (pal.pal_result[1] >> 32) / (pal.pal_result[1] & ((1L << 32) - 1)); itc_frequency = sal.sal_result[0] * (pal.pal_result[2] >> 32) / (pal.pal_result[2] & ((1L << 32) - 1)); } } struct ia64_init_return ia64_init(void) { struct ia64_init_return ret; int phys_avail_cnt; vm_offset_t kernstart, kernend; vm_offset_t kernstartpfn, kernendpfn, pfn0, pfn1; char *p; struct efi_md *md; int metadata_missing; /* NO OUTPUT ALLOWED UNTIL FURTHER NOTICE */ /* * TODO: Disable interrupts, floating point etc. * Maybe flush cache and tlb */ ia64_set_fpsr(IA64_FPSR_DEFAULT); /* * TODO: Get critical system information (if possible, from the * information provided by the boot program). */ /* * pa_bootinfo is the physical address of the bootinfo block as * passed to us by the loader and set in locore.s. */ bootinfo = *(struct bootinfo *)(IA64_PHYS_TO_RR7(pa_bootinfo)); if (bootinfo.bi_magic != BOOTINFO_MAGIC || bootinfo.bi_version != 1) { bzero(&bootinfo, sizeof(bootinfo)); bootinfo.bi_kernend = (vm_offset_t) round_page(_end); } /* * Look for the I/O ports first - we need them for console * probing. */ for (md = efi_md_first(); md != NULL; md = efi_md_next(md)) { switch (md->md_type) { case EFI_MD_TYPE_IOPORT: ia64_port_base = IA64_PHYS_TO_RR6(md->md_phys); break; case EFI_MD_TYPE_PALCODE: ia64_pal_base = md->md_phys; break; } } metadata_missing = 0; if (bootinfo.bi_modulep) preload_metadata = (caddr_t)bootinfo.bi_modulep; else metadata_missing = 1; if (envmode == 0 && bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp; else kern_envp = static_env; /* * Look at arguments passed to us and compute boothowto. */ boothowto = bootinfo.bi_boothowto; /* * Catch case of boot_verbose set in environment. */ if ((p = getenv("boot_verbose")) != NULL) { if (strcmp(p, "yes") == 0 || strcmp(p, "YES") == 0) { boothowto |= RB_VERBOSE; } freeenv(p); } if (boothowto & RB_VERBOSE) bootverbose = 1; /* * Setup the PCPU data for the bootstrap processor. It is needed * by printf(). Also, since printf() has critical sections, we * need to initialize at least pc_curthread. */ pcpup = &pcpu0; ia64_set_k4((u_int64_t)pcpup); pcpu_init(pcpup, 0, sizeof(pcpu0)); PCPU_SET(curthread, &thread0); /* * Initialize the console before we print anything out. */ cninit(); /* OUTPUT NOW ALLOWED */ if (ia64_pal_base != 0) { ia64_pal_base &= ~IA64_ID_PAGE_MASK; /* * We use a TR to map the first 256M of memory - this might * cover the palcode too. */ if (ia64_pal_base == 0) printf("PAL code mapped by the kernel's TR\n"); } else printf("PAL code not found\n"); /* * Wire things up so we can call the firmware. */ map_pal_code(); efi_boot_minimal(bootinfo.bi_systab); ia64_sal_init(); calculate_frequencies(); /* * Find the beginning and end of the kernel. */ kernstart = trunc_page(kernel_text); #ifdef DDB ksym_start = bootinfo.bi_symtab; ksym_end = bootinfo.bi_esymtab; kernend = (vm_offset_t)round_page(ksym_end); #else kernend = (vm_offset_t)round_page(_end); #endif /* But if the bootstrap tells us otherwise, believe it! */ if (bootinfo.bi_kernend) kernend = round_page(bootinfo.bi_kernend); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); /* Get FPSWA interface */ fpswa_iface = (bootinfo.bi_fpswa == 0) ? NULL : (struct fpswa_iface *)IA64_PHYS_TO_RR7(bootinfo.bi_fpswa); /* Init basic tunables, including hz */ init_param1(); p = getenv("kernelname"); if (p) { strncpy(kernelname, p, sizeof(kernelname) - 1); freeenv(p); } kernstartpfn = atop(IA64_RR_MASK(kernstart)); kernendpfn = atop(IA64_RR_MASK(kernend)); /* * Size the memory regions and load phys_avail[] with the results. */ /* * Find out how much memory is available, by looking at * the memory descriptors. */ #ifdef DEBUG_MD printf("Memory descriptor count: %d\n", mdcount); #endif phys_avail_cnt = 0; for (md = efi_md_first(); md != NULL; md = efi_md_next(md)) { #ifdef DEBUG_MD printf("MD %p: type %d pa 0x%lx cnt 0x%lx\n", md, md->md_type, md->md_phys, md->md_pages); #endif pfn0 = ia64_btop(round_page(md->md_phys)); pfn1 = ia64_btop(trunc_page(md->md_phys + md->md_pages * 4096)); if (pfn1 <= pfn0) continue; if (md->md_type != EFI_MD_TYPE_FREE) continue; /* * We have a memory descriptor that describes conventional * memory that is for general use. We must determine if the * loader has put the kernel in this region. */ physmem += (pfn1 - pfn0); if (pfn0 <= kernendpfn && kernstartpfn <= pfn1) { /* * Must compute the location of the kernel * within the segment. */ #ifdef DEBUG_MD printf("Descriptor %p contains kernel\n", mp); #endif if (pfn0 < kernstartpfn) { /* * There is a chunk before the kernel. */ #ifdef DEBUG_MD printf("Loading chunk before kernel: " "0x%lx / 0x%lx\n", pfn0, kernstartpfn); #endif phys_avail[phys_avail_cnt] = ia64_ptob(pfn0); phys_avail[phys_avail_cnt+1] = ia64_ptob(kernstartpfn); phys_avail_cnt += 2; } if (kernendpfn < pfn1) { /* * There is a chunk after the kernel. */ #ifdef DEBUG_MD printf("Loading chunk after kernel: " "0x%lx / 0x%lx\n", kernendpfn, pfn1); #endif phys_avail[phys_avail_cnt] = ia64_ptob(kernendpfn); phys_avail[phys_avail_cnt+1] = ia64_ptob(pfn1); phys_avail_cnt += 2; } } else { /* * Just load this cluster as one chunk. */ #ifdef DEBUG_MD printf("Loading descriptor %d: 0x%lx / 0x%lx\n", i, pfn0, pfn1); #endif phys_avail[phys_avail_cnt] = ia64_ptob(pfn0); phys_avail[phys_avail_cnt+1] = ia64_ptob(pfn1); phys_avail_cnt += 2; } } phys_avail[phys_avail_cnt] = 0; Maxmem = physmem; init_param2(physmem); /* * Initialize error message buffer (at end of core). */ msgbufp = (struct msgbuf *)pmap_steal_memory(MSGBUF_SIZE); msgbufinit(msgbufp, MSGBUF_SIZE); proc_linkup0(&proc0, &thread0); /* * Init mapping for kernel stack for proc 0 */ thread0.td_kstack = pmap_steal_memory(KSTACK_PAGES * PAGE_SIZE); thread0.td_kstack_pages = KSTACK_PAGES; mutex_init(); /* * Initialize the rest of proc 0's PCB. * * Set the kernel sp, reserving space for an (empty) trapframe, * and make proc0's trapframe pointer point to it for sanity. * Initialise proc0's backing store to start after u area. */ cpu_thread_alloc(&thread0); thread0.td_frame->tf_flags = FRAME_SYSCALL; thread0.td_pcb->pcb_special.sp = (u_int64_t)thread0.td_frame - 16; thread0.td_pcb->pcb_special.bspstore = thread0.td_kstack; /* * Initialize the virtual memory system. */ pmap_bootstrap(); /* * Initialize debuggers, and break into them if appropriate. */ kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger\n"); #endif ia64_set_tpr(0); ia64_srlz_d(); ret.bspstore = thread0.td_pcb->pcb_special.bspstore; ret.sp = thread0.td_pcb->pcb_special.sp; return (ret); } __volatile void * ia64_ioport_address(u_int port) { uint64_t addr; addr = (port > 0xffff) ? IA64_PHYS_TO_RR6((uint64_t)port) : ia64_port_base | ((port & 0xfffc) << 10) | (port & 0xFFF); return ((__volatile void *)addr); } uint64_t ia64_get_hcdp(void) { return (bootinfo.bi_hcdp); } void bzero(void *buf, size_t len) { caddr_t p = buf; while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) { *p++ = 0; len--; } while (len >= sizeof(u_long) * 8) { *(u_long*) p = 0; *((u_long*) p + 1) = 0; *((u_long*) p + 2) = 0; *((u_long*) p + 3) = 0; len -= sizeof(u_long) * 8; *((u_long*) p + 4) = 0; *((u_long*) p + 5) = 0; *((u_long*) p + 6) = 0; *((u_long*) p + 7) = 0; p += sizeof(u_long) * 8; } while (len >= sizeof(u_long)) { *(u_long*) p = 0; len -= sizeof(u_long); p += sizeof(u_long); } while (len) { *p++ = 0; len--; } } void DELAY(int n) { u_int64_t start, end, now; sched_pin(); start = ia64_get_itc(); end = start + (itc_frequency * n) / 1000000; /* printf("DELAY from 0x%lx to 0x%lx\n", start, end); */ do { now = ia64_get_itc(); } while (now < end || (now > start && end < start)); sched_unpin(); } /* * Send an interrupt (signal) to a process. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct proc *p; struct thread *td; struct trapframe *tf; struct sigacts *psp; struct sigframe sf, *sfp; u_int64_t sbs, sp; int oonstack; int sig; u_long code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; sp = tf->tf_special.sp; oonstack = sigonstack(sp); sbs = 0; /* save user context */ bzero(&sf, sizeof(struct sigframe)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; /* * Allocate and validate space for the signal handler * context. Note that if the stack is in P0 space, the * call to grow() is a nop, and the useracc() check * will fail if the process has not already allocated * the space with a `brk'. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sbs = (u_int64_t)td->td_sigstk.ss_sp; sbs = (sbs + 15) & ~15; sfp = (struct sigframe *)(sbs + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe *)sp; sfp = (struct sigframe *)((u_int64_t)(sfp - 1) & ~15); /* Fill in the siginfo structure for POSIX handlers. */ if (SIGISMEMBER(psp->ps_siginfo, sig)) { sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* * XXX this shouldn't be here after code in trap.c * is fixed */ sf.sf_si.si_addr = (void*)tf->tf_special.ifa; code = (u_int64_t)&sfp->sf_si; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); get_mcontext(td, &sf.sf_uc.uc_mcontext, 0); /* Copy the frame out to userland. */ if (copyout(&sf, sfp, sizeof(sf)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); return; } if ((tf->tf_flags & FRAME_SYSCALL) == 0) { tf->tf_special.psr &= ~IA64_PSR_RI; tf->tf_special.iip = ia64_get_k5() + ((uint64_t)break_sigtramp - (uint64_t)ia64_gateway_page); } else tf->tf_special.iip = ia64_get_k5() + ((uint64_t)epc_sigtramp - (uint64_t)ia64_gateway_page); /* * Setup the trapframe to return to the signal trampoline. We pass * information to the trampoline in the following registers: * * gp new backing store or NULL * r8 signal number * r9 signal code or siginfo pointer * r10 signal handler (function descriptor) */ tf->tf_special.sp = (u_int64_t)sfp - 16; tf->tf_special.gp = sbs; tf->tf_special.bspstore = sf.sf_uc.uc_mcontext.mc_special.bspstore; tf->tf_special.ndirty = 0; tf->tf_special.rnat = sf.sf_uc.uc_mcontext.mc_special.rnat; tf->tf_scratch.gr8 = sig; tf->tf_scratch.gr9 = code; tf->tf_scratch.gr10 = (u_int64_t)catcher; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ int sigreturn(struct thread *td, struct sigreturn_args /* { ucontext_t *sigcntxp; } */ *uap) { ucontext_t uc; struct trapframe *tf; struct proc *p; struct pcb *pcb; tf = td->td_frame; p = td->td_proc; pcb = td->td_pcb; /* * Fetch the entire context structure at once for speed. * We don't use a normal argument to simplify RSE handling. */ if (copyin(uap->sigcntxp, (caddr_t)&uc, sizeof(uc))) return (EFAULT); set_mcontext(td, &uc.uc_mcontext); PROC_LOCK(p); #if defined(COMPAT_43) if (sigonstack(tf->tf_special.sp)) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = uc.uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_special = tf->tf_special; pcb->pcb_special.__spare = ~0UL; /* XXX see unwind.c */ save_callee_saved(&pcb->pcb_preserved); save_callee_saved_fp(&pcb->pcb_preserved_fp); } int ia64_flush_dirty(struct thread *td, struct _special *r) { struct iovec iov; struct uio uio; uint64_t bspst, kstk, rnat; int error, locked; if (r->ndirty == 0) return (0); kstk = td->td_kstack + (r->bspstore & 0x1ffUL); if (td == curthread) { __asm __volatile("mov ar.rsc=0;;"); __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst)); /* Make sure we have all the user registers written out. */ if (bspst - kstk < r->ndirty) { __asm __volatile("flushrs;;"); __asm __volatile("mov %0=ar.bspstore" : "=r"(bspst)); } __asm __volatile("mov %0=ar.rnat;;" : "=r"(rnat)); __asm __volatile("mov ar.rsc=3"); error = copyout((void*)kstk, (void*)r->bspstore, r->ndirty); kstk += r->ndirty; r->rnat = (bspst > kstk && (bspst & 0x1ffL) < (kstk & 0x1ffL)) ? *(uint64_t*)(kstk | 0x1f8L) : rnat; } else { locked = PROC_LOCKED(td->td_proc); if (!locked) PHOLD(td->td_proc); iov.iov_base = (void*)(uintptr_t)kstk; iov.iov_len = r->ndirty; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = r->bspstore; uio.uio_resid = r->ndirty; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_WRITE; uio.uio_td = td; error = proc_rwmem(td->td_proc, &uio); /* * XXX proc_rwmem() doesn't currently return ENOSPC, * so I think it can bogusly return 0. Neither do * we allow short writes. */ if (uio.uio_resid != 0 && error == 0) error = ENOSPC; if (!locked) PRELE(td->td_proc); } r->bspstore += r->ndirty; r->ndirty = 0; return (error); } int get_mcontext(struct thread *td, mcontext_t *mc, int flags) { struct trapframe *tf; int error; tf = td->td_frame; bzero(mc, sizeof(*mc)); mc->mc_special = tf->tf_special; error = ia64_flush_dirty(td, &mc->mc_special); if (tf->tf_flags & FRAME_SYSCALL) { mc->mc_flags |= _MC_FLAGS_SYSCALL_CONTEXT; mc->mc_scratch = tf->tf_scratch; if (flags & GET_MC_CLEAR_RET) { mc->mc_scratch.gr8 = 0; mc->mc_scratch.gr9 = 0; mc->mc_scratch.gr10 = 0; mc->mc_scratch.gr11 = 0; } } else { mc->mc_flags |= _MC_FLAGS_ASYNC_CONTEXT; mc->mc_scratch = tf->tf_scratch; mc->mc_scratch_fp = tf->tf_scratch_fp; /* * XXX If the thread never used the high FP registers, we * probably shouldn't waste time saving them. */ ia64_highfp_save(td); mc->mc_flags |= _MC_FLAGS_HIGHFP_VALID; mc->mc_high_fp = td->td_pcb->pcb_high_fp; } save_callee_saved(&mc->mc_preserved); save_callee_saved_fp(&mc->mc_preserved_fp); return (error); } int set_mcontext(struct thread *td, const mcontext_t *mc) { struct _special s; struct trapframe *tf; uint64_t psrmask; tf = td->td_frame; KASSERT((tf->tf_special.ndirty & ~PAGE_MASK) == 0, ("Whoa there! We have more than 8KB of dirty registers!")); s = mc->mc_special; /* * Only copy the user mask and the restart instruction bit from * the new context. */ psrmask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_RI; s.psr = (tf->tf_special.psr & ~psrmask) | (s.psr & psrmask); /* We don't have any dirty registers of the new context. */ s.ndirty = 0; if (mc->mc_flags & _MC_FLAGS_ASYNC_CONTEXT) { /* * We can get an async context passed to us while we * entered the kernel through a syscall: sigreturn(2) * takes contexts that could previously be the result of * a trap or interrupt. * Hence, we cannot assert that the trapframe is not * a syscall frame, but we can assert that it's at * least an expected syscall. */ if (tf->tf_flags & FRAME_SYSCALL) { KASSERT(tf->tf_scratch.gr15 == SYS_sigreturn, ("foo")); tf->tf_flags &= ~FRAME_SYSCALL; } tf->tf_scratch = mc->mc_scratch; tf->tf_scratch_fp = mc->mc_scratch_fp; if (mc->mc_flags & _MC_FLAGS_HIGHFP_VALID) td->td_pcb->pcb_high_fp = mc->mc_high_fp; } else { KASSERT((tf->tf_flags & FRAME_SYSCALL) != 0, ("foo")); if ((mc->mc_flags & _MC_FLAGS_SYSCALL_CONTEXT) == 0) { s.cfm = tf->tf_special.cfm; s.iip = tf->tf_special.iip; tf->tf_scratch.gr15 = 0; /* Clear syscall nr. */ } else tf->tf_scratch = mc->mc_scratch; } tf->tf_special = s; restore_callee_saved(&mc->mc_preserved); restore_callee_saved_fp(&mc->mc_preserved_fp); return (0); } /* * Clear registers on exec. */ void exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tf; uint64_t *ksttop, *kst; tf = td->td_frame; ksttop = (uint64_t*)(td->td_kstack + tf->tf_special.ndirty + (tf->tf_special.bspstore & 0x1ffUL)); /* * We can ignore up to 8KB of dirty registers by masking off the * lower 13 bits in exception_restore() or epc_syscall(). This * should be enough for a couple of years, but if there are more * than 8KB of dirty registers, we lose track of the bottom of * the kernel stack. The solution is to copy the active part of * the kernel stack down 1 page (or 2, but not more than that) * so that we always have less than 8KB of dirty registers. */ KASSERT((tf->tf_special.ndirty & ~PAGE_MASK) == 0, ("Whoa there! We have more than 8KB of dirty registers!")); bzero(&tf->tf_special, sizeof(tf->tf_special)); if ((tf->tf_flags & FRAME_SYSCALL) == 0) { /* break syscalls. */ bzero(&tf->tf_scratch, sizeof(tf->tf_scratch)); bzero(&tf->tf_scratch_fp, sizeof(tf->tf_scratch_fp)); tf->tf_special.cfm = (1UL<<63) | (3UL<<7) | 3UL; tf->tf_special.bspstore = IA64_BACKINGSTORE; /* * Copy the arguments onto the kernel register stack so that * they get loaded by the loadrs instruction. Skip over the * NaT collection points. */ kst = ksttop - 1; if (((uintptr_t)kst & 0x1ff) == 0x1f8) *kst-- = 0; *kst-- = 0; if (((uintptr_t)kst & 0x1ff) == 0x1f8) *kst-- = 0; *kst-- = ps_strings; if (((uintptr_t)kst & 0x1ff) == 0x1f8) *kst-- = 0; *kst = stack; tf->tf_special.ndirty = (ksttop - kst) << 3; } else { /* epc syscalls (default). */ tf->tf_special.cfm = (3UL<<62) | (3UL<<7) | 3UL; tf->tf_special.bspstore = IA64_BACKINGSTORE + 24; /* * Write values for out0, out1 and out2 to the user's backing * store and arrange for them to be restored into the user's * initial register frame. * Assumes that (bspstore & 0x1f8) < 0x1e0. */ suword((caddr_t)tf->tf_special.bspstore - 24, stack); suword((caddr_t)tf->tf_special.bspstore - 16, ps_strings); suword((caddr_t)tf->tf_special.bspstore - 8, 0); } tf->tf_special.iip = entry; tf->tf_special.sp = (stack & ~15) - 16; tf->tf_special.rsc = 0xf; tf->tf_special.fpsr = IA64_FPSR_DEFAULT; tf->tf_special.psr = IA64_PSR_IC | IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | IA64_PSR_DFH | IA64_PSR_BN | IA64_PSR_CPL_USER; } int ptrace_set_pc(struct thread *td, unsigned long addr) { uint64_t slot; switch (addr & 0xFUL) { case 0: slot = IA64_PSR_RI_0; break; case 1: /* XXX we need to deal with MLX bundles here */ slot = IA64_PSR_RI_1; break; case 2: slot = IA64_PSR_RI_2; break; default: return (EINVAL); } td->td_frame->tf_special.iip = addr & ~0x0FULL; td->td_frame->tf_special.psr = (td->td_frame->tf_special.psr & ~IA64_PSR_RI) | slot; return (0); } int ptrace_single_step(struct thread *td) { struct trapframe *tf; /* * There's no way to set single stepping when we're leaving the * kernel through the EPC syscall path. The way we solve this is * by enabling the lower-privilege trap so that we re-enter the * kernel as soon as the privilege level changes. See trap.c for * how we proceed from there. */ tf = td->td_frame; if (tf->tf_flags & FRAME_SYSCALL) tf->tf_special.psr |= IA64_PSR_LP; else tf->tf_special.psr |= IA64_PSR_SS; return (0); } int ptrace_clear_single_step(struct thread *td) { struct trapframe *tf; /* * Clear any and all status bits we may use to implement single * stepping. */ tf = td->td_frame; tf->tf_special.psr &= ~IA64_PSR_SS; tf->tf_special.psr &= ~IA64_PSR_LP; tf->tf_special.psr &= ~IA64_PSR_TB; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; regs->r_special = tf->tf_special; regs->r_scratch = tf->tf_scratch; save_callee_saved(®s->r_preserved); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; int error; tf = td->td_frame; error = ia64_flush_dirty(td, &tf->tf_special); if (!error) { tf->tf_special = regs->r_special; tf->tf_special.bspstore += tf->tf_special.ndirty; tf->tf_special.ndirty = 0; tf->tf_scratch = regs->r_scratch; restore_callee_saved(®s->r_preserved); } return (error); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *frame = td->td_frame; struct pcb *pcb = td->td_pcb; /* Save the high FP registers. */ ia64_highfp_save(td); fpregs->fpr_scratch = frame->tf_scratch_fp; save_callee_saved_fp(&fpregs->fpr_preserved); fpregs->fpr_high = pcb->pcb_high_fp; return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *frame = td->td_frame; struct pcb *pcb = td->td_pcb; /* Throw away the high FP registers (should be redundant). */ ia64_highfp_drop(td); frame->tf_scratch_fp = fpregs->fpr_scratch; restore_callee_saved_fp(&fpregs->fpr_preserved); pcb->pcb_high_fp = fpregs->fpr_high; return (0); } /* * High FP register functions. */ int ia64_highfp_drop(struct thread *td) { struct pcb *pcb; struct pcpu *cpu; struct thread *thr; mtx_lock_spin(&td->td_md.md_highfp_mtx); pcb = td->td_pcb; cpu = pcb->pcb_fpcpu; if (cpu == NULL) { mtx_unlock_spin(&td->td_md.md_highfp_mtx); return (0); } pcb->pcb_fpcpu = NULL; thr = cpu->pc_fpcurthread; cpu->pc_fpcurthread = NULL; mtx_unlock_spin(&td->td_md.md_highfp_mtx); /* Post-mortem sanity checking. */ KASSERT(thr == td, ("Inconsistent high FP state")); return (1); } int ia64_highfp_save(struct thread *td) { struct pcb *pcb; struct pcpu *cpu; struct thread *thr; /* Don't save if the high FP registers weren't modified. */ if ((td->td_frame->tf_special.psr & IA64_PSR_MFH) == 0) return (ia64_highfp_drop(td)); mtx_lock_spin(&td->td_md.md_highfp_mtx); pcb = td->td_pcb; cpu = pcb->pcb_fpcpu; if (cpu == NULL) { mtx_unlock_spin(&td->td_md.md_highfp_mtx); return (0); } #ifdef SMP if (td == curthread) sched_pin(); if (cpu != pcpup) { mtx_unlock_spin(&td->td_md.md_highfp_mtx); ipi_send(cpu, IPI_HIGH_FP); if (td == curthread) sched_unpin(); while (pcb->pcb_fpcpu == cpu) DELAY(100); return (1); } else { save_high_fp(&pcb->pcb_high_fp); if (td == curthread) sched_unpin(); } #else save_high_fp(&pcb->pcb_high_fp); #endif pcb->pcb_fpcpu = NULL; thr = cpu->pc_fpcurthread; cpu->pc_fpcurthread = NULL; mtx_unlock_spin(&td->td_md.md_highfp_mtx); /* Post-mortem sanity cxhecking. */ KASSERT(thr == td, ("Inconsistent high FP state")); return (1); } void ia64_invalidate_icache(vm_offset_t va, vm_offset_t sz) { vm_offset_t lim; if (!ia64_inval_icache_needed) return; lim = va + sz; while (va < lim) { ia64_fc_i(va); va += 32; /* XXX */ } ia64_sync_i(); ia64_srlz_i(); } diff --git a/sys/mips/mips/machdep.c b/sys/mips/mips/machdep.c index 9aa3044dc81c..6bd518018e7c 100644 --- a/sys/mips/mips/machdep.c +++ b/sys/mips/mips/machdep.c @@ -1,421 +1,431 @@ /* $OpenBSD: machdep.c,v 1.33 1998/09/15 10:58:54 pefo Exp $ */ /* tracked to 1.38 */ /* * Copyright (c) 1988 University of Utah. * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, The Mach Operating System project at * Carnegie-Mellon University and Ralph Campbell. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 8.3 (Berkeley) 1/12/94 * Id: machdep.c,v 1.33 1998/09/15 10:58:54 pefo Exp * JNPR: machdep.c,v 1.11.2.3 2007/08/29 12:24:49 */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include "opt_md.h" #include "opt_msgbuf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #include #include #endif #include #include #define BOOTINFO_DEBUG 0 char machine[] = "mips"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); static char cpu_model[30]; SYSCTL_STRING(_hw, HW_MODEL, model, CTLFLAG_RD, cpu_model, 0, "Machine model"); int cold = 1; long realmem = 0; int cpu_clock = MIPS_DEFAULT_HZ; SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, &cpu_clock, 0, "CPU instruction clock rate"); int clocks_running = 0; vm_offset_t kstack0; #ifdef SMP struct pcpu __pcpu[32]; char pcpu_boot_stack[KSTACK_PAGES * PAGE_SIZE * (MAXCPU-1)]; #else struct pcpu pcpu; struct pcpu *pcpup = &pcpu; #endif vm_offset_t phys_avail[10]; #ifdef UNIMPLEMENTED struct platform platform; #endif vm_paddr_t mips_wired_tlb_physmem_start; vm_paddr_t mips_wired_tlb_physmem_end; u_int need_wired_tlb_page_pool; static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); struct kva_md_info kmi; int cpucfg; /* Value of processor config register */ int num_tlbentries = 64; /* Size of the CPU tlb */ int cputype; extern char MipsException[], MipsExceptionEnd[]; /* TLB miss handler address and end */ extern char MipsTLBMiss[], MipsTLBMissEnd[]; /* Cache error handler */ extern char MipsCache[], MipsCacheEnd[]; extern char edata[], end[]; u_int32_t bootdev; struct bootinfo bootinfo; static void cpu_startup(void *dummy) { if (boothowto & RB_VERBOSE) bootverbose++; bootverbose++; printf("real memory = %lu (%luK bytes)\n", ptoa(realmem), ptoa(realmem) / 1024); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { int size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08x - 0x%08x, %u bytes (%u pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size1, size1 / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %lu (%luMB)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); } /* * Shutdown the CPU as much as possible */ void cpu_reset(void) { platform_reset(); } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* TBD */ +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) ; } SYSCTL_STRUCT(_machdep, CPU_BOOTINFO, bootinfo, CTLFLAG_RD, &bootinfo, bootinfo, "Bootinfo struct: kernel filename, BIOS harddisk geometry, etc"); #ifdef PORT_TO_JMIPS static int sysctl_machdep_adjkerntz(SYSCTL_HANDLER_ARGS) { } SYSCTL_PROC(_machdep, CPU_ADJKERNTZ, adjkerntz, CTLTYPE_INT | CTLFLAG_RW, &adjkerntz, 0, sysctl_machdep_adjkerntz, "I", "Local offset from GMT in seconds"); SYSCTL_INT(_machdep, CPU_DISRTCSET, disable_rtc_set, CTLFLAG_RW, &disable_rtc_set, 0, "Disable setting the real time clock to system time"); SYSCTL_INT(_machdep, CPU_WALLCLOCK, wall_cmos_clock, CTLFLAG_RW, &wall_cmos_clock, 0, "Wall CMOS clock assumed"); #endif /* PORT_TO_JMIPS */ /* * Initialize mips and configure to run kernel */ void mips_proc0_init(void) { proc_linkup(&proc0, &thread0); thread0.td_kstack = kstack0; thread0.td_kstack_pages = KSTACK_PAGES - 1; if (thread0.td_kstack & (1 << PAGE_SHIFT)) thread0.td_md.md_realstack = thread0.td_kstack + PAGE_SIZE; else thread0.td_md.md_realstack = thread0.td_kstack; /* Initialize pcpu info of cpu-zero */ #ifdef SMP pcpu_init(&__pcpu[0], 0, sizeof(struct pcpu)); #else pcpu_init(pcpup, 0, sizeof(struct pcpu)); #endif /* * Do not use cpu_thread_alloc to initialize these fields * thread0 is the only thread that has kstack located in KSEG0 * while cpu_thread_alloc handles kstack allocated in KSEG2. */ thread0.td_pcb = (struct pcb *)(thread0.td_md.md_realstack + (thread0.td_kstack_pages - 1) * PAGE_SIZE) - 1; thread0.td_frame = &thread0.td_pcb->pcb_regs; /* * There is no need to initialize md_upte array for thread0 as it's * located in .bss section and should be explicitly zeroed during * kernel initialization. */ PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); } struct msgbuf *msgbufp=0; /* * Initialize the hardware exception vectors, and the jump table used to * call locore cache and TLB management functions, based on the kind * of CPU the kernel is running on. */ void mips_vector_init(void) { /* * Copy down exception vector code. */ if (MipsTLBMissEnd - MipsTLBMiss > 0x80) panic("startup: UTLB code too large"); if (MipsCacheEnd - MipsCache > 0x80) panic("startup: Cache error code too large"); bcopy(MipsTLBMiss, (void *)TLB_MISS_EXC_VEC, MipsTLBMissEnd - MipsTLBMiss); #ifdef TARGET_OCTEON /* Fake, but sufficient, for the 32-bit with 64-bit hardware addresses */ bcopy(MipsTLBMiss, (void *)XTLB_MISS_EXC_VEC, MipsTLBMissEnd - MipsTLBMiss); #endif bcopy(MipsException, (void *)GEN_EXC_VEC, MipsExceptionEnd - MipsException); bcopy(MipsCache, (void *)CACHE_ERR_EXC_VEC, MipsCacheEnd - MipsCache); /* * Clear out the I and D caches. */ mips_icache_sync_all(); mips_dcache_wbinv_all(); /* * Mask all interrupts. Each interrupt will be enabled * when handler is installed for it */ set_intr_mask (ALL_INT_MASK); /* Clear BEV in SR so we start handling our own exceptions */ mips_cp0_status_write(mips_cp0_status_read() & ~SR_BOOT_EXC_VEC); } /* * Initialise a struct pcpu. */ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { #ifdef SMP if (cpuid != 0) pcpu->pc_boot_stack = (void *)(pcpu_boot_stack + cpuid * (KSTACK_PAGES * PAGE_SIZE)); #endif pcpu->pc_next_asid = 1; pcpu->pc_asid_generation = 1; } int sysarch(struct thread *td, register struct sysarch_args *uap) { return (ENOSYS); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on mips */ return (ENOSYS); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on mips */ return (ENOSYS); } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_intr = disableintr(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) restoreintr(td->td_md.md_saved_intr); } u_int32_t get_cyclecount(void) { u_int32_t count; mfc0_macro(count, 9); return (count); } /* * call platform specific code to halt (until next interrupt) for the idle loop */ void cpu_idle(int busy) { if (mips_cp0_status_read() & SR_INT_ENAB) __asm __volatile ("wait"); else panic("ints disabled in idleproc!"); } void dumpsys(struct dumperinfo *di __unused) { printf("Kernel dumps not implemented on this architecture\n"); } int cpu_idle_wakeup(int cpu) { return (0); } diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c index 4c0a2c994c9b..faa9302c31d1 100644 --- a/sys/nfs/nfs_common.c +++ b/sys/nfs/nfs_common.c @@ -1,337 +1,338 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include enum vtype nv3tov_type[8]= { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO }; nfstype nfsv3_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFSOCK, NFFIFO, NFNON }; static void *nfsm_dissect_xx_sub(int s, struct mbuf **md, caddr_t *dpos, int how); u_quad_t nfs_curusec(void) { struct timeval tv; getmicrotime(&tv); return ((u_quad_t)tv.tv_sec * 1000000 + (u_quad_t)tv.tv_usec); } /* * copies mbuf chain to the uio scatter/gather list */ int nfsm_mbuftouio(struct mbuf **mrep, struct uio *uiop, int siz, caddr_t *dpos) { char *mbufcp, *uiocp; int xfer, left, len; struct mbuf *mp; long uiosiz, rem; int error = 0; mp = *mrep; mbufcp = *dpos; len = mtod(mp, caddr_t)+mp->m_len-mbufcp; rem = nfsm_rndup(siz)-siz; while (siz > 0) { if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL) return (EFBIG); left = uiop->uio_iov->iov_len; uiocp = uiop->uio_iov->iov_base; if (left > siz) left = siz; uiosiz = left; while (left > 0) { while (len == 0) { mp = mp->m_next; if (mp == NULL) return (EBADRPC); mbufcp = mtod(mp, caddr_t); len = mp->m_len; } xfer = (left > len) ? len : left; #ifdef notdef /* Not Yet.. */ if (uiop->uio_iov->iov_op != NULL) (*(uiop->uio_iov->iov_op)) (mbufcp, uiocp, xfer); else #endif - if (uiop->uio_segflg == UIO_SYSSPACE) + if (uiop->uio_segflg == UIO_SYSSPACE) { bcopy(mbufcp, uiocp, xfer); - else + cpu_flush_dcache(uiocp, xfer); + } else copyout(mbufcp, uiocp, xfer); left -= xfer; len -= xfer; mbufcp += xfer; uiocp += xfer; uiop->uio_offset += xfer; uiop->uio_resid -= xfer; } if (uiop->uio_iov->iov_len <= siz) { uiop->uio_iovcnt--; uiop->uio_iov++; } else { uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + uiosiz; uiop->uio_iov->iov_len -= uiosiz; } siz -= uiosiz; } *dpos = mbufcp; *mrep = mp; if (rem > 0) { if (len < rem) error = nfs_adv(mrep, dpos, rem, len); else *dpos += rem; } return (error); } /* * Help break down an mbuf chain by setting the first siz bytes contiguous * pointed to by returned val. * This is used by the macros nfsm_dissect for tough * cases. (The macros use the vars. dpos and dpos2) */ void * nfsm_disct(struct mbuf **mdp, caddr_t *dposp, int siz, int left, int how) { struct mbuf *mp, *mp2; int siz2, xfer; caddr_t ptr, npos = NULL; void *ret; mp = *mdp; while (left == 0) { *mdp = mp = mp->m_next; if (mp == NULL) return NULL; left = mp->m_len; *dposp = mtod(mp, caddr_t); } if (left >= siz) { ret = *dposp; *dposp += siz; } else if (mp->m_next == NULL) { return NULL; } else if (siz > MHLEN) { panic("nfs S too big"); } else { MGET(mp2, how, MT_DATA); if (mp2 == NULL) return NULL; mp2->m_len = siz; mp2->m_next = mp->m_next; mp->m_next = mp2; mp->m_len -= left; mp = mp2; ptr = mtod(mp, caddr_t); ret = ptr; bcopy(*dposp, ptr, left); /* Copy what was left */ siz2 = siz-left; ptr += left; mp2 = mp->m_next; npos = mtod(mp2, caddr_t); /* Loop around copying up the siz2 bytes */ while (siz2 > 0) { if (mp2 == NULL) return NULL; xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2; if (xfer > 0) { bcopy(mtod(mp2, caddr_t), ptr, xfer); mp2->m_data += xfer; mp2->m_len -= xfer; ptr += xfer; siz2 -= xfer; } if (siz2 > 0) { mp2 = mp2->m_next; if (mp2 != NULL) npos = mtod(mp2, caddr_t); } } *mdp = mp2; *dposp = mtod(mp2, caddr_t); if (!nfsm_aligned(*dposp, u_int32_t)) { bcopy(*dposp, npos, mp2->m_len); mp2->m_data = npos; *dposp = npos; } } return ret; } /* * Advance the position in the mbuf chain. */ int nfs_adv(struct mbuf **mdp, caddr_t *dposp, int offs, int left) { struct mbuf *m; int s; m = *mdp; s = left; while (s < offs) { offs -= s; m = m->m_next; if (m == NULL) return (EBADRPC); s = m->m_len; } *mdp = m; *dposp = mtod(m, caddr_t)+offs; return (0); } void * nfsm_build_xx(int s, struct mbuf **mb, caddr_t *bpos) { struct mbuf *mb2; void *ret; if (s > M_TRAILINGSPACE(*mb)) { MGET(mb2, M_WAIT, MT_DATA); if (s > MLEN) panic("build > MLEN"); (*mb)->m_next = mb2; *mb = mb2; (*mb)->m_len = 0; *bpos = mtod(*mb, caddr_t); } ret = *bpos; (*mb)->m_len += s; *bpos += s; return ret; } void * nfsm_dissect_xx(int s, struct mbuf **md, caddr_t *dpos) { return nfsm_dissect_xx_sub(s, md, dpos, M_WAIT); } void * nfsm_dissect_xx_nonblock(int s, struct mbuf **md, caddr_t *dpos) { return nfsm_dissect_xx_sub(s, md, dpos, M_DONTWAIT); } static void * nfsm_dissect_xx_sub(int s, struct mbuf **md, caddr_t *dpos, int how) { int t1; char *cp2; void *ret; t1 = mtod(*md, caddr_t) + (*md)->m_len - *dpos; if (t1 >= s) { ret = *dpos; *dpos += s; return ret; } cp2 = nfsm_disct(md, dpos, s, t1, how); return cp2; } int nfsm_strsiz_xx(int *s, int m, struct mbuf **mb, caddr_t *bpos) { u_int32_t *tl; tl = nfsm_dissect_xx(NFSX_UNSIGNED, mb, bpos); if (tl == NULL) return EBADRPC; *s = fxdr_unsigned(int32_t, *tl); if (*s > m) return EBADRPC; return 0; } int nfsm_adv_xx(int s, struct mbuf **md, caddr_t *dpos) { int t1; t1 = mtod(*md, caddr_t) + (*md)->m_len - *dpos; if (t1 >= s) { *dpos += s; return 0; } t1 = nfs_adv(md, dpos, s, t1); if (t1) return t1; return 0; } diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index ba3d04796c62..0025a8aa52d5 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -1,2817 +1,2827 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atalk.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_ipx.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_msgbuf.h" #include "opt_npx.h" #include "opt_perfmon.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #endif #ifdef DEV_ISA #include #endif /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern void init386(int first); extern void dblfault_handler(void); extern void printcpuinfo(void); /* XXX header file */ extern void finishidentcpu(void); extern void panicifcpuunsupported(void); extern void initializecpu(void); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp); static int set_fpcontext(struct thread *td, const mcontext_t *mcp); #ifdef CPU_ENABLE_SSE static void set_fpregs_xmm(struct save87 *, struct savexmm *); static void fill_fpregs_xmm(struct savexmm *, struct save87 *); #endif /* CPU_ENABLE_SSE */ SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */ int need_post_dma_flush; /* If 1, use invd after DMA transfer. */ #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif int _udatasel, _ucodesel; u_int basemem; static int ispc98 = 1; SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, ""); int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; static void cpu_startup(dummy) void *dummy; { /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ju (%ju MB)\n", ptoa((uintmax_t)Maxmem), ptoa((uintmax_t)Maxmem) / 1048576); realmem = Maxmem; /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)cnt.v_free_count), ptoa((uintmax_t)cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); mca_init(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct osigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; regs->tf_eip = PS_STRINGS - szosigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; struct segment_descriptor *sdp; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext); fpstate_drop(td); /* * Unconditionally fill the fsbase and gsbase into the mcontext. */ sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_fsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; struct proc *p = td->td_proc; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; PROC_LOCK(p); #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif SIGSETOLD(td->td_sigmask, scp->sc_mask); SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct proc *p = td->td_proc; struct trapframe *regs; const struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("freebsd4_sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("freebsd4_sigreturn: cs = 0x%x\n", cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p = td->td_proc; struct trapframe *regs; const ucontext_t *ucp; int cs, eflags, error, ret; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_eflags for faults. Debuggers * should sometimes set it there too. tf_eflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ if (!EFL_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) { printf("sigreturn: eflags = 0x%x\n", eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { printf("sigreturn: cs = 0x%x\n", cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } ret = set_fpcontext(td, &ucp->uc_mcontext); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } PROC_LOCK(p); #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif td->td_sigmask = ucp->uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); return (EJUSTRETURN); } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* Not applicable */ +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { register_t reg; uint64_t tsc1, tsc2; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); if (!tsc_present) return (EOPNOTSUPP); /* If we're booting, trust the rate calibrated moments ago. */ if (cold) { *rate = tsc_freq; return (0); } #ifdef SMP /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); #ifdef SMP thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); #endif /* * Calculate the difference in readings, convert to Mhz, and * subtract 0.5% of the total. Empirical testing has shown that * overhead in DELAY() works out to approximately this value. */ tsc2 -= tsc1; *rate = tsc2 * 1000 - tsc2 * 5; return (0); } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) __asm__ ("hlt"); } /* * Hook to idle the CPU when possible. In the SMP case we default to * off because a halted cpu will not currently pick up a new thread in the * run queue until the next timer tick. If turned on this will result in * approximately a 4.2% loss in real time performance in buildworld tests * (but improves user and sys times oddly enough), and saves approximately * 5% in power consumption on an idle machine (tests w/2xCPU 1.1GHz P3). * * XXX we need to have a cpu mask of idle cpus and generate an IPI or * otherwise generate some sort of interrupt to wake up cpus sitting in HLT. * Then we can have our cake and eat it too. * * XXX I'm turning it on for SMP as well by default for now. It seems to * help lock contention somewhat, and this is critical for HTT. -Peter */ static int cpu_idle_hlt = 1; TUNABLE_INT("machdep.cpu_idle_hlt", &cpu_idle_hlt); SYSCTL_INT(_machdep, OID_AUTO, cpu_idle_hlt, CTLFLAG_RW, &cpu_idle_hlt, 0, "Idle loop HLT enable"); static void cpu_idle_default(void) { /* * we must absolutely guarentee that hlt is the * absolute next instruction after sti or we * introduce a timing window. */ __asm __volatile("sti; hlt"); } /* * Note that we have to be careful here to avoid a race between checking * sched_runnable() and actually halting. If we don't do this, we may waste * the time between calling hlt and the next interrupt even though there * is a runnable process. */ void cpu_idle(int busy) { #ifdef SMP if (mp_grab_cpu_hlt()) return; #endif if (cpu_idle_hlt) { disable_intr(); if (sched_runnable()) enable_intr(); else (*cpu_idle_hook)(); } } int cpu_idle_wakeup(int cpu) { return (0); } /* Other subsystems (e.g., ACPI) can hook this later. */ void (*cpu_idle_hook)(void) = cpu_idle_default; /* * Reset registers to default values on exec. */ void exec_setregs(td, entry, stack, ps_strings) struct thread *td; u_long entry; u_long stack; u_long ps_strings; { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = entry; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == PCPU_GET(curpcb)) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } /* * Initialize the math emulator (if any) for the current process. * Actually, just clear the bit that says that the emulator has * been initialized. Initialization is delayed until the process * traps to the emulator (if it is done at all) mainly because * emulators don't provide an entry point for initialization. */ td->td_pcb->pcb_flags &= ~FP_SOFTFP; pcb->pcb_initial_npxcw = __INITIAL_NPXCW__; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate. * * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ struct mtx dt_lock; /* lock for GDT and LDT */ #if defined(I586_CPU) && !defined(NO_F00F_HACK) extern int has_f00f_bug; #endif static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. * * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUFS_SEL 2 %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS_SEL 3 %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 6 Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { .ssd_base = 0x400, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GLDT_SEL 10 LDT Descriptor */ { .ssd_base = (int) ldt, .ssd_limit = sizeof(ldt)-1, .ssd_type = SDT_SYSLDT, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { .ssd_base = (int) ldt, .ssd_limit = (512 * sizeof(union descriptor)-1), .ssd_type = SDT_SYSLDT, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { .ssd_base = (int) &dblfault_tss, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GNDIS_SEL 18 NDIS Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. */ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(int first) { int i, off, physmap_idx, pa_indx, da_indx; int pg_n; u_long physmem_tunable; u_int extmem, under16; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; pt_entry_t *pte; quad_t dcons_addr, dcons_size; bzero(physmap, sizeof(physmap)); /* XXX - some of EPSON machines can't use PG_N */ pg_n = PG_N; if (pc98_machine_type & M_EPSON_PC98) { switch (epson_machine_id) { #ifdef WB_CACHE default: #endif case EPSON_PC486_HX: case EPSON_PC486_HG: case EPSON_PC486_HA: pg_n = 0; break; } } /* * Perform "base memory" related probes & setup */ under16 = pc98_getmemsize(&basemem, &extmem); if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * if basemem != 640, map pages r/w into vm86 page table so * that the bios can scribble on it. */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* * We need to divide chunk if Maxmem is larger than 16MB and * under 16MB area is not full of memory. * (1) system area (15-16MB region) is cut off * (2) extended memory is only over 16MB area (ex. Melco "HYPERMEMORY") */ if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) { /* 15M - 16M region is cut off, so need to divide chunk */ physmap[physmap_idx + 1] = under16 * 1024; physmap_idx += 2; physmap[physmap_idx] = 0x1000000; physmap[physmap_idx + 1] = physmap[2] + extmem * 1024; } /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | pg_n; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(MSGBUF_SIZE) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(MSGBUF_SIZE); /* Map the message buffer. */ for (off = 0; off < round_page(MSGBUF_SIZE); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } void init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x; struct pcpu *pc; thread0.td_kstack = proc0kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); /* * Initialize DMAC */ pc98_init_dmac(); metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (envmode == 1) kern_envp = static_env; else if (bootinfo.bi_envp) kern_envp = (caddr_t)bootinfo.bi_envp + KERNBASE; /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(curpcb, thread0.td_pcb); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); /* * Initialize the i8254 before the console so that console * initialization can use DELAY(). */ i8254_init(); /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA atpic_startup(); #endif #ifdef DDB ksym_start = bootinfo.bi_symtab; ksym_end = bootinfo.bi_esymtab; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ /* make an initial tss so cpu can get interrupt stack on syscall! */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE - sizeof(struct pcb) - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_cr3 = (int)IdlePTD; dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ msgbufinit(msgbufp, MSGBUF_SIZE); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ /* XXX yes! */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_cr3 = (int)IdlePTD; thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_flags = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(td->td_md.md_saved_flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = kmem_alloc(kernel_map, PAGE_SIZE * 2); if (tmp == 0) panic("kmem_alloc returned 0"); /* Put the problematic entry (#6) at the end of the lower page. */ new_idt = (struct gate_descriptor*) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; lidt(&r_idt); idt = new_idt; if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE, VM_PROT_READ, FALSE) != KERN_SUCCESS) panic("vm_map_protect failed"); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8; } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_eflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; regs->r_gs = pcb->pcb_gs; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } #ifdef CPU_ENABLE_SSE static void fill_fpregs_xmm(sv_xmm, sv_87) struct savexmm *sv_xmm; struct save87 *sv_87; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; bzero(sv_87, sizeof(*sv_87)); /* FPU control/status */ penv_87->en_cw = penv_xmm->en_cw; penv_87->en_sw = penv_xmm->en_sw; penv_87->en_tw = penv_xmm->en_tw; penv_87->en_fip = penv_xmm->en_fip; penv_87->en_fcs = penv_xmm->en_fcs; penv_87->en_opcode = penv_xmm->en_opcode; penv_87->en_foo = penv_xmm->en_foo; penv_87->en_fos = penv_xmm->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_87->sv_ac[i] = sv_xmm->sv_fp[i].fp_acc; } static void set_fpregs_xmm(sv_87, sv_xmm) struct save87 *sv_87; struct savexmm *sv_xmm; { register struct env87 *penv_87 = &sv_87->sv_env; register struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* FPU control/status */ penv_xmm->en_cw = penv_87->en_cw; penv_xmm->en_sw = penv_87->en_sw; penv_xmm->en_tw = penv_87->en_tw; penv_xmm->en_fip = penv_87->en_fip; penv_xmm->en_fcs = penv_87->en_fcs; penv_xmm->en_opcode = penv_87->en_opcode; penv_xmm->en_foo = penv_87->en_foo; penv_xmm->en_fos = penv_87->en_fos; /* FPU registers */ for (i = 0; i < 8; ++i) sv_xmm->sv_fp[i].fp_acc = sv_87->sv_ac[i]; } #endif /* CPU_ENABLE_SSE */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { fill_fpregs_xmm(&td->td_pcb->pcb_save.sv_xmm, (struct save87 *)fpregs); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(&td->td_pcb->pcb_save.sv_87, fpregs, sizeof *fpregs); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) { set_fpregs_xmm((struct save87 *)fpregs, &td->td_pcb->pcb_save.sv_xmm); return (0); } #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &td->td_pcb->pcb_save.sv_87, sizeof *fpregs); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; struct segment_descriptor *sdp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp); sdp = &td->td_pcb->pcb_gsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_fsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct trapframe *tp; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if ((ret = set_fpcontext(td, mcp)) == 0) { tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; ret = 0; } return (ret); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; #else union savefpu *addr; /* * XXX mc_fpstate might be misaligned, since its declaration is not * unportabilized using __attribute__((aligned(16))) like the * declaration of struct savemm, and anyway, alignment doesn't work * for auto variables since we don't use gcc's pessimal stack * alignment. Work around this by abusing the spare fields after * mcp->mc_fpstate. * * XXX unpessimize most cases by only aligning when fxsave might be * called, although this requires knowing too much about * npxgetregs()'s internals. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); } mcp->mc_ownedfp = npxgetregs(td, addr); if (addr != (union savefpu *)&mcp->mc_fpstate) { bcopy(addr, &mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); bzero(&mcp->mc_spare2, sizeof(mcp->mc_spare2)); } mcp->mc_fpformat = npxformat(); #endif } static int set_fpcontext(struct thread *td, const mcontext_t *mcp) { union savefpu *addr; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { /* XXX align as above. */ addr = (union savefpu *)&mcp->mc_fpstate; if (td == PCPU_GET(fpcurthread) && #ifdef CPU_ENABLE_SSE cpu_fxsr && #endif ((uintptr_t)(void *)addr & 0xF)) { do addr = (void *)((char *)addr + 4); while ((uintptr_t)(void *)addr & 0xF); bcopy(&mcp->mc_fpstate, addr, sizeof(mcp->mc_fpstate)); } #ifdef DEV_NPX #ifdef CPU_ENABLE_SSE if (cpu_fxsr) addr->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; #endif /* * XXX we violate the dubious requirement that npxsetregs() * be called with interrupts disabled. */ npxsetregs(td, addr); #endif /* * Don't bother putting things back where they were in the * misaligned case, since we know that the caller won't use * them again. */ } else return (EINVAL); return (0); } static void fpstate_drop(struct thread *td) { register_t s; s = intr_disable(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~PCB_NPXINITDONE; intr_restore(s); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ diff --git a/sys/powerpc/aim/machdep.c b/sys/powerpc/aim/machdep.c index 590741ce4cbe..662fd1445420 100644 --- a/sys/powerpc/aim/machdep.c +++ b/sys/powerpc/aim/machdep.c @@ -1,1172 +1,1182 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_msgbuf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif int cold = 1; int cacheline_size = 32; int ppc64 = 0; int hw_direct_map = 1; struct pcpu __pcpu[MAXCPU]; static struct trapframe frame0; char machine[] = "powerpc"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, ""); static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size, CTLFLAG_RD, &cacheline_size, 0, ""); u_int powerpc_init(u_int, u_int, u_int, void *); int save_ofw_mapping(void); int restore_ofw_mapping(void); void install_extint(void (*)(void)); int setfault(faultbuf); /* defined in locore.S */ static int grab_mcontext(struct thread *, mcontext_t *, int); void asm_panic(char *); long Maxmem = 0; long realmem = 0; struct pmap ofw_pmap; extern int ofmsr; struct bat battable[16]; struct kva_md_info kmi; static void powerpc_ofw_shutdown(void *junk, int howto) { if (howto & RB_HALT) { OF_halt(); } OF_reboot(); } static void cpu_startup(void *dummy) { /* * Initialise the decrementer-based clock. */ decr_init(); /* * Good {morning,afternoon,evening,night}. */ cpu_setup(PCPU_GET(cpuid)); #ifdef PERFMON perfmon_init(); #endif printf("real memory = %ld (%ld MB)\n", ptoa(physmem), ptoa(physmem) / 1048576); realmem = physmem; /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { int size1 = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08x - 0x%08x, %d bytes (%d pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size1, size1 / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ld (%ld MB)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); EVENTHANDLER_REGISTER(shutdown_final, powerpc_ofw_shutdown, 0, SHUTDOWN_PRI_LAST); } extern char kernel_text[], _end[]; extern void *testppc64, *testppc64size; extern void *restorebridge, *restorebridgesize; extern void *rfid_patch, *rfi_patch1, *rfi_patch2; #ifdef SMP extern void *rstcode, *rstsize; #endif extern void *trapcode, *trapcode64, *trapsize; extern void *alitrap, *alisize; extern void *dsitrap, *dsisize; extern void *decrint, *decrsize; extern void *extint, *extsize; extern void *dblow, *dbsize; u_int powerpc_init(u_int startkernel, u_int endkernel, u_int basekernel, void *mdp) { struct pcpu *pc; vm_offset_t end; void *generictrap; size_t trap_offset; void *kmdp; char *env; uint32_t msr, scratch; uint8_t *cache_check; end = 0; kmdp = NULL; trap_offset = 0; /* * Parse metadata if present and fetch parameters. Must be done * before console is inited so cninit gets the right value of * boothowto. */ if (mdp != NULL) { preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) { boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); end = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); #endif } } /* * Init params/tunables that can be overridden by the loader */ init_param1(); /* * Start initializing proc0 and thread0. */ proc_linkup0(&proc0, &thread0); thread0.td_frame = &frame0; /* * Set up per-cpu data. */ pc = __pcpu; pcpu_init(pc, 0, sizeof(struct pcpu)); pc->pc_curthread = &thread0; pc->pc_cpuid = 0; __asm __volatile("mtsprg 0, %0" :: "r"(pc)); /* * Init mutexes, which we use heavily in PMAP */ mutex_init(); /* * Install the OF client interface */ OF_bootstrap(); /* * Initialize the console before printing anything. */ cninit(); /* * Complain if there is no metadata. */ if (mdp == NULL || kmdp == NULL) { printf("powerpc_init: no loader metadata.\n"); } /* * Init KDB */ kdb_init(); /* * PowerPC 970 CPUs have a misfeature requested by Apple that makes * them pretend they have a 32-byte cacheline. Turn this off * before we measure the cacheline size. */ switch (mfpvr() >> 16) { case IBM970: case IBM970FX: case IBM970MP: case IBM970GX: scratch = mfspr64upper(SPR_HID5,msr); scratch &= ~HID5_970_DCBZ_SIZE_HI; mtspr64(SPR_HID5, scratch, mfspr(SPR_HID5), msr); break; } /* * Initialize the interrupt tables and figure out our cache line * size and whether or not we need the 64-bit bridge code. */ /* * Disable translation in case the vector area hasn't been * mapped (G5). */ msr = mfmsr(); mtmsr(msr & ~(PSL_IR | PSL_DR)); isync(); /* * Measure the cacheline size using dcbz * * Use EXC_PGM as a playground. We are about to overwrite it * anyway, we know it exists, and we know it is cache-aligned. */ cache_check = (void *)EXC_PGM; for (cacheline_size = 0; cacheline_size < 0x100; cacheline_size++) cache_check[cacheline_size] = 0xff; __asm __volatile("dcbz %0,0":: "r" (cache_check) : "memory"); /* Find the first byte dcbz did not zero to get the cache line size */ for (cacheline_size = 0; cacheline_size < 0x100 && cache_check[cacheline_size] == 0; cacheline_size++); /* * Figure out whether we need to use the 64 bit PMAP. This works by * executing an instruction that is only legal on 64-bit PPC (mtmsrd), * and setting ppc64 = 0 if that causes a trap. */ ppc64 = 1; bcopy(&testppc64, (void *)EXC_PGM, (size_t)&testppc64size); __syncicache((void *)EXC_PGM, (size_t)&testppc64size); __asm __volatile("\ mfmsr %0; \ mtsprg2 %1; \ \ mtmsrd %0; \ mfsprg2 %1;" : "=r"(scratch), "=r"(ppc64)); /* * Now copy restorebridge into all the handlers, if necessary, * and set up the trap tables. */ if (ppc64) { /* Patch the two instances of rfi -> rfid */ bcopy(&rfid_patch,&rfi_patch1,4); #ifdef KDB /* rfi_patch2 is at the end of dbleave */ bcopy(&rfid_patch,&rfi_patch2,4); #endif /* * Copy a code snippet to restore 32-bit bridge mode * to the top of every non-generic trap handler */ trap_offset += (size_t)&restorebridgesize; bcopy(&restorebridge, (void *)EXC_RST, trap_offset); bcopy(&restorebridge, (void *)EXC_DSI, trap_offset); bcopy(&restorebridge, (void *)EXC_ALI, trap_offset); bcopy(&restorebridge, (void *)EXC_PGM, trap_offset); bcopy(&restorebridge, (void *)EXC_MCHK, trap_offset); bcopy(&restorebridge, (void *)EXC_TRC, trap_offset); bcopy(&restorebridge, (void *)EXC_BPT, trap_offset); /* * Set the common trap entry point to the one that * knows to restore 32-bit operation on execution. */ generictrap = &trapcode64; } else { generictrap = &trapcode; } #ifdef SMP bcopy(&rstcode, (void *)(EXC_RST + trap_offset), (size_t)&rstsize); #else bcopy(generictrap, (void *)EXC_RST, (size_t)&trapsize); #endif #ifdef KDB bcopy(&dblow, (void *)(EXC_MCHK + trap_offset), (size_t)&dbsize); bcopy(&dblow, (void *)(EXC_PGM + trap_offset), (size_t)&dbsize); bcopy(&dblow, (void *)(EXC_TRC + trap_offset), (size_t)&dbsize); bcopy(&dblow, (void *)(EXC_BPT + trap_offset), (size_t)&dbsize); #else bcopy(generictrap, (void *)EXC_MCHK, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_PGM, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_TRC, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_BPT, (size_t)&trapsize); #endif bcopy(&dsitrap, (void *)(EXC_DSI + trap_offset), (size_t)&dsisize); bcopy(&alitrap, (void *)(EXC_ALI + trap_offset), (size_t)&alisize); bcopy(generictrap, (void *)EXC_ISI, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_EXI, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_FPU, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_DECR, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_SC, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_FPA, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_VEC, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_VECAST, (size_t)&trapsize); bcopy(generictrap, (void *)EXC_THRM, (size_t)&trapsize); __syncicache(EXC_RSVD, EXC_LAST - EXC_RSVD); /* * Restore MSR */ mtmsr(msr); isync(); /* * Choose a platform module so we can get the physical memory map. */ platform_probe_and_attach(); /* * Initialise virtual memory. Use BUS_PROBE_GENERIC priority * in case the platform module had a better idea of what we * should do. */ if (ppc64) pmap_mmu_install(MMU_TYPE_G5, BUS_PROBE_GENERIC); else pmap_mmu_install(MMU_TYPE_OEA, BUS_PROBE_GENERIC); pmap_bootstrap(startkernel, endkernel); mtmsr(mfmsr() | PSL_IR|PSL_DR|PSL_ME|PSL_RI); isync(); /* * Initialize params/tunables that are derived from memsize */ init_param2(physmem); /* * Grab booted kernel's name */ env = getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } /* * Finish setting up thread0. */ thread0.td_pcb = (struct pcb *) ((thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE - sizeof(struct pcb)) & ~15); bzero((void *)thread0.td_pcb, sizeof(struct pcb)); pc->pc_curpcb = thread0.td_pcb; /* Initialise the message buffer. */ msgbufinit(msgbufp, MSGBUF_SIZE); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif return (((uintptr_t)thread0.td_pcb - 16) & ~15); } void bzero(void *buf, size_t len) { caddr_t p; p = buf; while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) { *p++ = 0; len--; } while (len >= sizeof(u_long) * 8) { *(u_long*) p = 0; *((u_long*) p + 1) = 0; *((u_long*) p + 2) = 0; *((u_long*) p + 3) = 0; len -= sizeof(u_long) * 8; *((u_long*) p + 4) = 0; *((u_long*) p + 5) = 0; *((u_long*) p + 6) = 0; *((u_long*) p + 7) = 0; p += sizeof(u_long) * 8; } while (len >= sizeof(u_long)) { *(u_long*) p = 0; len -= sizeof(u_long); p += sizeof(u_long); } while (len) { *p++ = 0; len--; } } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct trapframe *tf; struct sigframe *sfp; struct sigacts *psp; struct sigframe sf; struct thread *td; struct proc *p; int oonstack, rndfsize; int sig; int code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; oonstack = sigonstack(tf->fixreg[1]); rndfsize = ((sizeof(sf) + 15) / 16) * 16; CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* * Save user context */ memset(&sf, 0, sizeof(sf)); grab_mcontext(td, &sf.sf_uc.uc_mcontext, 0); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; /* * Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - rndfsize); } else { sfp = (struct sigframe *)(tf->fixreg[1] - rndfsize); } /* * Translate the signal if appropriate (Linux emu ?) */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* * Save the floating-point state, if necessary, then copy it. */ /* XXX */ /* * Set up the registers to return to sigcode. * * r1/sp - sigframe ptr * lr - sig function, dispatched to by blrl in trampoline * r3 - sig number * r4 - SIGINFO ? &siginfo : exception code * r5 - user context * srr0 - trampoline function addr */ tf->lr = (register_t)catcher; tf->fixreg[1] = (register_t)sfp; tf->fixreg[FIRSTARG] = sig; tf->fixreg[FIRSTARG+2] = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* * Signal handler installed with SA_SIGINFO. */ tf->fixreg[FIRSTARG+1] = (register_t)&sfp->sf_si; /* * Fill siginfo structure. */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; sf.sf_si.si_addr = (void *)((tf->exc == EXC_DSI) ? tf->cpu.aim.dar : tf->srr0); } else { /* Old FreeBSD-style arguments. */ tf->fixreg[FIRSTARG+1] = code; tf->fixreg[FIRSTARG+3] = (tf->exc == EXC_DSI) ? tf->cpu.aim.dar : tf->srr0; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); tf->srr0 = (register_t)(PS_STRINGS - *(p->p_sysent->sv_szsigcode)); /* * copy the frame out to userland. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { /* * Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); PROC_LOCK(p); sigexit(td, SIGILL); } CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->srr0, tf->fixreg[1]); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } int sigreturn(struct thread *td, struct sigreturn_args *uap) { struct proc *p; ucontext_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); p = td->td_proc; PROC_LOCK(p); td->td_sigmask = uc.uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_lr = tf->srr0; pcb->pcb_sp = tf->fixreg[1]; } /* * get_mcontext/sendsig helper routine that doesn't touch the * proc lock */ static int grab_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct pcb *pcb; pcb = td->td_pcb; memset(mcp, 0, sizeof(mcontext_t)); mcp->mc_vers = _MC_VERSION; mcp->mc_flags = 0; memcpy(&mcp->mc_frame, td->td_frame, sizeof(struct trapframe)); if (flags & GET_MC_CLEAR_RET) { mcp->mc_gpr[3] = 0; mcp->mc_gpr[4] = 0; } /* * This assumes that floating-point context is *not* lazy, * so if the thread has used FP there would have been a * FP-unavailable exception that would have set things up * correctly. */ if (pcb->pcb_flags & PCB_FPU) { KASSERT(td == curthread, ("get_mcontext: fp save not curthread")); critical_enter(); save_fpu(td); critical_exit(); mcp->mc_flags |= _MC_FP_VALID; memcpy(&mcp->mc_fpscr, &pcb->pcb_fpu.fpscr, sizeof(double)); memcpy(mcp->mc_fpreg, pcb->pcb_fpu.fpr, 32*sizeof(double)); } /* * Repeat for Altivec context */ if (pcb->pcb_flags & PCB_VEC) { KASSERT(td == curthread, ("get_mcontext: fp save not curthread")); critical_enter(); save_vec(td); critical_exit(); mcp->mc_flags |= _MC_AV_VALID; mcp->mc_vscr = pcb->pcb_vec.vscr; mcp->mc_vrsave = pcb->pcb_vec.vrsave; memcpy(mcp->mc_avec, pcb->pcb_vec.vr, sizeof(mcp->mc_avec)); } mcp->mc_len = sizeof(*mcp); return (0); } int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { int error; error = grab_mcontext(td, mcp, flags); if (error == 0) { PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(td->td_frame->fixreg[1]); PROC_UNLOCK(curthread->td_proc); } return (error); } int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct pcb *pcb; struct trapframe *tf; pcb = td->td_pcb; tf = td->td_frame; if (mcp->mc_vers != _MC_VERSION || mcp->mc_len != sizeof(*mcp)) return (EINVAL); /* * Don't let the user set privileged MSR bits */ if ((mcp->mc_srr1 & PSL_USERSTATIC) != (tf->srr1 & PSL_USERSTATIC)) { return (EINVAL); } memcpy(tf, mcp->mc_frame, sizeof(mcp->mc_frame)); if (mcp->mc_flags & _MC_FP_VALID) { if ((pcb->pcb_flags & PCB_FPU) != PCB_FPU) { critical_enter(); enable_fpu(td); critical_exit(); } memcpy(&pcb->pcb_fpu.fpscr, &mcp->mc_fpscr, sizeof(double)); memcpy(pcb->pcb_fpu.fpr, mcp->mc_fpreg, 32*sizeof(double)); } if (mcp->mc_flags & _MC_AV_VALID) { if ((pcb->pcb_flags & PCB_VEC) != PCB_VEC) { critical_enter(); enable_vec(td); critical_exit(); } pcb->pcb_vec.vscr = mcp->mc_vscr; pcb->pcb_vec.vrsave = mcp->mc_vrsave; memcpy(pcb->pcb_vec.vr, mcp->mc_avec, sizeof(mcp->mc_avec)); } return (0); } void cpu_boot(int howto) { } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* TBD */ +} + void cpu_initclocks(void) { decr_tc_init(); } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } /* * Shutdown the CPU as much as possible. */ void cpu_halt(void) { OF_exit(); } void cpu_idle(int busy) { uint32_t msr; msr = mfmsr(); #ifdef INVARIANTS if ((msr & PSL_EE) != PSL_EE) { struct thread *td = curthread; printf("td msr %x\n", td->td_md.md_saved_msr); panic("ints disabled in idleproc!"); } #endif if (powerpc_pow_enabled) { powerpc_sync(); mtmsr(msr | PSL_POW); isync(); } } int cpu_idle_wakeup(int cpu) { return (0); } /* * Set set up registers on exec. */ void exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tf; struct ps_strings arginfo; tf = trapframe(td); bzero(tf, sizeof *tf); tf->fixreg[1] = -roundup(-stack + 8, 16); /* * XXX Machine-independent code has already copied arguments and * XXX environment to userland. Get them back here. */ (void)copyin((char *)PS_STRINGS, &arginfo, sizeof(arginfo)); /* * Set up arguments for _start(): * _start(argc, argv, envp, obj, cleanup, ps_strings); * * Notes: * - obj and cleanup are the auxilliary and termination * vectors. They are fixed up by ld.elf_so. * - ps_strings is a NetBSD extention, and will be * ignored by executables which are strictly * compliant with the SVR4 ABI. * * XXX We have to set both regs and retval here due to different * XXX calling convention in trap.c and init_main.c. */ /* * XXX PG: these get overwritten in the syscall return code. * execve() should return EJUSTRETURN, like it does on NetBSD. * Emulate by setting the syscall return value cells. The * registers still have to be set for init's fork trampoline. */ td->td_retval[0] = arginfo.ps_nargvstr; td->td_retval[1] = (register_t)arginfo.ps_argvstr; tf->fixreg[3] = arginfo.ps_nargvstr; tf->fixreg[4] = (register_t)arginfo.ps_argvstr; tf->fixreg[5] = (register_t)arginfo.ps_envstr; tf->fixreg[6] = 0; /* auxillary vector */ tf->fixreg[7] = 0; /* termination vector */ tf->fixreg[8] = (register_t)PS_STRINGS; /* NetBSD extension */ tf->srr0 = entry; tf->srr1 = PSL_MBO | PSL_USERSET | PSL_FE_DFLT; td->td_pcb->pcb_flags = 0; } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; memcpy(regs, tf, sizeof(struct reg)); return (0); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on PowerPC */ return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPU) == 0) memset(fpregs, 0, sizeof(struct fpreg)); else memcpy(fpregs, &pcb->pcb_fpu, sizeof(struct fpreg)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; memcpy(tf, regs, sizeof(struct reg)); return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on PowerPC */ return (ENOSYS); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPU) == 0) enable_fpu(td); memcpy(&pcb->pcb_fpu, fpregs, sizeof(struct fpreg)); return (0); } int ptrace_set_pc(struct thread *td, unsigned long addr) { struct trapframe *tf; tf = td->td_frame; tf->srr0 = (register_t)addr; return (0); } int ptrace_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 |= PSL_SE; return (0); } int ptrace_clear_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 &= ~PSL_SE; return (0); } void kdb_cpu_clear_singlestep(void) { kdb_frame->srr1 &= ~PSL_SE; } void kdb_cpu_set_singlestep(void) { kdb_frame->srr1 |= PSL_SE; } /* * Initialise a struct pcpu. */ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz) { } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_msr = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(td->td_md.md_saved_msr); } /* * kcopy(const void *src, void *dst, size_t len); * * Copy len bytes from src to dst, aborting if we encounter a fatal * page fault. * * kcopy() _must_ save and restore the old fault handler since it is * called by uiomove(), which may be in the path of servicing a non-fatal * page fault. */ int kcopy(const void *src, void *dst, size_t len) { struct thread *td; faultbuf env, *oldfault; int rv; td = PCPU_GET(curthread); oldfault = td->td_pcb->pcb_onfault; if ((rv = setfault(env)) != 0) { td->td_pcb->pcb_onfault = oldfault; return rv; } memcpy(dst, src, len); td->td_pcb->pcb_onfault = oldfault; return (0); } void asm_panic(char *pstr) { panic(pstr); } int db_trap_glue(struct trapframe *); /* Called from trap_subr.S */ int db_trap_glue(struct trapframe *frame) { if (!(frame->srr1 & PSL_PR) && (frame->exc == EXC_TRC || frame->exc == EXC_RUNMODETRC || (frame->exc == EXC_PGM && (frame->srr1 & 0x20000)) || frame->exc == EXC_BPT || frame->exc == EXC_DSI)) { int type = frame->exc; if (type == EXC_PGM && (frame->srr1 & 0x20000)) { type = T_BREAKPOINT; } return (kdb_trap(type, 0, frame)); } return (0); } diff --git a/sys/powerpc/booke/machdep.c b/sys/powerpc/booke/machdep.c index 3d3dcadc6749..6d37653fcbc4 100644 --- a/sys/powerpc/booke/machdep.c +++ b/sys/powerpc/booke/machdep.c @@ -1,1004 +1,1014 @@ /*- * Copyright (C) 2006 Semihalf, Marian Balakowicz * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_msgbuf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else #define debugf(fmt, args...) #endif extern unsigned char kernel_text[]; extern unsigned char _etext[]; extern unsigned char _edata[]; extern unsigned char __bss_start[]; extern unsigned char __sbss_start[]; extern unsigned char __sbss_end[]; extern unsigned char _end[]; extern void dcache_enable(void); extern void dcache_inval(void); extern void icache_enable(void); extern void icache_inval(void); struct kva_md_info kmi; struct pcpu __pcpu[MAXCPU]; struct trapframe frame0; int cold = 1; long realmem = 0; long Maxmem = 0; struct bootinfo *bootinfo; char machine[] = "powerpc"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, ""); int cacheline_size = 32; SYSCTL_INT(_machdep, CPU_CACHELINE, cacheline_size, CTLFLAG_RD, &cacheline_size, 0, ""); int hw_direct_map = 0; int ppc64 = 0; static void cpu_e500_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_e500_startup, NULL); void print_kernel_section_addr(void); void print_bootinfo(void); void print_kenv(void); u_int e500_init(u_int32_t, u_int32_t, void *); static void cpu_e500_startup(void *dummy) { int indx, size; /* Initialise the decrementer-based clock. */ decr_init(); /* Good {morning,afternoon,evening,night}. */ cpu_setup(PCPU_GET(cpuid)); printf("real memory = %ld (%ld MB)\n", ptoa(physmem), ptoa(physmem) / 1048576); realmem = physmem; /* Display any holes after the first chunk of extended memory. */ if (bootverbose) { printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { size = phys_avail[indx + 1] - phys_avail[indx]; printf("0x%08x - 0x%08x, %d bytes (%d pages)\n", phys_avail[indx], phys_avail[indx + 1] - 1, size, size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ld (%ld MB)\n", ptoa(cnt.v_free_count), ptoa(cnt.v_free_count) / 1048576); /* Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); } static char * kenv_next(char *cp) { if (cp != NULL) { while (*cp != 0) cp++; cp++; if (*cp == 0) cp = NULL; } return (cp); } void print_kenv(void) { int len; char *cp; debugf("loader passed (static) kenv:\n"); if (kern_envp == NULL) { debugf(" no env, null ptr\n"); return; } debugf(" kern_envp = 0x%08x\n", (u_int32_t)kern_envp); len = 0; for (cp = kern_envp; cp != NULL; cp = kenv_next(cp)) debugf(" %x %s\n", (u_int32_t)cp, cp); } void print_bootinfo(void) { struct bi_mem_region *mr; struct bi_eth_addr *eth; int i, j; debugf("bootinfo:\n"); if (bootinfo == NULL) { debugf(" no bootinfo, null ptr\n"); return; } debugf(" version = 0x%08x\n", bootinfo->bi_version); debugf(" ccsrbar = 0x%08x\n", bootinfo->bi_bar_base); debugf(" cpu_clk = 0x%08x\n", bootinfo->bi_cpu_clk); debugf(" bus_clk = 0x%08x\n", bootinfo->bi_bus_clk); debugf(" mem regions:\n"); mr = (struct bi_mem_region *)bootinfo->bi_data; for (i = 0; i < bootinfo->bi_mem_reg_no; i++, mr++) debugf(" #%d, base = 0x%08x, size = 0x%08x\n", i, mr->mem_base, mr->mem_size); debugf(" eth addresses:\n"); eth = (struct bi_eth_addr *)mr; for (i = 0; i < bootinfo->bi_eth_addr_no; i++, eth++) { debugf(" #%d, addr = ", i); for (j = 0; j < 6; j++) debugf("%02x ", eth->mac_addr[j]); debugf("\n"); } } void print_kernel_section_addr(void) { debugf("kernel image addresses:\n"); debugf(" kernel_text = 0x%08x\n", (uint32_t)kernel_text); debugf(" _etext (sdata) = 0x%08x\n", (uint32_t)_etext); debugf(" _edata = 0x%08x\n", (uint32_t)_edata); debugf(" __sbss_start = 0x%08x\n", (uint32_t)__sbss_start); debugf(" __sbss_end = 0x%08x\n", (uint32_t)__sbss_end); debugf(" __sbss_start = 0x%08x\n", (uint32_t)__bss_start); debugf(" _end = 0x%08x\n", (uint32_t)_end); } struct bi_mem_region * bootinfo_mr(void) { return ((struct bi_mem_region *)bootinfo->bi_data); } struct bi_eth_addr * bootinfo_eth(void) { struct bi_mem_region *mr; struct bi_eth_addr *eth; int i; /* Advance to the eth section */ mr = bootinfo_mr(); for (i = 0; i < bootinfo->bi_mem_reg_no; i++, mr++) ; eth = (struct bi_eth_addr *)mr; return (eth); } u_int e500_init(u_int32_t startkernel, u_int32_t endkernel, void *mdp) { struct pcpu *pc; void *kmdp; vm_offset_t end; uint32_t csr; kmdp = NULL; end = endkernel; /* * Parse metadata and fetch parameters. This must be done as the first * step as we need bootinfo data to at least init the console */ if (mdp != NULL) { preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) { bootinfo = (struct bootinfo *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_BOOTINFO); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); end = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); #endif } } else { /* * We should scream but how? - without CCSR bar (in bootinfo) * cannot even output anything... */ /* * FIXME add return value and handle in the locore so we can * return to the loader maybe? (this seems not very easy to * restore everything as the TLB have all been reprogrammed * in the locore etc...) */ while(1); } /* Initialize TLB1 handling */ tlb1_init(bootinfo->bi_bar_base); /* Init params/tunables that can be overridden by the loader. */ init_param1(); /* Start initializing proc0 and thread0. */ proc_linkup(&proc0, &thread0); thread0.td_frame = &frame0; /* Set up per-cpu data and store the pointer in SPR general 0. */ pc = &__pcpu[0]; pcpu_init(pc, 0, sizeof(struct pcpu)); pc->pc_curthread = &thread0; __asm __volatile("mtsprg 0, %0" :: "r"(pc)); /* Initialize system mutexes. */ mutex_init(); /* Initialize the console before printing anything. */ cninit(); /* Print out some debug info... */ debugf("e500_init: console initialized\n"); debugf(" arg1 startkernel = 0x%08x\n", startkernel); debugf(" arg2 endkernel = 0x%08x\n", endkernel); debugf(" arg3 mdp = 0x%08x\n", (u_int32_t)mdp); debugf(" end = 0x%08x\n", (u_int32_t)end); debugf(" boothowto = 0x%08x\n", boothowto); debugf(" kernel ccsrbar = 0x%08x\n", CCSRBAR_VA); debugf(" MSR = 0x%08x\n", mfmsr()); debugf(" HID0 = 0x%08x\n", mfspr(SPR_HID0)); debugf(" HID1 = 0x%08x\n", mfspr(SPR_HID1)); print_bootinfo(); print_kernel_section_addr(); print_kenv(); //tlb1_print_entries(); //tlb1_print_tlbentries(); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif /* Initialise platform module */ platform_probe_and_attach(); /* Initialise virtual memory. */ pmap_mmu_install(MMU_TYPE_BOOKE, 0); pmap_bootstrap(startkernel, end); debugf("MSR = 0x%08x\n", mfmsr()); //tlb1_print_entries(); //tlb1_print_tlbentries(); /* Initialize params/tunables that are derived from memsize. */ init_param2(physmem); /* Finish setting up thread0. */ thread0.td_pcb = (struct pcb *) ((thread0.td_kstack + thread0.td_kstack_pages * PAGE_SIZE - sizeof(struct pcb)) & ~15); bzero((void *)thread0.td_pcb, sizeof(struct pcb)); pc->pc_curpcb = thread0.td_pcb; /* Initialise the message buffer. */ msgbufinit(msgbufp, MSGBUF_SIZE); /* Enable Machine Check interrupt. */ mtmsr(mfmsr() | PSL_ME); isync(); /* Enable D-cache if applicable */ csr = mfspr(SPR_L1CSR0); if ((csr & L1CSR0_DCE) == 0) { dcache_inval(); dcache_enable(); } csr = mfspr(SPR_L1CSR0); if ((boothowto & RB_VERBOSE) != 0 || (csr & L1CSR0_DCE) == 0) printf("L1 D-cache %sabled\n", (csr & L1CSR0_DCE) ? "en" : "dis"); /* Enable L1 I-cache if applicable. */ csr = mfspr(SPR_L1CSR1); if ((csr & L1CSR1_ICE) == 0) { icache_inval(); icache_enable(); } csr = mfspr(SPR_L1CSR1); if ((boothowto & RB_VERBOSE) != 0 || (csr & L1CSR1_ICE) == 0) printf("L1 I-cache %sabled\n", (csr & L1CSR1_ICE) ? "en" : "dis"); debugf("e500_init: SP = 0x%08x\n", ((uintptr_t)thread0.td_pcb - 16) & ~15); debugf("e500_init: e\n"); return (((uintptr_t)thread0.td_pcb - 16) & ~15); } /* Initialise a struct pcpu. */ void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t sz) { pcpu->pc_tid_next = TID_MIN; } /* Set set up registers on exec. */ void exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tf; struct ps_strings arginfo; tf = trapframe(td); bzero(tf, sizeof *tf); tf->fixreg[1] = -roundup(-stack + 8, 16); /* * XXX Machine-independent code has already copied arguments and * XXX environment to userland. Get them back here. */ (void)copyin((char *)PS_STRINGS, &arginfo, sizeof(arginfo)); /* * Set up arguments for _start(): * _start(argc, argv, envp, obj, cleanup, ps_strings); * * Notes: * - obj and cleanup are the auxilliary and termination * vectors. They are fixed up by ld.elf_so. * - ps_strings is a NetBSD extention, and will be * ignored by executables which are strictly * compliant with the SVR4 ABI. * * XXX We have to set both regs and retval here due to different * XXX calling convention in trap.c and init_main.c. */ /* * XXX PG: these get overwritten in the syscall return code. * execve() should return EJUSTRETURN, like it does on NetBSD. * Emulate by setting the syscall return value cells. The * registers still have to be set for init's fork trampoline. */ td->td_retval[0] = arginfo.ps_nargvstr; td->td_retval[1] = (register_t)arginfo.ps_argvstr; tf->fixreg[3] = arginfo.ps_nargvstr; tf->fixreg[4] = (register_t)arginfo.ps_argvstr; tf->fixreg[5] = (register_t)arginfo.ps_envstr; tf->fixreg[6] = 0; /* auxillary vector */ tf->fixreg[7] = 0; /* termination vector */ tf->fixreg[8] = (register_t)PS_STRINGS; /* NetBSD extension */ tf->srr0 = entry; tf->srr1 = PSL_USERSET; td->td_pcb->pcb_flags = 0; } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; memcpy(regs, tf, sizeof(struct reg)); return (0); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { return (0); } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* TBD */ +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_lr = tf->srr0; pcb->pcb_sp = tf->fixreg[1]; } /* * get_mcontext/sendsig helper routine that doesn't touch the * proc lock. */ static int grab_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct pcb *pcb; pcb = td->td_pcb; memset(mcp, 0, sizeof(mcontext_t)); mcp->mc_vers = _MC_VERSION; mcp->mc_flags = 0; memcpy(&mcp->mc_frame, td->td_frame, sizeof(struct trapframe)); if (flags & GET_MC_CLEAR_RET) { mcp->mc_gpr[3] = 0; mcp->mc_gpr[4] = 0; } /* XXX Altivec context ? */ mcp->mc_len = sizeof(*mcp); return (0); } int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { int error; error = grab_mcontext(td, mcp, flags); if (error == 0) { PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(td->td_frame->fixreg[1]); PROC_UNLOCK(curthread->td_proc); } return (error); } int set_mcontext(struct thread *td, const mcontext_t *mcp) { struct pcb *pcb; struct trapframe *tf; pcb = td->td_pcb; tf = td->td_frame; if (mcp->mc_vers != _MC_VERSION || mcp->mc_len != sizeof(*mcp)) return (EINVAL); memcpy(tf, mcp->mc_frame, sizeof(mcp->mc_frame)); /* XXX Altivec context? */ return (0); } int sigreturn(struct thread *td, struct sigreturn_args *uap) { struct proc *p; ucontext_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); p = td->td_proc; PROC_LOCK(p); td->td_sigmask = uc.uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * cpu_idle * * Set Wait state enable. */ void cpu_idle (int busy) { register_t msr; msr = mfmsr(); #ifdef INVARIANTS if ((msr & PSL_EE) != PSL_EE) { struct thread *td = curthread; printf("td msr %x\n", td->td_md.md_saved_msr); panic("ints disabled in idleproc!"); } #endif #if 0 /* * Freescale E500 core RM section 6.4.1 */ msr = msr | PSL_WE; __asm__(" msync;" " mtmsr %0;" " isync;" "loop: b loop" : /* no output */ : "r" (msr)); #endif } int cpu_idle_wakeup(int cpu) { return (0); } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) td->td_md.md_saved_msr = intr_disable(); td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(td->td_md.md_saved_msr); } /* Shutdown the CPU as much as possible. */ void cpu_halt(void) { mtmsr(mfmsr() & ~(PSL_CE | PSL_EE | PSL_ME | PSL_DE)); while (1); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; memcpy(tf, regs, sizeof(struct reg)); return (0); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on PowerPC */ return (ENOSYS); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on PowerPC */ return (ENOSYS); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { return (0); } int ptrace_set_pc(struct thread *td, unsigned long addr) { struct trapframe *tf; tf = td->td_frame; tf->srr0 = (register_t)addr; return (0); } int ptrace_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 |= PSL_DE; tf->cpu.booke.dbcr0 |= (DBCR0_IDM | DBCR0_IC); return (0); } int ptrace_clear_single_step(struct thread *td) { struct trapframe *tf; tf = td->td_frame; tf->srr1 &= ~PSL_DE; tf->cpu.booke.dbcr0 &= ~(DBCR0_IDM | DBCR0_IC); return (0); } void kdb_cpu_clear_singlestep(void) { register_t r; r = mfspr(SPR_DBCR0); mtspr(SPR_DBCR0, r & ~DBCR0_IC); kdb_frame->srr1 &= ~PSL_DE; } void kdb_cpu_set_singlestep(void) { register_t r; r = mfspr(SPR_DBCR0); mtspr(SPR_DBCR0, r | DBCR0_IC | DBCR0_IDM); kdb_frame->srr1 |= PSL_DE; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct trapframe *tf; struct sigframe *sfp; struct sigacts *psp; struct sigframe sf; struct thread *td; struct proc *p; int oonstack, rndfsize; int sig, code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; oonstack = sigonstack(tf->fixreg[1]); rndfsize = ((sizeof(sf) + 15) / 16) * 16; CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* * Save user context */ memset(&sf, 0, sizeof(sf)); grab_mcontext(td, &sf.sf_uc.uc_mcontext, 0); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; /* * Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)((caddr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - rndfsize); } else { sfp = (struct sigframe *)(tf->fixreg[1] - rndfsize); } /* * Translate the signal if appropriate (Linux emu ?) */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* * Save the floating-point state, if necessary, then copy it. */ /* XXX */ /* * Set up the registers to return to sigcode. * * r1/sp - sigframe ptr * lr - sig function, dispatched to by blrl in trampoline * r3 - sig number * r4 - SIGINFO ? &siginfo : exception code * r5 - user context * srr0 - trampoline function addr */ tf->lr = (register_t)catcher; tf->fixreg[1] = (register_t)sfp; tf->fixreg[FIRSTARG] = sig; tf->fixreg[FIRSTARG+2] = (register_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* * Signal handler installed with SA_SIGINFO. */ tf->fixreg[FIRSTARG+1] = (register_t)&sfp->sf_si; /* * Fill siginfo structure. */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; sf.sf_si.si_addr = (void *) ((tf->exc == EXC_DSI) ? tf->cpu.booke.dear : tf->srr0); } else { /* Old FreeBSD-style arguments. */ tf->fixreg[FIRSTARG+1] = code; tf->fixreg[FIRSTARG+3] = (tf->exc == EXC_DSI) ? tf->cpu.booke.dear : tf->srr0; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); tf->srr0 = (register_t)(PS_STRINGS - *(p->p_sysent->sv_szsigcode)); /* * copy the frame out to userland. */ if (copyout((caddr_t)&sf, (caddr_t)sfp, sizeof(sf)) != 0) { /* * Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); PROC_LOCK(p); sigexit(td, SIGILL); } CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->srr0, tf->fixreg[1]); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } void bzero(void *buf, size_t len) { caddr_t p; p = buf; while (((vm_offset_t) p & (sizeof(u_long) - 1)) && len) { *p++ = 0; len--; } while (len >= sizeof(u_long) * 8) { *(u_long*) p = 0; *((u_long*) p + 1) = 0; *((u_long*) p + 2) = 0; *((u_long*) p + 3) = 0; len -= sizeof(u_long) * 8; *((u_long*) p + 4) = 0; *((u_long*) p + 5) = 0; *((u_long*) p + 6) = 0; *((u_long*) p + 7) = 0; p += sizeof(u_long) * 8; } while (len >= sizeof(u_long)) { *(u_long*) p = 0; len -= sizeof(u_long); p += sizeof(u_long); } while (len) { *p++ = 0; len--; } } /* * XXX what is the better/proper place for this routine? */ int mem_valid(vm_offset_t addr, int len) { return (1); } diff --git a/sys/sparc64/sparc64/machdep.c b/sys/sparc64/sparc64/machdep.c index 4eb8d9c4cb04..164a8b391fea 100644 --- a/sys/sparc64/sparc64/machdep.c +++ b/sys/sparc64/sparc64/machdep.c @@ -1,970 +1,980 @@ /*- * Copyright (c) 2001 Jake Burkholder. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * from: FreeBSD: src/sys/i386/i386/machdep.c,v 1.477 2001/08/27 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_msgbuf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef int ofw_vec_t(void *); #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif int dtlb_slots; int itlb_slots; struct tlb_entry *kernel_tlbs; int kernel_tlb_slots; int cold = 1; long Maxmem; long realmem; char pcpu0[PCPU_PAGES * PAGE_SIZE]; struct trapframe frame0; vm_offset_t kstack0; vm_paddr_t kstack0_phys; struct kva_md_info kmi; u_long ofw_vec; u_long ofw_tba; char sparc64_model[32]; static int cpu_use_vis = 1; cpu_block_copy_t *cpu_block_copy; cpu_block_zero_t *cpu_block_zero; void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec); void sparc64_shutdown_final(void *dummy, int howto); static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); CTASSERT((1 << INT_SHIFT) == sizeof(int)); CTASSERT((1 << PTR_SHIFT) == sizeof(char *)); CTASSERT(sizeof(struct reg) == 256); CTASSERT(sizeof(struct fpreg) == 272); CTASSERT(sizeof(struct __mcontext) == 512); CTASSERT((sizeof(struct pcb) & (64 - 1)) == 0); CTASSERT((offsetof(struct pcb, pcb_kfp) & (64 - 1)) == 0); CTASSERT((offsetof(struct pcb, pcb_ufp) & (64 - 1)) == 0); CTASSERT(sizeof(struct pcb) <= ((KSTACK_PAGES * PAGE_SIZE) / 8)); CTASSERT(sizeof(struct pcpu) <= ((PCPU_PAGES * PAGE_SIZE) / 2)); static void cpu_startup(void *arg) { vm_paddr_t physsz; int i; physsz = 0; for (i = 0; i < sparc64_nmemreg; i++) physsz += sparc64_memreg[i].mr_size; printf("real memory = %lu (%lu MB)\n", physsz, physsz / (1024 * 1024)); realmem = (long)physsz / PAGE_SIZE; vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); EVENTHANDLER_REGISTER(shutdown_final, sparc64_shutdown_final, NULL, SHUTDOWN_PRI_LAST); printf("avail memory = %lu (%lu MB)\n", cnt.v_free_count * PAGE_SIZE, cnt.v_free_count / ((1024 * 1024) / PAGE_SIZE)); if (bootverbose) printf("machine: %s\n", sparc64_model); cpu_identify(rdpr(ver), PCPU_GET(clock), curcpu); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { struct intr_request *ir; int i; pcpu->pc_irtail = &pcpu->pc_irhead; for (i = 0; i < IR_FREE; i++) { ir = &pcpu->pc_irpool[i]; ir->ir_next = pcpu->pc_irfree; pcpu->pc_irfree = ir; } } void spinlock_enter(void) { struct thread *td; register_t pil; td = curthread; if (td->td_md.md_spinlock_count == 0) { pil = rdpr(pil); wrpr(pil, 0, PIL_TICK); td->td_md.md_saved_pil = pil; } td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) wrpr(pil, td->td_md.md_saved_pil, 0); } void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec) { char type[8]; char *env; struct pcpu *pc; vm_offset_t end; caddr_t kmdp; phandle_t child; phandle_t root; uint32_t portid; end = 0; kmdp = NULL; /* * Find out what kind of CPU we have first, for anything that changes * behaviour. */ cpu_impl = VER_IMPL(rdpr(ver)); /* * Do CPU-specific Initialization. */ if (cpu_impl >= CPU_IMPL_ULTRASPARCIII) cheetah_init(); /* * Clear (S)TICK timer (including NPT). */ tick_clear(); /* * UltraSparc II[e,i] based systems come up with the tick interrupt * enabled and a handler that resets the tick counter, causing DELAY() * to not work properly when used early in boot. * UltraSPARC III based systems come up with the system tick interrupt * enabled, causing an interrupt storm on startup since they are not * handled. */ tick_stop(); /* * Set up Open Firmware entry points. */ ofw_tba = rdpr(tba); ofw_vec = (u_long)vec; /* * Parse metadata if present and fetch parameters. Must be before the * console is inited so cninit gets the right value of boothowto. */ if (mdp != NULL) { preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) { boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); end = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); kernel_tlb_slots = MD_FETCH(kmdp, MODINFOMD_DTLB_SLOTS, int); kernel_tlbs = (void *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_DTLB); } } init_param1(); /* * Initialize Open Firmware (needed for console). */ OF_install(OFW_STD_DIRECT, 0); OF_init(ofw_entry); /* * Prime our per-CPU data page for use. Note, we are using it for * our stack, so don't pass the real size (PAGE_SIZE) to pcpu_init * or it'll zero it out from under us. */ pc = (struct pcpu *)(pcpu0 + (PCPU_PAGES * PAGE_SIZE)) - 1; pcpu_init(pc, 0, sizeof(struct pcpu)); pc->pc_addr = (vm_offset_t)pcpu0; pc->pc_mid = UPA_CR_GET_MID(ldxa(0, ASI_UPA_CONFIG_REG)); pc->pc_tlb_ctx = TLB_CTX_USER_MIN; pc->pc_tlb_ctx_min = TLB_CTX_USER_MIN; pc->pc_tlb_ctx_max = TLB_CTX_USER_MAX; /* * Determine the OFW node and frequency of the BSP (and ensure the * BSP is in the device tree in the first place). */ pc->pc_node = 0; root = OF_peer(0); for (child = OF_child(root); child != 0; child = OF_peer(child)) { if (OF_getprop(child, "device_type", type, sizeof(type)) <= 0) continue; if (strcmp(type, "cpu") != 0) continue; if (OF_getprop(child, cpu_impl < CPU_IMPL_ULTRASPARCIII ? "upa-portid" : "portid", &portid, sizeof(portid)) <= 0) continue; if (portid == pc->pc_mid) { pc->pc_node = child; break; } } if (pc->pc_node == 0) OF_exit(); if (OF_getprop(child, "clock-frequency", &pc->pc_clock, sizeof(pc->pc_clock)) <= 0) OF_exit(); /* * Provide a DELAY() that works before PCPU_REG is set. We can't * set PCPU_REG without also taking over the trap table or the * firmware will overwrite it. Unfortunately, it's way to early * to also take over the trap table at this point. */ clock_boot = pc->pc_clock; delay_func = delay_boot; /* * Initialize the console before printing anything. * NB: the low-level console drivers require a working DELAY() at * this point. */ cninit(); /* * Panic if there is no metadata. Most likely the kernel was booted * directly, instead of through loader(8). */ if (mdp == NULL || kmdp == NULL) { printf("sparc64_init: no loader metadata.\n" "This probably means you are not using loader(8).\n"); panic("sparc64_init"); } /* * Sanity check the kernel end, which is important. */ if (end == 0) { printf("sparc64_init: warning, kernel end not specified.\n" "Attempting to continue anyway.\n"); end = (vm_offset_t)_end; } /* * Determine the TLB slot maxima, which are expected to be * equal across all CPUs. * NB: for Cheetah-class CPUs, these properties only refer * to the t16s. */ if (OF_getprop(pc->pc_node, "#dtlb-entries", &dtlb_slots, sizeof(dtlb_slots)) == -1) panic("sparc64_init: cannot determine number of dTLB slots"); if (OF_getprop(pc->pc_node, "#itlb-entries", &itlb_slots, sizeof(itlb_slots)) == -1) panic("sparc64_init: cannot determine number of iTLB slots"); cache_init(pc); cache_enable(); uma_set_align(pc->pc_cache.dc_linesize - 1); cpu_block_copy = bcopy; cpu_block_zero = bzero; getenv_int("machdep.use_vis", &cpu_use_vis); if (cpu_use_vis) { switch (cpu_impl) { case CPU_IMPL_SPARC64: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: case CPU_IMPL_ULTRASPARCIIe: case CPU_IMPL_ULTRASPARCIII: /* NB: we've disabled P$. */ case CPU_IMPL_ULTRASPARCIIIp: case CPU_IMPL_ULTRASPARCIIIi: case CPU_IMPL_ULTRASPARCIV: case CPU_IMPL_ULTRASPARCIVp: case CPU_IMPL_ULTRASPARCIIIip: cpu_block_copy = spitfire_block_copy; cpu_block_zero = spitfire_block_zero; break; } } #ifdef SMP mp_init(); #endif /* * Initialize virtual memory and calculate physmem. */ pmap_bootstrap(end); /* * Initialize tunables. */ init_param2(physmem); env = getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } /* * Initialize the interrupt tables. */ intr_init1(); /* * Initialize proc0, set kstack0, frame0, curthread and curpcb. */ proc_linkup0(&proc0, &thread0); proc0.p_md.md_sigtramp = NULL; proc0.p_md.md_utrap = NULL; thread0.td_kstack = kstack0; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; frame0.tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_PRIV; thread0.td_frame = &frame0; pc->pc_curthread = &thread0; pc->pc_curpcb = thread0.td_pcb; /* * Initialize global registers. */ cpu_setregs(pc); /* * Take over the trap table via the PROM. Using the PROM for this * is necessary in order to set obp-control-relinquished to true * within the PROM so obtaining /virtual-memory/translations doesn't * trigger a fatal reset error or worse things further down the road. * XXX it should be possible to use this soley instead of writing * %tba in cpu_setregs(). Doing so causes a hang however. */ sun4u_set_traptable(tl0_base); /* * It's now safe to use the real DELAY(). */ delay_func = delay_tick; /* * Initialize the message buffer (after setting trap table). */ msgbufinit(msgbufp, MSGBUF_SIZE); mutex_init(); intr_init2(); /* * Finish pmap initialization now that we're ready for mutexes. */ PMAP_LOCK_INIT(kernel_pmap); OF_getprop(root, "name", sparc64_model, sizeof(sparc64_model) - 1); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct trapframe *tf; struct sigframe *sfp; struct sigacts *psp; struct sigframe sf; struct thread *td; struct frame *fp; struct proc *p; u_long sp; int oonstack; int sig; oonstack = 0; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; sp = tf->tf_sp + SPOFF; oonstack = sigonstack(sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Make sure we have a signal trampoline to return to. */ if (p->p_md.md_sigtramp == NULL) { /* * No signal trampoline... kill the process. */ CTR0(KTR_SIG, "sendsig: no sigtramp"); printf("sendsig: %s is too old, rebuild it\n", p->p_comm); sigexit(td, sig); /* NOTREACHED */ } /* Save user context. */ bzero(&sf, sizeof(sf)); get_mcontext(td, &sf.sf_uc.uc_mcontext, 0); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe)); } else sfp = (struct sigframe *)sp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); fp = (struct frame *)sfp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ tf->tf_out[0] = sig; tf->tf_out[2] = (register_t)&sfp->sf_uc; tf->tf_out[4] = (register_t)catcher; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ tf->tf_out[1] = (register_t)&sfp->sf_si; /* Fill in POSIX parts. */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ tf->tf_out[1] = ksi->ksi_code; tf->tf_out[3] = (register_t)ksi->ksi_addr; } /* Copy the sigframe out to the user's stack. */ if (rwindow_save(td) != 0 || copyout(&sf, sfp, sizeof(*sfp)) != 0 || suword(&fp->fr_in[6], tf->tf_out[6]) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); PROC_LOCK(p); sigexit(td, SIGILL); /* NOTREACHED */ } tf->tf_tpc = (u_long)p->p_md.md_sigtramp; tf->tf_tnpc = tf->tf_tpc + 4; tf->tf_sp = (u_long)fp - SPOFF; CTR3(KTR_SIG, "sendsig: return td=%p pc=%#lx sp=%#lx", td, tf->tf_tpc, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif /* * MPSAFE */ int sigreturn(struct thread *td, struct sigreturn_args *uap) { struct proc *p; mcontext_t *mc; ucontext_t uc; int error; p = td->td_proc; if (rwindow_save(td)) { PROC_LOCK(p); sigexit(td, SIGILL); } CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } mc = &uc.uc_mcontext; error = set_mcontext(td, mc); if (error != 0) return (error); PROC_LOCK(p); td->td_sigmask = uc.uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); CTR4(KTR_SIG, "sigreturn: return td=%p pc=%#lx sp=%#lx tstate=%#lx", td, mc->mc_tpc, mc->mc_sp, mc->mc_tstate); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_pc = tf->tf_tpc; pcb->pcb_sp = tf->tf_sp; } int get_mcontext(struct thread *td, mcontext_t *mc, int flags) { struct trapframe *tf; struct pcb *pcb; tf = td->td_frame; pcb = td->td_pcb; bcopy(tf, mc, sizeof(*tf)); if (flags & GET_MC_CLEAR_RET) { mc->mc_out[0] = 0; mc->mc_out[1] = 0; } mc->mc_flags = _MC_VERSION; critical_enter(); if ((tf->tf_fprs & FPRS_FEF) != 0) { savefpctx(pcb->pcb_ufp); tf->tf_fprs &= ~FPRS_FEF; pcb->pcb_flags |= PCB_FEF; } if ((pcb->pcb_flags & PCB_FEF) != 0) { bcopy(pcb->pcb_ufp, mc->mc_fp, sizeof(mc->mc_fp)); mc->mc_fprs |= FPRS_FEF; } critical_exit(); return (0); } int set_mcontext(struct thread *td, const mcontext_t *mc) { struct trapframe *tf; struct pcb *pcb; uint64_t wstate; if (!TSTATE_SECURE(mc->mc_tstate) || (mc->mc_flags & ((1L << _MC_VERSION_BITS) - 1)) != _MC_VERSION) return (EINVAL); tf = td->td_frame; pcb = td->td_pcb; /* Make sure the windows are spilled first. */ flushw(); wstate = tf->tf_wstate; bcopy(mc, tf, sizeof(*tf)); tf->tf_wstate = wstate; if ((mc->mc_fprs & FPRS_FEF) != 0) { tf->tf_fprs = 0; bcopy(mc->mc_fp, pcb->pcb_ufp, sizeof(pcb->pcb_ufp)); pcb->pcb_flags |= PCB_FEF; } return (0); } /* * Exit the kernel and execute a firmware call that will not return, as * specified by the arguments. */ void cpu_shutdown(void *args) { #ifdef SMP cpu_mp_shutdown(); #endif ofw_exit(args); } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* TBD */ +} + /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { struct pcpu *pc; pc = pcpu_find(cpu_id); if (pc == NULL || rate == NULL) return (EINVAL); *rate = pc->pc_clock; return (0); } /* * Duplicate OF_exit() with a different firmware call function that restores * the trap table, otherwise a RED state exception is triggered in at least * some firmware versions. */ void cpu_halt(void) { static struct { cell_t name; cell_t nargs; cell_t nreturns; } args = { (cell_t)"exit", 0, 0 }; cpu_shutdown(&args); } void sparc64_shutdown_final(void *dummy, int howto) { static struct { cell_t name; cell_t nargs; cell_t nreturns; } args = { (cell_t)"SUNW,power-off", 0, 0 }; /* Turn the power off? */ if ((howto & RB_POWEROFF) != 0) cpu_shutdown(&args); /* In case of halt, return to the firmware. */ if ((howto & RB_HALT) != 0) cpu_halt(); } void cpu_idle(int busy) { /* Insert code to halt (until next interrupt) for the idle loop. */ } int cpu_idle_wakeup(int cpu) { return (0); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_tpc = addr; td->td_frame->tf_tnpc = addr + 4; return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (0); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (0); } void exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tf; struct pcb *pcb; struct proc *p; u_long sp; /* XXX no cpu_exec */ p = td->td_proc; p->p_md.md_sigtramp = NULL; if (p->p_md.md_utrap != NULL) { utrap_free(p->p_md.md_utrap); p->p_md.md_utrap = NULL; } pcb = td->td_pcb; tf = td->td_frame; sp = rounddown(stack, 16); bzero(pcb, sizeof(*pcb)); bzero(tf, sizeof(*tf)); tf->tf_out[0] = stack; tf->tf_out[3] = p->p_sysent->sv_psstrings; tf->tf_out[6] = sp - SPOFF - sizeof(struct frame); tf->tf_tnpc = entry + 4; tf->tf_tpc = entry; tf->tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_MM_TSO; td->td_retval[0] = tf->tf_out[0]; td->td_retval[1] = tf->tf_out[1]; } int fill_regs(struct thread *td, struct reg *regs) { bcopy(td->td_frame, regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; if (!TSTATE_SECURE(regs->r_tstate)) return (EINVAL); tf = td->td_frame; regs->r_wstate = tf->tf_wstate; bcopy(regs, tf, sizeof(*regs)); return (0); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *tf; struct pcb *pcb; pcb = td->td_pcb; tf = td->td_frame; bcopy(pcb->pcb_ufp, fpregs->fr_regs, sizeof(fpregs->fr_regs)); fpregs->fr_fsr = tf->tf_fsr; fpregs->fr_gsr = tf->tf_gsr; return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *tf; struct pcb *pcb; pcb = td->td_pcb; tf = td->td_frame; tf->tf_fprs &= ~FPRS_FEF; bcopy(fpregs->fr_regs, pcb->pcb_ufp, sizeof(pcb->pcb_ufp)); tf->tf_fsr = fpregs->fr_fsr; tf->tf_gsr = fpregs->fr_gsr; return (0); } struct md_utrap * utrap_alloc(void) { struct md_utrap *ut; ut = malloc(sizeof(struct md_utrap), M_SUBPROC, M_WAITOK | M_ZERO); ut->ut_refcnt = 1; return (ut); } void utrap_free(struct md_utrap *ut) { int refcnt; if (ut == NULL) return; mtx_pool_lock(mtxpool_sleep, ut); ut->ut_refcnt--; refcnt = ut->ut_refcnt; mtx_pool_unlock(mtxpool_sleep, ut); if (refcnt == 0) free(ut, M_SUBPROC); } struct md_utrap * utrap_hold(struct md_utrap *ut) { if (ut == NULL) return (NULL); mtx_pool_lock(mtxpool_sleep, ut); ut->ut_refcnt++; mtx_pool_unlock(mtxpool_sleep, ut); return (ut); } diff --git a/sys/sun4v/sun4v/machdep.c b/sys/sun4v/sun4v/machdep.c index b7c0869c36c5..79750408c01f 100644 --- a/sys/sun4v/sun4v/machdep.c +++ b/sys/sun4v/sun4v/machdep.c @@ -1,1007 +1,1017 @@ /*- * Copyright (c) 2001 Jake Burkholder. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * from: FreeBSD: src/sys/i386/i386/machdep.c,v 1.477 2001/08/27 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_msgbuf.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* XXX move this to a header */ extern void mdesc_init(void); typedef int ofw_vec_t(void *); #ifdef DDB extern vm_offset_t ksym_start, ksym_end; #endif struct tlb_entry *kernel_tlbs; int kernel_tlb_slots; int cold = 1; long Maxmem; long realmem; char pcpu0[PCPU_PAGES * PAGE_SIZE]; struct trapframe frame0; int trap_conversion[256]; vm_paddr_t mmu_fault_status_area; vm_offset_t kstack0; vm_paddr_t kstack0_phys; struct kva_md_info kmi; u_long ofw_vec; u_long ofw_tba; /* * Note: timer quality for CPU's is set low to try and prevent them from * being chosen as the primary timecounter. The CPU counters are not * synchronized among the CPU's so in MP machines this causes problems * when calculating the time. With this value the CPU's should only be * chosen as the primary timecounter as a last resort. */ #define UP_TICK_QUALITY 1000 #ifdef SUN4V #define MP_TICK_QUALITY 1000 #else #define MP_TICK_QUALITY -100 #endif static struct timecounter tick_tc; char sparc64_model[32]; cpu_block_copy_t *cpu_block_copy; cpu_block_zero_t *cpu_block_zero; static timecounter_get_t tick_get_timecount; void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec); void sparc64_shutdown_final(void *dummy, int howto); static void cpu_startup(void *); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); CTASSERT((1 << INT_SHIFT) == sizeof(int)); CTASSERT((1 << PTR_SHIFT) == sizeof(char *)); CTASSERT(sizeof(struct reg) == 256); CTASSERT(sizeof(struct fpreg) == 272); CTASSERT(sizeof(struct __mcontext) == 512); CTASSERT((sizeof(struct pcb) & (64 - 1)) == 0); CTASSERT((offsetof(struct pcb, pcb_kfp) & (64 - 1)) == 0); CTASSERT((offsetof(struct pcb, pcb_ufp) & (64 - 1)) == 0); CTASSERT(sizeof(struct pcb) <= ((KSTACK_PAGES * PAGE_SIZE) / 8)); CTASSERT(sizeof(struct pcpu) <= ((PCPU_PAGES * PAGE_SIZE) / 2)); CTASSERT((sizeof(struct pcpu) & ((1<<6)-1)) == 0); #define BVPRINTF(x) \ if (bootverbose) \ printf(x); static void cpu_startup(void *arg) { vm_paddr_t physsz; int i; tick_tc.tc_get_timecount = tick_get_timecount; tick_tc.tc_poll_pps = NULL; tick_tc.tc_counter_mask = ~0u; tick_tc.tc_frequency = tick_freq; tick_tc.tc_name = "tick"; tick_tc.tc_quality = UP_TICK_QUALITY; #ifdef SMP /* * We do not know if each CPU's tick counter is synchronized. */ if (cpu_mp_probe()) tick_tc.tc_quality = MP_TICK_QUALITY; #endif tc_init(&tick_tc); physsz = 0; for (i = 0; i < sparc64_nmemreg; i++) physsz += sparc64_memreg[i].mr_size; printf("real memory = %lu (%lu MB)\n", physsz, physsz / (1024 * 1024)); realmem = (long)physsz; vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); EVENTHANDLER_REGISTER(shutdown_final, sparc64_shutdown_final, NULL, SHUTDOWN_PRI_LAST); printf("avail memory = %lu (%lu MB)\n", cnt.v_free_count * PAGE_SIZE, cnt.v_free_count / ((1024 * 1024) / PAGE_SIZE)); if (bootverbose) printf("machine: %s\n", sparc64_model); #ifdef notyet cpu_identify(rdpr(ver), tick_freq, PCPU_GET(cpuid)); #endif } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { struct intr_request *ir; int i; pcpu->pc_irtail = &pcpu->pc_irhead; for (i = 0; i < IR_FREE; i++) { ir = &pcpu->pc_irpool[i]; ir->ir_next = pcpu->pc_irfree; pcpu->pc_irfree = ir; } } void spinlock_enter(void) { struct thread *td; register_t pil; td = curthread; if (td->td_md.md_spinlock_count == 0) { pil = intr_disable(); td->td_md.md_saved_pil = pil; } td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; td = curthread; critical_exit(); td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) { intr_restore(td->td_md.md_saved_pil); } } unsigned tick_get_timecount(struct timecounter *tc) { return ((unsigned)rd(tick)); } void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec) { phandle_t child; phandle_t root; struct pcpu *pc; vm_offset_t end; caddr_t kmdp; u_int clock; char *env; char type[8]; vm_paddr_t mmfsa; int i; end = 0; kmdp = NULL; /* * XXX */ bootverbose = 1; /* * Set up Open Firmware entry points */ ofw_tba = rdpr(tba); ofw_vec = (u_long)vec; /* * Parse metadata if present and fetch parameters. Must be before the * console is inited so cninit gets the right value of boothowto. */ if (mdp != NULL) { preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) { boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); kern_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); end = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); kernel_tlb_slots = MD_FETCH(kmdp, MODINFOMD_DTLB_SLOTS, int); kernel_tlbs = (void *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_DTLB); } } if (boothowto & RB_VERBOSE) bootverbose = 1; init_param1(); /* * Initialize Open Firmware (needed for console). */ OF_install(OFW_STD_DIRECT, 0); OF_init(ofw_entry); root = OF_peer(0); for (child = OF_child(root); child != 0; child = OF_peer(child)) { OF_getprop(child, "device_type", type, sizeof(type)); if (strcmp(type, "cpu") == 0) break; } OF_getprop(child, "clock-frequency", &clock, sizeof(clock)); /* * Initialize the console before printing anything. * console uses the pcpu area for serialization */ pc = (struct pcpu *)(pcpu0 + (PCPU_PAGES * PAGE_SIZE)) - 1; cpu_setregs(pc); /* * Initialize proc0 stuff (p_contested needs to be done early). */ proc_linkup0(&proc0, &thread0); proc0.p_md.md_sigtramp = NULL; proc0.p_md.md_utrap = NULL; frame0.tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_PRIV; thread0.td_frame = &frame0; if ((u_long)thread0.td_frame & 0x3f) { panic("unaligned frame0"); } /* * Prime our per-cpu data page for use. Note, we are using it for our * stack, so don't pass the real size (PAGE_SIZE) to pcpu_init or * it'll zero it out from under us. */ pc = (struct pcpu *)(pcpu0 + (PCPU_PAGES * PAGE_SIZE)) - 1; pcpu_init(pc, 0, sizeof(struct pcpu)); pc->pc_curthread = &thread0; pc->pc_addr = (vm_offset_t)pcpu0; cninit(); tick_init(clock); printf("cpu0: UltraSparc T1 Processor (%d.%02d MHz CPU)\n", (clock + 4999) / 1000000, ((clock + 4999) / 10000) % 100); /* * Panic is there is no metadata. Most likely the kernel was booted * directly, instead of through loader(8). */ if (mdp == NULL || kmdp == NULL) { printf("sparc64_init: no loader metadata.\n" "This probably means you are not using loader(8).\n"); panic("sparc64_init"); } /* * Sanity check the kernel end, which is important. */ if (end == 0) { printf("sparc64_init: warning, kernel end not specified.\n" "Attempting to continue anyway.\n"); end = (vm_offset_t)_end; } cpu_block_copy = bcopy; cpu_block_zero = bzero; #ifdef SMP mp_tramp = mp_tramp_alloc(); #endif env = getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } /* * Initialize global registers. * needed for curthread to work */ cpu_setregs(pc); /* * Initialize virtual memory and calculate physmem. */ pmap_bootstrap(end); thread0.td_kstack = kstack0; thread0.td_md.md_saved_pil = 0; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; thread0.td_pcb->pcb_kstack = (uint64_t)(((char *)thread0.td_pcb) - (CCFSZ + SPOFF)); thread0.td_pcb = (struct pcb *)TLB_PHYS_TO_DIRECT(vtophys((vm_offset_t)thread0.td_pcb)); pc->pc_curpcb = thread0.td_pcb; if (((thread0.td_pcb->pcb_kstack + SPOFF) & 0x3f) != 0) { printf("unaligned stack pcb_kstack & 0x3f == 0x%lx\n", ((thread0.td_pcb->pcb_kstack + SPOFF) & 0x3f)); } /* * Update PCPU_REG to point to direct address * to support easy phys <-> virt translation in trap handler */ pc = (struct pcpu *)TLB_PHYS_TO_DIRECT(vtophys(pc)); BVPRINTF("initializing cpu regs\n"); cpu_setregs(pc); /* * Initialize tunables. */ BVPRINTF("initialize tunables\n"); init_param2(physmem); /* * setup trap table and fault status area */ BVPRINTF("initialize trap tables\n"); mmfsa = mmu_fault_status_area + MMFSA_SIZE; BVPRINTF("setwstate\n"); set_wstate(WSTATE_KERN); BVPRINTF("set_mmfsa_scratchpad\n"); set_mmfsa_scratchpad(mmfsa); BVPRINTF("init_mondo_queue\n"); init_mondo_queue(); BVPRINTF("set_mmfsa_traptable\n"); set_mmfsa_traptable(&tl0_base, mmfsa); BVPRINTF("trap conversion\n"); for (i = 0; i < 256; i++) trap_conversion[i] = 0; trap_conversion[TT_INSTRUCTION_EXCEPTION] = T_INSTRUCTION_EXCEPTION; trap_conversion[TT_INSTRUCTION_MISS] = T_INSTRUCTION_MISS; trap_conversion[TT_ILLEGAL_INSTRUCTION] = T_ILLEGAL_INSTRUCTION; trap_conversion[TT_PRIVILEGED_OPCODE] = T_PRIVILEGED_OPCODE; trap_conversion[TT_FP_EXCEPTION_IEEE_754] = T_FP_EXCEPTION_IEEE_754; trap_conversion[TT_TAG_OVERFLOW] = T_TAG_OVERFLOW; trap_conversion[TT_DIVISION_BY_ZERO] = T_DIVISION_BY_ZERO; trap_conversion[TT_DATA_EXCEPTION] = T_DATA_EXCEPTION; trap_conversion[TT_DATA_MISS] = T_DATA_MISS; trap_conversion[TT_ALIGNMENT] = T_ALIGNMENT; trap_conversion[TT_DATA_PROTECTION] = T_DATA_PROTECTION; /* * Initialize the message buffer (after setting trap table). */ BVPRINTF("initialize msgbuf\n"); msgbufinit(msgbufp, MSGBUF_SIZE); BVPRINTF("initialize mutexes\n"); mutex_init(); BVPRINTF("initialize machine descriptor table\n"); mdesc_init(); BVPRINTF("initialize get model name\n"); OF_getprop(root, "name", sparc64_model, sizeof(sparc64_model) - 1); BVPRINTF("initialize kdb\n"); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif BVPRINTF("sparc64_init done\n"); } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct trapframe *tf; struct sigframe *sfp; struct sigacts *psp; struct sigframe sf; struct thread *td; struct frame *fp; struct proc *p; int oonstack; u_long sp; int sig; int code; oonstack = 0; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; sp = tf->tf_sp + SPOFF; oonstack = sigonstack(sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Make sure we have a signal trampoline to return to. */ if (p->p_md.md_sigtramp == NULL) { /* * No signal tramoline... kill the process. */ CTR0(KTR_SIG, "sendsig: no sigtramp"); printf("sendsig: %s is too old, rebuild it\n", p->p_comm); sigexit(td, sig); /* NOTREACHED */ } /* Save user context. */ bzero(&sf, sizeof(sf)); get_mcontext(td, &sf.sf_uc.uc_mcontext, 0); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { sfp = (struct sigframe *)(td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe)); } else sfp = (struct sigframe *)sp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); fp = (struct frame *)sfp - 1; /* Translate the signal if appropriate. */ if (p->p_sysent->sv_sigtbl && sig <= p->p_sysent->sv_sigsize) sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; /* Build the argument list for the signal handler. */ tf->tf_out[0] = sig; tf->tf_out[2] = (register_t)&sfp->sf_uc; tf->tf_out[4] = (register_t)catcher; /* Fill siginfo structure. */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_addr = (void *)tf->tf_tpc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ tf->tf_out[1] = (register_t)&sfp->sf_si; /* Fill in POSIX parts. */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ tf->tf_out[1] = ksi->ksi_code; tf->tf_out[3] = (register_t)ksi->ksi_addr; } /* Copy the sigframe out to the user's stack. */ if (rwindow_save(td) != 0 || copyout(&sf, sfp, sizeof(*sfp)) != 0 || suword(&fp->fr_in[6], tf->tf_out[6]) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); PROC_LOCK(p); sigexit(td, SIGILL); /* NOTREACHED */ } tf->tf_tpc = (u_long)p->p_md.md_sigtramp; tf->tf_tnpc = tf->tf_tpc + 4; tf->tf_sp = (u_long)fp - SPOFF; CTR3(KTR_SIG, "sendsig: return td=%p pc=%#lx sp=%#lx", td, tf->tf_tpc, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif /* * MPSAFE */ int sigreturn(struct thread *td, struct sigreturn_args *uap) { struct proc *p; mcontext_t *mc; ucontext_t uc; int error; p = td->td_proc; if (rwindow_save(td)) { PROC_LOCK(p); sigexit(td, SIGILL); } CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } mc = &uc.uc_mcontext; error = set_mcontext(td, mc); if (error != 0) return (error); PROC_LOCK(p); td->td_sigmask = uc.uc_sigmask; SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); CTR4(KTR_SIG, "sigreturn: return td=%p pc=%#lx sp=%#lx tstate=%#lx", td, mc->mc_tpc, mc->mc_sp, mc->mc_tstate); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_pc = tf->tf_tpc; pcb->pcb_sp = tf->tf_sp; } int get_mcontext(struct thread *td, mcontext_t *mc, int flags) { struct trapframe *tf; struct pcb *pcb; tf = td->td_frame; pcb = td->td_pcb; bcopy(tf, mc, sizeof(*tf)); if (flags & GET_MC_CLEAR_RET) { mc->mc_out[0] = 0; mc->mc_out[1] = 0; } mc->mc_flags = _MC_VERSION; critical_enter(); if ((tf->tf_fprs & FPRS_FEF) != 0) { savefpctx(pcb->pcb_ufp); pcb->pcb_flags |= PCB_FEF; tf->tf_fprs &= ~FPRS_FEF; } if ((pcb->pcb_flags & PCB_FEF) != 0) { bcopy(pcb->pcb_ufp, mc->mc_fp, sizeof(mc->mc_fp)); mc->mc_fprs |= FPRS_FEF; } critical_exit(); return (0); } int set_mcontext(struct thread *td, const mcontext_t *mc) { struct trapframe *tf; struct pcb *pcb; uint64_t wstate; if (!TSTATE_SECURE(mc->mc_tstate) || (mc->mc_flags & ((1L << _MC_VERSION_BITS) - 1)) != _MC_VERSION) return (EINVAL); tf = td->td_frame; pcb = td->td_pcb; /* Make sure the windows are spilled first. */ flushw(); wstate = tf->tf_wstate; bcopy(mc, tf, sizeof(*tf)); tf->tf_wstate = wstate; if ((mc->mc_fprs & FPRS_FEF) != 0) { tf->tf_fprs = 0; bcopy(mc->mc_fp, pcb->pcb_ufp, sizeof(pcb->pcb_ufp)); pcb->pcb_flags |= PCB_FEF; } return (0); } /* * Exit the kernel and execute a firmware call that will not return, as * specified by the arguments. */ void cpu_shutdown(void *args) { #ifdef SMP cpu_mp_shutdown(); #endif hv_mach_exit(0); } +/* + * Flush the D-cache for non-DMA I/O so that the I-cache can + * be made coherent later. + */ +void +cpu_flush_dcache(void *ptr, size_t len) +{ + /* TBD */ +} + /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } /* * Duplicate OF_exit() with a different firmware call function that restores * the trap table, otherwise a RED state exception is triggered in at least * some firmware versions. */ void cpu_halt(void) { static struct { cell_t name; cell_t nargs; cell_t nreturns; } args = { (cell_t)"exit", 0, 0 }; cpu_shutdown(&args); } void sparc64_shutdown_final(void *dummy, int howto) { static struct { cell_t name; cell_t nargs; cell_t nreturns; } args = { (cell_t)"SUNW,power-off", 0, 0 }; /* Turn the power off? */ if ((howto & RB_POWEROFF) != 0) cpu_shutdown(&args); /* In case of halt, return to the firmware */ if ((howto & RB_HALT) != 0) cpu_halt(); } void cpu_idle(int busy) { if (rdpr(pil) != 0) panic("pil in cpu_idle not 0 - %ld", rdpr(pil)); if (rdpr(pstate) != 0x16) panic("interrupts disabled in cpu_idle 0x%lx", rdpr(pstate)); /* XXX heinous hack begin*/ cpu_yield(); } int cpu_idle_wakeup(int cpu) { return (0); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_tpc = addr; td->td_frame->tf_tnpc = addr + 4; return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (0); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (0); } void exec_setregs(struct thread *td, u_long entry, u_long stack, u_long ps_strings) { struct trapframe *tf; struct pcb *pcb; struct proc *p; uint64_t kstack; u_long sp; /* XXX no cpu_exec */ p = td->td_proc; p->p_md.md_sigtramp = NULL; if (p->p_md.md_utrap != NULL) { utrap_free(p->p_md.md_utrap); p->p_md.md_utrap = NULL; } pcb = td->td_pcb; kstack = pcb->pcb_kstack; tf = td->td_frame; sp = rounddown(stack, 16); bzero(pcb, sizeof(*pcb)); bzero(tf, sizeof(*tf)); pcb->pcb_kstack = kstack; tf->tf_out[0] = stack; tf->tf_out[3] = p->p_sysent->sv_psstrings; tf->tf_out[6] = sp - SPOFF - sizeof(struct frame); tf->tf_tnpc = entry + 4; tf->tf_tpc = entry; tf->tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_MM_TSO; td->td_retval[0] = tf->tf_out[0]; td->td_retval[1] = tf->tf_out[1]; } int fill_regs(struct thread *td, struct reg *regs) { bcopy(td->td_frame, regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; if (!TSTATE_SECURE(regs->r_tstate)) return (EINVAL); tf = td->td_frame; regs->r_wstate = tf->tf_wstate; bcopy(regs, tf, sizeof(*regs)); return (0); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *tf; struct pcb *pcb; pcb = td->td_pcb; tf = td->td_frame; tf->tf_fprs = ~FPRS_FEF; bcopy(pcb->pcb_ufp, fpregs->fr_regs, sizeof(fpregs->fr_regs)); fpregs->fr_fsr = tf->tf_fsr; fpregs->fr_gsr = tf->tf_gsr; return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *tf; struct pcb *pcb; pcb = td->td_pcb; tf = td->td_frame; tf->tf_fprs &= ~FPRS_FEF; bcopy(fpregs->fr_regs, pcb->pcb_ufp, sizeof(pcb->pcb_ufp)); tf->tf_fsr = fpregs->fr_fsr; tf->tf_gsr = fpregs->fr_gsr; return (0); } struct md_utrap * utrap_alloc(void) { struct md_utrap *ut; ut = malloc(sizeof(struct md_utrap), M_SUBPROC, M_WAITOK | M_ZERO); ut->ut_refcnt = 1; return (ut); } void utrap_free(struct md_utrap *ut) { int refcnt; if (ut == NULL) return; mtx_pool_lock(mtxpool_sleep, ut); ut->ut_refcnt--; refcnt = ut->ut_refcnt; mtx_pool_unlock(mtxpool_sleep, ut); if (refcnt == 0) free(ut, M_SUBPROC); } struct md_utrap * utrap_hold(struct md_utrap *ut) { if (ut == NULL) return (NULL); mtx_pool_lock(mtxpool_sleep, ut); ut->ut_refcnt++; mtx_pool_unlock(mtxpool_sleep, ut); return (ut); } void cpu_yield(void) { if (rdpr(pil) < PIL_TICK) hv_cpu_yield(); } diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 31fb750a206a..c20536a01a3c 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -1,398 +1,399 @@ /*- * Copyright (c) 1982, 1988, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)systm.h 8.7 (Berkeley) 3/29/95 * $FreeBSD$ */ #ifndef _SYS_SYSTM_H_ #define _SYS_SYSTM_H_ #include #include #include #include #include #include /* for people using printf mainly */ extern int securelevel; /* system security level (see init(8)) */ extern int cold; /* nonzero if we are doing a cold boot */ extern int rebooting; /* boot() has been called. */ extern const char *panicstr; /* panic message */ extern char version[]; /* system version */ extern char copyright[]; /* system copyright */ extern int kstack_pages; /* number of kernel stack pages */ extern int nswap; /* size of swap space */ extern long physmem; /* physical memory */ extern long realmem; /* 'real' memory */ extern char *rootdevnames[2]; /* names of possible root devices */ extern int boothowto; /* reboot flags, from console subsystem */ extern int bootverbose; /* nonzero to print verbose messages */ extern int maxusers; /* system tune hint */ #ifdef INVARIANTS /* The option is always available */ #define KASSERT(exp,msg) do { \ if (__predict_false(!(exp))) \ panic msg; \ } while (0) #define VNASSERT(exp, vp, msg) do { \ if (__predict_false(!(exp))) { \ vn_printf(vp, "VNASSERT failed\n"); \ panic msg; \ } \ } while (0) #else #define KASSERT(exp,msg) do { \ } while (0) #define VNASSERT(exp, vp, msg) do { \ } while (0) #endif #ifndef CTASSERT /* Allow lint to override */ #define CTASSERT(x) _CTASSERT(x, __LINE__) #define _CTASSERT(x, y) __CTASSERT(x, y) #define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1] #endif /* * XXX the hints declarations are even more misplaced than most declarations * in this file, since they are needed in one file (per arch) and only used * in two files. * XXX most of these variables should be const. */ extern int osreldate; extern int envmode; extern int hintmode; /* 0 = off. 1 = config, 2 = fallback */ extern int dynamic_kenv; extern struct mtx kenv_lock; extern char *kern_envp; extern char static_env[]; extern char static_hints[]; /* by config for now */ extern char **kenvp; /* * General function declarations. */ struct inpcb; struct lock_object; struct malloc_type; struct mtx; struct proc; struct socket; struct thread; struct tty; struct ucred; struct uio; struct _jmp_buf; int setjmp(struct _jmp_buf *); void longjmp(struct _jmp_buf *, int) __dead2; int dumpstatus(vm_offset_t addr, off_t count); int nullop(void); int eopnotsupp(void); int ureadc(int, struct uio *); void hashdestroy(void *, struct malloc_type *, u_long); void *hashinit(int count, struct malloc_type *type, u_long *hashmark); void *hashinit_flags(int count, struct malloc_type *type, u_long *hashmask, int flags); #define HASH_NOWAIT 0x00000001 #define HASH_WAITOK 0x00000002 void *phashinit(int count, struct malloc_type *type, u_long *nentries); void g_waitidle(void); #ifdef RESTARTABLE_PANICS void panic(const char *, ...) __printflike(1, 2); #else void panic(const char *, ...) __dead2 __printflike(1, 2); #endif void cpu_boot(int); +void cpu_flush_dcache(void *, size_t); void cpu_rootconf(void); void critical_enter(void); void critical_exit(void); void init_param1(void); void init_param2(long physpages); void init_param3(long kmempages); void tablefull(const char *); int kvprintf(char const *, void (*)(int, void*), void *, int, __va_list) __printflike(1, 0); void log(int, const char *, ...) __printflike(2, 3); void log_console(struct uio *); int printf(const char *, ...) __printflike(1, 2); int snprintf(char *, size_t, const char *, ...) __printflike(3, 4); int sprintf(char *buf, const char *, ...) __printflike(2, 3); int uprintf(const char *, ...) __printflike(1, 2); int vprintf(const char *, __va_list) __printflike(1, 0); int vsnprintf(char *, size_t, const char *, __va_list) __printflike(3, 0); int vsnrprintf(char *, size_t, int, const char *, __va_list) __printflike(4, 0); int vsprintf(char *buf, const char *, __va_list) __printflike(2, 0); int ttyprintf(struct tty *, const char *, ...) __printflike(2, 3); int sscanf(const char *, char const *, ...) __nonnull(1) __nonnull(2); int vsscanf(const char *, char const *, __va_list) __nonnull(1) __nonnull(2); long strtol(const char *, char **, int) __nonnull(1); u_long strtoul(const char *, char **, int) __nonnull(1); quad_t strtoq(const char *, char **, int) __nonnull(1); u_quad_t strtouq(const char *, char **, int) __nonnull(1); void tprintf(struct proc *p, int pri, const char *, ...) __printflike(3, 4); void hexdump(const void *ptr, int length, const char *hdr, int flags); #define HD_COLUMN_MASK 0xff #define HD_DELIM_MASK 0xff00 #define HD_OMIT_COUNT (1 << 16) #define HD_OMIT_HEX (1 << 17) #define HD_OMIT_CHARS (1 << 18) #define ovbcopy(f, t, l) bcopy((f), (t), (l)) void bcopy(const void *from, void *to, size_t len) __nonnull(1) __nonnull(2); void bzero(void *buf, size_t len) __nonnull(1); void *memcpy(void *to, const void *from, size_t len) __nonnull(1) __nonnull(2); void *memmove(void *dest, const void *src, size_t n) __nonnull(1) __nonnull(2); int copystr(const void * __restrict kfaddr, void * __restrict kdaddr, size_t len, size_t * __restrict lencopied) __nonnull(1) __nonnull(2); int copyinstr(const void * __restrict udaddr, void * __restrict kaddr, size_t len, size_t * __restrict lencopied) __nonnull(1) __nonnull(2); int copyin(const void * __restrict udaddr, void * __restrict kaddr, size_t len) __nonnull(1) __nonnull(2); int copyout(const void * __restrict kaddr, void * __restrict udaddr, size_t len) __nonnull(1) __nonnull(2); int fubyte(const void *base); long fuword(const void *base); int fuword16(void *base); int32_t fuword32(const void *base); int64_t fuword64(const void *base); int subyte(void *base, int byte); int suword(void *base, long word); int suword16(void *base, int word); int suword32(void *base, int32_t word); int suword64(void *base, int64_t word); uint32_t casuword32(volatile uint32_t *base, uint32_t oldval, uint32_t newval); u_long casuword(volatile u_long *p, u_long oldval, u_long newval); void realitexpire(void *); int sysbeep(int hertz, int period); void hardclock(int usermode, uintfptr_t pc); void hardclock_cpu(int usermode); void softclock(void *); void statclock(int usermode); void profclock(int usermode, uintfptr_t pc); void startprofclock(struct proc *); void stopprofclock(struct proc *); void cpu_startprofclock(void); void cpu_stopprofclock(void); int cr_cansee(struct ucred *u1, struct ucred *u2); int cr_canseesocket(struct ucred *cred, struct socket *so); int cr_canseeinpcb(struct ucred *cred, struct inpcb *inp); char *getenv(const char *name); void freeenv(char *env); int getenv_int(const char *name, int *data); int getenv_uint(const char *name, unsigned int *data); int getenv_long(const char *name, long *data); int getenv_ulong(const char *name, unsigned long *data); int getenv_string(const char *name, char *data, int size); int getenv_quad(const char *name, quad_t *data); int setenv(const char *name, const char *value); int unsetenv(const char *name); int testenv(const char *name); typedef uint64_t (cpu_tick_f)(void); void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var); extern cpu_tick_f *cpu_ticks; uint64_t cpu_tickrate(void); uint64_t cputick2usec(uint64_t tick); #ifdef APM_FIXUP_CALLTODO struct timeval; void adjust_timeout_calltodo(struct timeval *time_change); #endif /* APM_FIXUP_CALLTODO */ #include /* Initialize the world */ void consinit(void); void cpu_initclocks(void); void usrinfoinit(void); /* Finalize the world */ void shutdown_nice(int); /* Timeouts */ typedef void timeout_t(void *); /* timeout function type */ #define CALLOUT_HANDLE_INITIALIZER(handle) \ { NULL } void callout_handle_init(struct callout_handle *); struct callout_handle timeout(timeout_t *, void *, int); void untimeout(timeout_t *, void *, struct callout_handle); caddr_t kern_timeout_callwheel_alloc(caddr_t v); void kern_timeout_callwheel_init(void); /* Stubs for obsolete functions that used to be for interrupt management */ static __inline void spl0(void) { return; } static __inline intrmask_t splbio(void) { return 0; } static __inline intrmask_t splcam(void) { return 0; } static __inline intrmask_t splclock(void) { return 0; } static __inline intrmask_t splhigh(void) { return 0; } static __inline intrmask_t splimp(void) { return 0; } static __inline intrmask_t splnet(void) { return 0; } static __inline intrmask_t splsoftcam(void) { return 0; } static __inline intrmask_t splsoftclock(void) { return 0; } static __inline intrmask_t splsofttty(void) { return 0; } static __inline intrmask_t splsoftvm(void) { return 0; } static __inline intrmask_t splsofttq(void) { return 0; } static __inline intrmask_t splstatclock(void) { return 0; } static __inline intrmask_t spltty(void) { return 0; } static __inline intrmask_t splvm(void) { return 0; } static __inline void splx(intrmask_t ipl __unused) { return; } /* * Common `proc' functions are declared here so that proc.h can be included * less often. */ int _sleep(void *chan, struct lock_object *lock, int pri, const char *wmesg, int timo) __nonnull(1); #define msleep(chan, mtx, pri, wmesg, timo) \ _sleep((chan), &(mtx)->lock_object, (pri), (wmesg), (timo)) int msleep_spin(void *chan, struct mtx *mtx, const char *wmesg, int timo) __nonnull(1); int pause(const char *wmesg, int timo); #define tsleep(chan, pri, wmesg, timo) \ _sleep((chan), NULL, (pri), (wmesg), (timo)) void wakeup(void *chan) __nonnull(1); void wakeup_one(void *chan) __nonnull(1); /* * Common `struct cdev *' stuff are declared here to avoid #include poisoning */ struct cdev; dev_t dev2udev(struct cdev *x); const char *devtoname(struct cdev *cdev); int poll_no_poll(int events); /* XXX: Should be void nanodelay(u_int nsec); */ void DELAY(int usec); /* Root mount holdback API */ struct root_hold_token; struct root_hold_token *root_mount_hold(const char *identifier); void root_mount_rel(struct root_hold_token *h); void root_mount_wait(void); int root_mounted(void); /* * Unit number allocation API. (kern/subr_unit.c) */ struct unrhdr; struct unrhdr *new_unrhdr(int low, int high, struct mtx *mutex); void delete_unrhdr(struct unrhdr *uh); void clean_unrhdr(struct unrhdr *uh); void clean_unrhdrl(struct unrhdr *uh); int alloc_unr(struct unrhdr *uh); int alloc_unrl(struct unrhdr *uh); void free_unr(struct unrhdr *uh, u_int item); /* * This is about as magic as it gets. fortune(1) has got similar code * for reversing bits in a word. Who thinks up this stuff?? * * Yes, it does appear to be consistently faster than: * while (i = ffs(m)) { * m >>= i; * bits++; * } * and * while (lsb = (m & -m)) { // This is magic too * m &= ~lsb; // or: m ^= lsb * bits++; * } * Both of these latter forms do some very strange things on gcc-3.1 with * -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2. * There is probably an SSE or MMX popcnt instruction. * * I wonder if this should be in libkern? * * XXX Stop the presses! Another one: * static __inline u_int32_t * popcnt1(u_int32_t v) * { * v -= ((v >> 1) & 0x55555555); * v = (v & 0x33333333) + ((v >> 2) & 0x33333333); * v = (v + (v >> 4)) & 0x0F0F0F0F; * return (v * 0x01010101) >> 24; * } * The downside is that it has a multiply. With a pentium3 with * -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use * an imull, and in that case it is faster. In most other cases * it appears slightly slower. * * Another variant (also from fortune): * #define BITCOUNT(x) (((BX_(x)+(BX_(x)>>4)) & 0x0F0F0F0F) % 255) * #define BX_(x) ((x) - (((x)>>1)&0x77777777) \ * - (((x)>>2)&0x33333333) \ * - (((x)>>3)&0x11111111)) */ static __inline uint32_t bitcount32(uint32_t x) { x = (x & 0x55555555) + ((x & 0xaaaaaaaa) >> 1); x = (x & 0x33333333) + ((x & 0xcccccccc) >> 2); x = (x + (x >> 4)) & 0x0f0f0f0f; x = (x + (x >> 8)); x = (x + (x >> 16)) & 0x000000ff; return (x); } #endif /* !_SYS_SYSTM_H_ */