Index: head/sys/amd64/amd64/machdep.c =================================================================== --- head/sys/amd64/amd64/machdep.c (revision 294929) +++ head/sys/amd64/amd64/machdep.c (revision 294930) @@ -1,2474 +1,2474 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_perfmon.h" #include "opt_platform.h" #include "opt_sched.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #include #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #include #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_ATPIC #include #else #include #endif #include #include #include /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern u_int64_t hammer_time(u_int64_t, u_int64_t); #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void cpu_startup(void *); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* Preload data parse function */ static caddr_t native_parse_preload_data(u_int64_t); /* Native function to fetch and parse the e820 map */ static void native_parse_memmap(caddr_t, vm_paddr_t *, int *); /* Default init_ops implementation. */ struct init_ops init_ops = { .parse_preload_data = native_parse_preload_data, .early_clock_source_init = i8254_init, .early_delay = i8254_delay, .parse_memmap = native_parse_memmap, #ifdef SMP .mp_bootaddress = mp_bootaddress, .start_all_aps = native_start_all_aps, #endif .msi_init = msi_init, }; /* * The file "conf/ldscript.amd64" defines the symbol "kernphys". Its value is * the physical address at which the kernel is loaded. */ extern char kernphys[]; struct msgbuf *msgbufp; /* * Physical address of the EFI System Table. Stashed from the metadata hints * passed into the kernel and used by the EFI code to call runtime services. */ vm_paddr_t efi_systbl; /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel, _ucode32sel, _ufssel, _ugssel; int cold = 1; long Maxmem = 0; long realmem = 0; /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct region_descriptor r_gdt, r_idt; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; struct mtx dt_lock; /* lock for GDT and LDT */ void (*vmm_resume_p)(void); static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = kern_getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBook4,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "MacBookPro4,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif /* * Display physical memory if SMBIOS reports reasonable amount. */ memsize = 0; sysenv = kern_getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_cnt.v_free_count), ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct pcb *pcb; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; pcb = td->td_pcb; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; bcopy(regs, &sf.sf_uc.uc_mcontext.mc_rdi, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); sf.sf_uc.uc_mcontext.mc_fsbase = pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = pcb->pcb_gsbase; bzero(sf.sf_uc.uc_mcontext.mc_spare, sizeof(sf.sf_uc.uc_mcontext.mc_spare)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; + sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_rsp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned long)sp & ~0x3Ful); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned long)sp & ~0xFul); /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rdx = (register_t)&sfp->sf_uc; /* arg 3 in %rdx */ bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ } else { /* Old FreeBSD-style arguments. */ regs->tf_rsi = ksi->ksi_code; /* arg 2 in %rsi */ regs->tf_rcx = (register_t)ksi->ksi_addr; /* arg 4 in %rcx */ sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = p->p_sysent->sv_sigcode_base; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_ss = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct pcb *pcb; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; long rflags; int cs, error, ret; ksiginfo_t ksi; pcb = td->td_pcb; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) { uprintf("pid %d (%s): sigreturn copyin failed\n", p->p_pid, td->td_name); return (error); } ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; rflags = ucp->uc_mcontext.mc_rflags; /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(rflags, regs->tf_rflags)) { uprintf("pid %d (%s): sigreturn rflags = 0x%lx\n", p->p_pid, td->td_name, rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", p->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) { uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) { uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", p->p_pid, td->td_name, ret); return (ret); } bcopy(&ucp->uc_mcontext.mc_rdi, regs, sizeof(*regs)); pcb->pcb_fsbase = ucp->uc_mcontext.mc_fsbase; pcb->pcb_gsbase = ucp->uc_mcontext.mc_gsbase; #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); set_pcb_flags(pcb, PCB_FULL_IRET); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sys_sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Reset registers to default values on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock(&dt_lock); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __INITIAL_FPUCW__; set_pcb_flags(pcb, PCB_FULL_IRET); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = ((stack - 8) & ~0xFul) + 8; regs->tf_rdi = stack; /* argv */ regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; td->td_retval[1] = 0; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } clear_pcb_flags(pcb, PCB_DBREGS); } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } void cpu_setregs(void) { register_t cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are also set by npx_probe() for the * BSP. See the comments there about why we set them. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); } /* * Initialize amd64 and configure to run kernel */ /* * Initialize segments & interrupt table */ struct user_segment_descriptor gdt[NGDT * MAXCPU];/* global descriptor tables */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ static char dblfault_stack[PAGE_SIZE] __aligned(16); static char nmi0_stack[PAGE_SIZE] __aligned(16); CTASSERT(sizeof(struct nmi_pcpu) == 16); struct amd64tss common_tss[MAXCPU]; /* * Software prototypes -- in more palatable form. * * Keep GUFS32, GUGS32, GUCODE32 and GUDATA at the same * slots as corresponding segments for i386 kernel. */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GNULL2_SEL 1 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUFS32_SEL 2 32 bit %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS32_SEL 3 32 bit %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GUCODE32_SEL 6 32 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 32/64 bit Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 8 64 bit Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_long = 1, .ssd_def32 = 0, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE - 1, .ssd_type = SDT_SYSTSS, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Actually, the TSS is a system descriptor which is double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 LDT Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 12 LDT Descriptor, double size */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_long = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; void setidt(int idx, inthand_t *func, int typ, int dpl, int ist) { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (uintptr_t)func; ip->gd_selector = GSEL(GCODE_SEL, SEL_KPL); ip->gd_ist = ist; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((uintptr_t)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), IDTVEC(dblfault), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), #endif #ifdef XENHVM IDTVEC(xen_intr_upcall), #endif IDTVEC(fast_syscall), IDTVEC(fast_syscall32); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = ((long)ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. */ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { struct { uint16_t limit; uint64_t base; } __packed idtr, gdtr; uint16_t ldt, tr; __asm __volatile("sidt %0" : "=m" (idtr)); db_printf("idtr\t0x%016lx/%04x\n", (u_long)idtr.base, (u_int)idtr.limit); __asm __volatile("sgdt %0" : "=m" (gdtr)); db_printf("gdtr\t0x%016lx/%04x\n", (u_long)gdtr.base, (u_int)gdtr.limit); __asm __volatile("sldt %0" : "=r" (ldt)); db_printf("ldtr\t0x%04x\n", ldt); __asm __volatile("str %0" : "=r" (tr)); db_printf("tr\t0x%04x\n", tr); db_printf("cr0\t0x%016lx\n", rcr0()); db_printf("cr2\t0x%016lx\n", rcr2()); db_printf("cr3\t0x%016lx\n", rcr3()); db_printf("cr4\t0x%016lx\n", rcr4()); if (rcr4() & CR4_XSAVE) db_printf("xcr0\t0x%016lx\n", rxcr(0)); db_printf("EFER\t0x%016lx\n", rdmsr(MSR_EFER)); if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) db_printf("FEATURES_CTL\t%016lx\n", rdmsr(MSR_IA32_FEATURE_CONTROL)); db_printf("DEBUG_CTL\t0x%016lx\n", rdmsr(MSR_DEBUGCTLMSR)); db_printf("PAT\t0x%016lx\n", rdmsr(MSR_PAT)); db_printf("GSBASE\t0x%016lx\n", rdmsr(MSR_GSBASE)); } DB_SHOW_COMMAND(dbregs, db_show_dbregs) { db_printf("dr0\t0x%016lx\n", rdr0()); db_printf("dr1\t0x%016lx\n", rdr1()); db_printf("dr2\t0x%016lx\n", rdr2()); db_printf("dr3\t0x%016lx\n", rdr3()); db_printf("dr6\t0x%016lx\n", rdr6()); db_printf("dr7\t0x%016lx\n", rdr7()); } #endif void sdtossd(sd, ssd) struct user_segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_long = sd->sd_long; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } void ssdtosd(ssd, sd) struct soft_segment_descriptor *ssd; struct user_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xff; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_long = ssd->ssd_long; sd->sd_def32 = ssd->ssd_def32; sd->sd_gran = ssd->ssd_gran; } void ssdtosyssd(ssd, sd) struct soft_segment_descriptor *ssd; struct system_segment_descriptor *sd; { sd->sd_lobase = (ssd->ssd_base) & 0xffffff; sd->sd_hibase = (ssd->ssd_base >> 24) & 0xfffffffffful; sd->sd_lolimit = (ssd->ssd_limit) & 0xffff; sd->sd_hilimit = (ssd->ssd_limit >> 16) & 0xf; sd->sd_type = ssd->ssd_type; sd->sd_dpl = ssd->ssd_dpl; sd->sd_p = ssd->ssd_p; sd->sd_gran = ssd->ssd_gran; } #if !defined(DEV_ATPIC) && defined(DEV_ISA) #include #include /* * Return a bitmap of the current interrupt requests. This is 8259-specific * and is only suitable for use at probe time. * This is only here to pacify sio. It is NOT FATAL if this doesn't work. * It shouldn't be here. There should probably be an APIC centric * implementation in the apic driver code, if at all. */ intrmask_t isa_irq_pending(void) { u_char irr1; u_char irr2; irr1 = inb(IO_ICU1); irr2 = inb(IO_ICU2); return ((irr2 << 8) | irr1); } #endif u_int basemem; static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. * * NB: physmap_idx points to the next free slot. */ insert_idx = physmap_idx; for (i = 0; i <= physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = (physmap_idx - 2); i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } void bios_add_smap_entries(struct bios_smap *smapbase, u_int32_t smapsize, vm_paddr_t *physmap, int *physmap_idx) { struct bios_smap *smap, *smapend; smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016lx len=%016lx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) continue; if (!add_physmap_entry(smap->base, smap->length, physmap, physmap_idx)) break; } } #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *) ptr) + size)) static void add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, int *physmap_idx) { struct efi_md *map, *p; const char *type; size_t efisz; int ndesc, i; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode" }; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. */ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; if (boothowto & RB_VERBOSE) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { if (p->md_type <= EFI_MD_TYPE_PALCODE) type = types[p->md_type]; else type = ""; printf("%23s %012lx %12p %08lx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. */ break; default: continue; } if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE), physmap, physmap_idx)) break; } } static char bootmethod[16] = ""; SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0, "System firmware boot method"); static void native_parse_memmap(caddr_t kmdp, vm_paddr_t *physmap, int *physmap_idx) { struct bios_smap *smap; struct efi_map_header *efihdr; u_int32_t size; /* * Memory map from INT 15:E820. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes smap. */ efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); smap = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (efihdr == NULL && smap == NULL) panic("No BIOS smap or EFI map info from loader!"); if (efihdr != NULL) { add_efi_map_entries(efihdr, physmap, physmap_idx); strlcpy(bootmethod, "UEFI", sizeof(bootmethod)); } else { size = *((u_int32_t *)smap - 1); bios_add_smap_entries(smap, size, physmap, physmap_idx); strlcpy(bootmethod, "BIOS", sizeof(bootmethod)); } } #define PAGES_PER_GB (1024 * 1024 * 1024 / PAGE_SIZE) /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ static void getmemsize(caddr_t kmdp, u_int64_t first) { int i, physmap_idx, pa_indx, da_indx; vm_paddr_t pa, physmap[PHYSMAP_SIZE]; u_long physmem_start, physmem_tunable, memtest; pt_entry_t *pte; quad_t dcons_addr, dcons_size; int page_counter; bzero(physmap, sizeof(physmap)); physmap_idx = 0; init_ops.parse_memmap(kmdp, physmap, &physmap_idx); physmap_idx -= 2; /* * Find the 'base memory' segment for SMP */ basemem = 0; for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] <= 0xA0000) { basemem = physmap[i + 1] / 1024; break; } } if (basemem == 0 || basemem > 640) { if (bootverbose) printf( "Memory map doesn't contain a basemem segment, faking it"); basemem = 640; } /* * Make hole for "AP -> long mode" bootstrap code. The * mp_bootaddress vector is only available when the kernel * is configured to support APs and APs for the system start * in 32bit mode (e.g. SMP bare metal). */ if (init_ops.mp_bootaddress) { if (physmap[1] >= 0x100000000) panic( "Basemem segment is not suitable for AP bootstrap code!"); physmap[1] = init_ops.mp_bootaddress(physmap[1] / 1024); } /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * The boot memory test is disabled by default, as it takes a * significant amount of time on large-memory systems, and is * unfriendly to virtual machines as it unnecessarily touches all * pages. * * A general name is used as the code may be extended to support * additional tests beyond the current "page present" test. */ memtest = 0; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); /* * Don't allow MAXMEM or hw.physmem to extend the amount of memory * in the system. */ if (Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(&first); /* * Size up each available chunk of physical memory. * * XXX Some BIOSes corrupt low 64KB between suspend and resume. * By default, mask off the first 16 pages unless we appear to be * running in a VM. */ physmem_start = (vm_guest > VM_GUEST_NO ? 1 : 16) << PAGE_SHIFT; TUNABLE_ULONG_FETCH("hw.physmem.start", &physmem_start); if (physmap[0] < physmem_start) { if (physmem_start < PAGE_SIZE) physmap[0] = PAGE_SIZE; else if (physmem_start >= physmap[1]) physmap[0] = round_page(physmap[1] - PAGE_SIZE); else physmap[0] = round_page(physmem_start); } pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP1; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ page_counter = 0; if (memtest != 0) printf("Testing system memory"); for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR1; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= (vm_paddr_t)kernphys && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * Print a "." every GB to show we're making * progress. */ page_counter++; if ((page_counter % PAGES_PER_GB) == 0) printf("."); /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_NC_PWT | PG_NC_PCD; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); if (memtest != 0) printf("\n"); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ msgbufp = (struct msgbuf *)PHYS_TO_DMAP(phys_avail[pa_indx]); } static caddr_t native_parse_preload_data(u_int64_t modulep) { caddr_t kmdp; char *envp; #ifdef DDB vm_offset_t ksym_start; vm_offset_t ksym_end; #endif preload_metadata = (caddr_t)(uintptr_t)(modulep + KERNBASE); preload_bootstrap_relocate(KERNBASE); kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); if (envp != NULL) envp += KERNBASE; init_static_kenv(envp, 0); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); db_fetch_ksymtab(ksym_start, ksym_end); #endif efi_systbl = MD_FETCH(kmdp, MODINFOMD_FW_HANDLE, vm_paddr_t); return (kmdp); } u_int64_t hammer_time(u_int64_t modulep, u_int64_t physfree) { caddr_t kmdp; int gsel_tss, x; struct pcpu *pc; struct nmi_pcpu *np; struct xstate_hdr *xhdr; u_int64_t msr; char *env; size_t kstack0_sz; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); kmdp = init_ops.parse_preload_data(modulep); /* Init basic tunables, hz etc */ init_param1(); thread0.td_kstack = physfree + KERNBASE; thread0.td_kstack_pages = kstack_pages; kstack0_sz = thread0.td_kstack_pages * PAGE_SIZE; bzero((void *)thread0.td_kstack, kstack0_sz); physfree += kstack0_sz; /* * make gdt memory segments */ for (x = 0; x < NGDT; x++) { if (x != GPROC0_SEL && x != (GPROC0_SEL + 1) && x != GUSERLDT_SEL && x != (GUSERLDT_SEL) + 1) ssdtosd(&gdt_segs[x], &gdt[x]); } gdt_segs[GPROC0_SEL].ssd_base = (uintptr_t)&common_tss[0]; ssdtosyssd(&gdt_segs[GPROC0_SEL], (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (long) gdt; lgdt(&r_gdt); pc = &__pcpu[0]; wrmsr(MSR_FSBASE, 0); /* User value */ wrmsr(MSR_GSBASE, (u_int64_t)pc); wrmsr(MSR_KGSBASE, 0); /* User value while in the kernel */ pcpu_init(pc, 0, sizeof(struct pcpu)); dpcpu_init((void *)(physfree + KERNBASE), 0); physfree += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); PCPU_SET(tssp, &common_tss[0]); PCPU_SET(commontssp, &common_tss[0]); PCPU_SET(tss, (struct system_segment_descriptor *)&gdt[GPROC0_SEL]); PCPU_SET(ldt, (struct system_segment_descriptor *)&gdt[GUSERLDT_SEL]); PCPU_SET(fs32p, &gdt[GUFS32_SEL]); PCPU_SET(gs32p, &gdt[GUGS32_SEL]); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS); mtx_init(&dt_lock, "descriptor tables", NULL, MTX_DEF); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DE, &IDTVEC(div), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYSIGT, SEL_KPL, 2); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYSIGT, SEL_UPL, 0); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_UD, &IDTVEC(ill), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NM, &IDTVEC(dna), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_DF, &IDTVEC(dblfault), SDT_SYSIGT, SEL_KPL, 1); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_TS, &IDTVEC(tss), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_NP, &IDTVEC(missing), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_SS, &IDTVEC(stk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_GP, &IDTVEC(prot), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_PF, &IDTVEC(page), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_AC, &IDTVEC(align), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYSIGT, SEL_KPL, 0); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYSIGT, SEL_UPL, 0); #endif #ifdef XENHVM setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYSIGT, SEL_UPL, 0); #endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (long) idt; lidt(&r_idt); /* * Initialize the clock before the console so that console * initialization can use DELAY(). */ clock_init(); /* * Use vt(4) by default for UEFI boot (during the sc(4)/vt(4) * transition). * Once bootblocks have updated, we can test directly for * efi_systbl != NULL here... */ if (preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP) != NULL) vty_set_preferred(VTY_VT); identify_cpu(); /* Final stage of CPU initialization */ initializecpu(); /* Initialize CPU registers */ initializecpucache(); /* doublefault stack space, runs on ist1 */ common_tss[0].tss_ist1 = (long)&dblfault_stack[sizeof(dblfault_stack)]; /* * NMI stack, runs on ist2. The pcpu pointer is stored just * above the start of the ist2 stack. */ np = ((struct nmi_pcpu *) &nmi0_stack[sizeof(nmi0_stack)]) - 1; np->np_pcpu = (register_t) pc; common_tss[0].tss_ist2 = (long) np; /* Set the IO permission bitmap (empty due to tss seg limit) */ common_tss[0].tss_iobase = sizeof(struct amd64tss) + IOPERM_BITMAP_SIZE; gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); ltr(gsel_tss); /* Set up the fast syscall stuff */ msr = rdmsr(MSR_EFER) | EFER_SCE; wrmsr(MSR_EFER, msr); wrmsr(MSR_LSTAR, (u_int64_t)IDTVEC(fast_syscall)); wrmsr(MSR_CSTAR, (u_int64_t)IDTVEC(fast_syscall32)); msr = ((u_int64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) | ((u_int64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48); wrmsr(MSR_STAR, msr); wrmsr(MSR_SF_MASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D); getmemsize(kmdp, physfree); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ cninit(); #ifdef DEV_ISA #ifdef DEV_ATPIC elcr_probe(); atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYSIGT, SEL_KPL, 0); #endif #else #error "have you forgotten the isa device?"; #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif msgbufinit(msgbufp, msgbufsize); fpuinit(); /* * Set up thread0 pcb after fpuinit calculated pcb + fpu save * area size. Zero out the extended state header in fpu save * area. */ thread0.td_pcb = get_pcb_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } /* make an initial tss so cpu can get interrupt stack on syscall! */ common_tss[0].tss_rsp0 = (vm_offset_t)thread0.td_pcb; /* Ensure the stack is aligned to 16 bytes */ common_tss[0].tss_rsp0 &= ~0xFul; PCPU_SET(rsp0, common_tss[0].tss_rsp0); PCPU_SET(curpcb, thread0.td_pcb); /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); _ucode32sel = GSEL(GUCODE32_SEL, SEL_UPL); _ufssel = GSEL(GUFS32_SEL, SEL_UPL); _ugssel = GSEL(GUGS32_SEL, SEL_UPL); load_ds(_udatasel); load_es(_udatasel); load_fs(_ufssel); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; thread0.td_frame = &proc0_tf; env = kern_getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif /* Location of kernel stack for locore */ return ((u_int64_t)thread0.td_pcb); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } static int smap_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct bios_smap *smapbase; struct bios_smap_xattr smap; caddr_t kmdp; uint32_t *smapattr; int count, error, i; /* Retrieve the system memory map from the loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) return (0); smapattr = (uint32_t *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP_XATTR); count = *((uint32_t *)smapbase - 1) / sizeof(*smapbase); error = 0; for (i = 0; i < count; i++) { smap.base = smapbase[i].base; smap.length = smapbase[i].length; smap.type = smapbase[i].type; if (smapattr != NULL) smap.xattr = smapattr[i]; else smap.xattr = 0; error = SYSCTL_OUT(req, &smap, sizeof(smap)); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); static int efi_map_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct efi_map_header *efihdr; caddr_t kmdp; uint32_t efisize; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr == NULL) return (0); efisize = *((uint32_t *)efihdr - 1); return (SYSCTL_OUT(req, efihdr, efisize)); } SYSCTL_PROC(_machdep, OID_AUTO, efi_map, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, efi_map_sysctl_handler, "S,efi_map_header", "Raw EFI Memory Map"); void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; critical_exit(); flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(flags); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_r12 = tf->tf_r12; pcb->pcb_r13 = tf->tf_r13; pcb->pcb_r14 = tf->tf_r14; pcb->pcb_r15 = tf->tf_r15; pcb->pcb_rbp = tf->tf_rbp; pcb->pcb_rbx = tf->tf_rbx; pcb->pcb_rip = tf->tf_rip; pcb->pcb_rsp = tf->tf_rsp; } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_rip = addr; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_rflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_rflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; tp = td->td_frame; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_r15 = tp->tf_r15; regs->r_r14 = tp->tf_r14; regs->r_r13 = tp->tf_r13; regs->r_r12 = tp->tf_r12; regs->r_r11 = tp->tf_r11; regs->r_r10 = tp->tf_r10; regs->r_r9 = tp->tf_r9; regs->r_r8 = tp->tf_r8; regs->r_rdi = tp->tf_rdi; regs->r_rsi = tp->tf_rsi; regs->r_rbp = tp->tf_rbp; regs->r_rbx = tp->tf_rbx; regs->r_rdx = tp->tf_rdx; regs->r_rcx = tp->tf_rcx; regs->r_rax = tp->tf_rax; regs->r_rip = tp->tf_rip; regs->r_cs = tp->tf_cs; regs->r_rflags = tp->tf_rflags; regs->r_rsp = tp->tf_rsp; regs->r_ss = tp->tf_ss; if (tp->tf_flags & TF_HASSEGS) { regs->r_ds = tp->tf_ds; regs->r_es = tp->tf_es; regs->r_fs = tp->tf_fs; regs->r_gs = tp->tf_gs; } else { regs->r_ds = 0; regs->r_es = 0; regs->r_fs = 0; regs->r_gs = 0; } return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tp; register_t rflags; tp = td->td_frame; rflags = regs->r_rflags & 0xffffffff; if (!EFL_SECURE(rflags, tp->tf_rflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); tp->tf_r15 = regs->r_r15; tp->tf_r14 = regs->r_r14; tp->tf_r13 = regs->r_r13; tp->tf_r12 = regs->r_r12; tp->tf_r11 = regs->r_r11; tp->tf_r10 = regs->r_r10; tp->tf_r9 = regs->r_r9; tp->tf_r8 = regs->r_r8; tp->tf_rdi = regs->r_rdi; tp->tf_rsi = regs->r_rsi; tp->tf_rbp = regs->r_rbp; tp->tf_rbx = regs->r_rbx; tp->tf_rdx = regs->r_rdx; tp->tf_rcx = regs->r_rcx; tp->tf_rax = regs->r_rax; tp->tf_rip = regs->r_rip; tp->tf_cs = regs->r_cs; tp->tf_rflags = rflags; tp->tf_rsp = regs->r_rsp; tp->tf_ss = regs->r_ss; if (0) { /* XXXKIB */ tp->tf_ds = regs->r_ds; tp->tf_es = regs->r_es; tp->tf_fs = regs->r_fs; tp->tf_gs = regs->r_gs; tp->tf_flags = TF_HASSEGS; } set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (0); } /* XXX check all this stuff! */ /* externalize from sv_xmm */ static void fill_fpregs_xmm(struct savefpu *sv_xmm, struct fpreg *fpregs) { struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; struct envxmm *penv_xmm = &sv_xmm->sv_env; int i; /* pcb -> fpregs */ bzero(fpregs, sizeof(*fpregs)); /* FPU control/status */ penv_fpreg->en_cw = penv_xmm->en_cw; penv_fpreg->en_sw = penv_xmm->en_sw; penv_fpreg->en_tw = penv_xmm->en_tw; penv_fpreg->en_opcode = penv_xmm->en_opcode; penv_fpreg->en_rip = penv_xmm->en_rip; penv_fpreg->en_rdp = penv_xmm->en_rdp; penv_fpreg->en_mxcsr = penv_xmm->en_mxcsr; penv_fpreg->en_mxcsr_mask = penv_xmm->en_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(sv_xmm->sv_fp[i].fp_acc.fp_bytes, fpregs->fpr_acc[i], 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(sv_xmm->sv_xmm[i].xmm_bytes, fpregs->fpr_xacc[i], 16); } /* internalize from fpregs into sv_xmm */ static void set_fpregs_xmm(struct fpreg *fpregs, struct savefpu *sv_xmm) { struct envxmm *penv_xmm = &sv_xmm->sv_env; struct envxmm *penv_fpreg = (struct envxmm *)&fpregs->fpr_env; int i; /* fpregs -> pcb */ /* FPU control/status */ penv_xmm->en_cw = penv_fpreg->en_cw; penv_xmm->en_sw = penv_fpreg->en_sw; penv_xmm->en_tw = penv_fpreg->en_tw; penv_xmm->en_opcode = penv_fpreg->en_opcode; penv_xmm->en_rip = penv_fpreg->en_rip; penv_xmm->en_rdp = penv_fpreg->en_rdp; penv_xmm->en_mxcsr = penv_fpreg->en_mxcsr; penv_xmm->en_mxcsr_mask = penv_fpreg->en_mxcsr_mask & cpu_mxcsr_mask; /* FPU registers */ for (i = 0; i < 8; ++i) bcopy(fpregs->fpr_acc[i], sv_xmm->sv_fp[i].fp_acc.fp_bytes, 10); /* SSE registers */ for (i = 0; i < 16; ++i) bcopy(fpregs->fpr_xacc[i], sv_xmm->sv_xmm[i].xmm_bytes, 16); } /* externalize from td->pcb */ int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); fpugetregs(td); fill_fpregs_xmm(get_pcb_user_save_td(td), fpregs); return (0); } /* internalize to td->pcb */ int set_fpregs(struct thread *td, struct fpreg *fpregs) { set_fpregs_xmm(fpregs, get_pcb_user_save_td(td)); fpuuserinited(td); return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct pcb *pcb; struct trapframe *tp; pcb = td->td_pcb; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_rsp); PROC_UNLOCK(curthread->td_proc); mcp->mc_r15 = tp->tf_r15; mcp->mc_r14 = tp->tf_r14; mcp->mc_r13 = tp->tf_r13; mcp->mc_r12 = tp->tf_r12; mcp->mc_r11 = tp->tf_r11; mcp->mc_r10 = tp->tf_r10; mcp->mc_r9 = tp->tf_r9; mcp->mc_r8 = tp->tf_r8; mcp->mc_rdi = tp->tf_rdi; mcp->mc_rsi = tp->tf_rsi; mcp->mc_rbp = tp->tf_rbp; mcp->mc_rbx = tp->tf_rbx; mcp->mc_rcx = tp->tf_rcx; mcp->mc_rflags = tp->tf_rflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_rax = 0; mcp->mc_rdx = 0; mcp->mc_rflags &= ~PSL_C; } else { mcp->mc_rax = tp->tf_rax; mcp->mc_rdx = tp->tf_rdx; } mcp->mc_rip = tp->tf_rip; mcp->mc_cs = tp->tf_cs; mcp->mc_rsp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_ds = tp->tf_ds; mcp->mc_es = tp->tf_es; mcp->mc_fs = tp->tf_fs; mcp->mc_gs = tp->tf_gs; mcp->mc_flags = tp->tf_flags; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare, sizeof(mcp->mc_spare)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct pcb *pcb; struct trapframe *tp; char *xfpustate; long rflags; int ret; pcb = td->td_pcb; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); rflags = (mcp->mc_rflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_r15 = mcp->mc_r15; tp->tf_r14 = mcp->mc_r14; tp->tf_r13 = mcp->mc_r13; tp->tf_r12 = mcp->mc_r12; tp->tf_r11 = mcp->mc_r11; tp->tf_r10 = mcp->mc_r10; tp->tf_r9 = mcp->mc_r9; tp->tf_r8 = mcp->mc_r8; tp->tf_rdi = mcp->mc_rdi; tp->tf_rsi = mcp->mc_rsi; tp->tf_rbp = mcp->mc_rbp; tp->tf_rbx = mcp->mc_rbx; tp->tf_rdx = mcp->mc_rdx; tp->tf_rcx = mcp->mc_rcx; tp->tf_rax = mcp->mc_rax; tp->tf_rip = mcp->mc_rip; tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_rsp; tp->tf_ss = mcp->mc_ss; tp->tf_flags = mcp->mc_flags; if (tp->tf_flags & TF_HASSEGS) { tp->tf_ds = mcp->mc_ds; tp->tf_es = mcp->mc_es; tp->tf_fs = mcp->mc_fs; tp->tf_gs = mcp->mc_gs; } if (mcp->mc_flags & _MC_HASBASES) { pcb->pcb_fsbase = mcp->mc_fsbase; pcb->pcb_gsbase = mcp->mc_gsbase; } set_pcb_flags(pcb, PCB_FULL_IRET); return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len) { size_t max_len, len; mcp->mc_ownedfp = fpugetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(struct savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, len - max_len); } mcp->mc_flags |= _MC_HASFPXSTATE; mcp->mc_xfpustate_len = len; bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { struct savefpu *fpstate; int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { fpstate = (struct savefpu *)&mcp->mc_fpstate; fpstate->sv_env.en_mxcsr &= cpu_mxcsr_mask; error = fpusetregs(td, fpstate, xfpustate, xfpustate_len); } else return (EINVAL); return (error); } void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); if (PCPU_GET(fpcurthread) == td) fpudrop(); /* * XXX force a full drop of the fpu. The above only drops it if we * owned it. * * XXX I don't much like fpugetuserregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of fpugetuserregs()... perhaps we just * have too many layers. */ clear_pcb_flags(curthread->td_pcb, PCB_FPUINITDONE | PCB_USERFPUINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[8] = 0; dbregs->dr[9] = 0; dbregs->dr[10] = 0; dbregs->dr[11] = 0; dbregs->dr[12] = 0; dbregs->dr[13] = 0; dbregs->dr[14] = 0; dbregs->dr[15] = 0; return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP or a general protection fault right here. * Upper bits of dr6 and dr7 must not be set */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (td->td_frame->tf_cs == _ucode32sel && DBREG_DR7_LEN(dbregs->dr[7], i) == DBREG_DR7_LEN_8) return (EINVAL); } if ((dbregs->dr[6] & 0xffffffff00000000ul) != 0 || (dbregs->dr[7] & 0xffffffff00000000ul) != 0) return (EINVAL); pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; set_pcb_flags(pcb, PCB_DBREGS); } return (0); } void reset_dbregs(void) { load_dr7(0); /* Turn off the control bits first */ load_dr0(0); load_dr1(0); load_dr2(0); load_dr3(0); load_dr6(0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int64_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int64_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ Index: head/sys/amd64/ia32/ia32_signal.c =================================================================== --- head/sys/amd64/ia32/ia32_signal.c (revision 294929) +++ head/sys/amd64/ia32/ia32_signal.c (revision 294930) @@ -1,972 +1,972 @@ /*- * Copyright (c) 2003 Peter Wemm * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD4 static void freebsd4_ia32_sendsig(sig_t, ksiginfo_t *, sigset_t *); #endif #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) #define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) static void ia32_get_fpcontext(struct thread *td, struct ia32_mcontext *mcp, char *xfpusave, size_t xfpusave_len) { size_t max_len, len; /* * XXX Format of 64bit and 32bit FXSAVE areas differs. FXSAVE * in 32bit mode saves %cs and %ds, while on 64bit it saves * 64bit instruction and data pointers. Ignore the difference * for now, it should be irrelevant for most applications. */ mcp->mc_ownedfp = fpugetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = fpuformat(); if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(struct savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, len - max_len); } mcp->mc_flags |= _MC_IA32_HASFPXSTATE; mcp->mc_xfpustate_len = len; bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); } static int ia32_set_fpcontext(struct thread *td, struct ia32_mcontext *mcp, char *xfpustate, size_t xfpustate_len) { int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { error = fpusetregs(td, (struct savefpu *)&mcp->mc_fpstate, xfpustate, xfpustate_len); } else return (EINVAL); return (error); } /* * Get machine context. */ static int ia32_get_mcontext(struct thread *td, struct ia32_mcontext *mcp, int flags) { struct pcb *pcb; struct trapframe *tp; pcb = td->td_pcb; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_rsp); PROC_UNLOCK(curthread->td_proc); /* Entry into kernel always sets TF_HASSEGS */ mcp->mc_gs = tp->tf_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_rdi; mcp->mc_esi = tp->tf_rsi; mcp->mc_ebp = tp->tf_rbp; mcp->mc_isp = tp->tf_rsp; mcp->mc_eflags = tp->tf_rflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_rax; mcp->mc_edx = tp->tf_rdx; } mcp->mc_ebx = tp->tf_rbx; mcp->mc_ecx = tp->tf_rcx; mcp->mc_eip = tp->tf_rip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_rsp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); mcp->mc_flags = tp->tf_flags; ia32_get_fpcontext(td, mcp, NULL, 0); mcp->mc_fsbase = pcb->pcb_fsbase; mcp->mc_gsbase = pcb->pcb_gsbase; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ static int ia32_set_mcontext(struct thread *td, struct ia32_mcontext *mcp) { struct trapframe *tp; char *xfpustate; long rflags; int ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp)) return (EINVAL); rflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_rflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_IA32_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin(PTRIN(mcp->mc_xfpustate), xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = ia32_set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_gs = mcp->mc_gs; tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_flags = TF_HASSEGS; tp->tf_rdi = mcp->mc_edi; tp->tf_rsi = mcp->mc_esi; tp->tf_rbp = mcp->mc_ebp; tp->tf_rbx = mcp->mc_ebx; tp->tf_rdx = mcp->mc_edx; tp->tf_rcx = mcp->mc_ecx; tp->tf_rax = mcp->mc_eax; /* trapno, err */ tp->tf_rip = mcp->mc_eip; tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (0); } /* * The first two fields of a ucontext_t are the signal mask and * the machine context. The next field is uc_link; we want to * avoid destroying the link when copying out contexts. */ #define UC_COPY_SIZE offsetof(struct ia32_ucontext, uc_link) int freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap) { struct ia32_ucontext uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { ia32_get_mcontext(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); bzero(&uc.__spare__, sizeof(uc.__spare__)); ret = copyout(&uc, uap->ucp, UC_COPY_SIZE); } return (ret); } int freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap) { struct ia32_ucontext uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { ret = copyin(uap->ucp, &uc, UC_COPY_SIZE); if (ret == 0) { ret = ia32_set_mcontext(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } return (ret == 0 ? EJUSTRETURN : ret); } int freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap) { struct ia32_ucontext uc; int ret; if (uap->oucp == NULL || uap->ucp == NULL) ret = EINVAL; else { ia32_get_mcontext(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->oucp, UC_COPY_SIZE); if (ret == 0) { ret = copyin(uap->ucp, &uc, UC_COPY_SIZE); if (ret == 0) { ret = ia32_set_mcontext(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } } return (ret == 0 ? EJUSTRETURN : ret); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void ia32_osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct ia32_sigframe3 sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - fp = (struct ia32_sigframe3 *)(td->td_sigstk.ss_sp + + fp = (struct ia32_sigframe3 *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(sf)); td->td_sigstk.ss_flags |= SS_ONSTACK; } else fp = (struct ia32_sigframe3 *)regs->tf_rsp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ah = (uintptr_t)catcher; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ah = (uintptr_t)catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_rax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_rbx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_rcx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_rdx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_rsi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_rdi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = regs->tf_gs; sf.sf_siginfo.si_sc.sc_isp = regs->tf_rsp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_esp = regs->tf_rsp; sf.sf_siginfo.si_sc.sc_ebp = regs->tf_rbp; sf.sf_siginfo.si_sc.sc_eip = regs->tf_rip; sf.sf_siginfo.si_sc.sc_eflags = regs->tf_rflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (uintptr_t)fp; regs->tf_rip = p->p_sysent->sv_psstrings - sz_ia32_osigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct ia32_sigframe4 sf, *sfp; struct siginfo32 siginfo; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int oonstack; int sig; td = curthread; p = td->td_proc; siginfo_to_siginfo32(&ksi->ksi_info, &siginfo); PROC_LOCK_ASSERT(p, MA_OWNED); sig = siginfo.si_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp; sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_edi = regs->tf_rdi; sf.sf_uc.uc_mcontext.mc_esi = regs->tf_rsi; sf.sf_uc.uc_mcontext.mc_ebp = regs->tf_rbp; sf.sf_uc.uc_mcontext.mc_isp = regs->tf_rsp; /* XXX */ sf.sf_uc.uc_mcontext.mc_ebx = regs->tf_rbx; sf.sf_uc.uc_mcontext.mc_edx = regs->tf_rdx; sf.sf_uc.uc_mcontext.mc_ecx = regs->tf_rcx; sf.sf_uc.uc_mcontext.mc_eax = regs->tf_rax; sf.sf_uc.uc_mcontext.mc_trapno = regs->tf_trapno; sf.sf_uc.uc_mcontext.mc_err = regs->tf_err; sf.sf_uc.uc_mcontext.mc_eip = regs->tf_rip; sf.sf_uc.uc_mcontext.mc_cs = regs->tf_cs; sf.sf_uc.uc_mcontext.mc_eflags = regs->tf_rflags; sf.sf_uc.uc_mcontext.mc_esp = regs->tf_rsp; sf.sf_uc.uc_mcontext.mc_ss = regs->tf_ss; sf.sf_uc.uc_mcontext.mc_ds = regs->tf_ds; sf.sf_uc.uc_mcontext.mc_es = regs->tf_es; sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs; sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs; bzero(sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); bzero(sf.sf_uc.uc_mcontext.__spare__, sizeof(sf.sf_uc.uc_mcontext.__spare__)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - sfp = (struct ia32_sigframe4 *)(td->td_sigstk.ss_sp + + sfp = (struct ia32_sigframe4 *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(sf)); } else sfp = (struct ia32_sigframe4 *)regs->tf_rsp - 1; PROC_UNLOCK(p); /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (u_int32_t)(uintptr_t)&sfp->sf_si; sf.sf_ah = (u_int32_t)(uintptr_t)catcher; /* Fill in POSIX parts */ sf.sf_si = siginfo; sf.sf_si.si_signo = sig; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = siginfo.si_code; sf.sf_addr = (u_int32_t)siginfo.si_addr; sf.sf_ah = (u_int32_t)(uintptr_t)catcher; } mtx_unlock(&psp->ps_mtx); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (uintptr_t)sfp; regs->tf_rip = p->p_sysent->sv_sigcode_base + sz_ia32_sigcode - sz_freebsd4_ia32_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); /* leave user %fs and %gs untouched */ PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void ia32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct ia32_sigframe sf, *sfp; struct siginfo32 siginfo; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; char *xfpusave; size_t xfpusave_len; int oonstack; int sig; siginfo_to_siginfo32(&ksi->ksi_info, &siginfo); td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = siginfo.si_signo; psp = p->p_sigacts; #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_ia32_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { ia32_osendsig(catcher, ksi, mask); return; } #endif mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); if (cpu_max_ext_state_size > sizeof(struct savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(struct savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp; sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_edi = regs->tf_rdi; sf.sf_uc.uc_mcontext.mc_esi = regs->tf_rsi; sf.sf_uc.uc_mcontext.mc_ebp = regs->tf_rbp; sf.sf_uc.uc_mcontext.mc_isp = regs->tf_rsp; /* XXX */ sf.sf_uc.uc_mcontext.mc_ebx = regs->tf_rbx; sf.sf_uc.uc_mcontext.mc_edx = regs->tf_rdx; sf.sf_uc.uc_mcontext.mc_ecx = regs->tf_rcx; sf.sf_uc.uc_mcontext.mc_eax = regs->tf_rax; sf.sf_uc.uc_mcontext.mc_trapno = regs->tf_trapno; sf.sf_uc.uc_mcontext.mc_err = regs->tf_err; sf.sf_uc.uc_mcontext.mc_eip = regs->tf_rip; sf.sf_uc.uc_mcontext.mc_cs = regs->tf_cs; sf.sf_uc.uc_mcontext.mc_eflags = regs->tf_rflags; sf.sf_uc.uc_mcontext.mc_esp = regs->tf_rsp; sf.sf_uc.uc_mcontext.mc_ss = regs->tf_ss; sf.sf_uc.uc_mcontext.mc_ds = regs->tf_ds; sf.sf_uc.uc_mcontext.mc_es = regs->tf_es; sf.sf_uc.uc_mcontext.mc_fs = regs->tf_fs; sf.sf_uc.uc_mcontext.mc_gs = regs->tf_gs; sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ ia32_get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); sf.sf_uc.uc_mcontext.mc_fsbase = td->td_pcb->pcb_fsbase; sf.sf_uc.uc_mcontext.mc_gsbase = td->td_pcb->pcb_gsbase; bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) - sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; + sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size; else sp = (char *)regs->tf_rsp; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned long)sp & ~0x3Ful); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(sf); /* Align to 16 bytes. */ sfp = (struct ia32_sigframe *)((uintptr_t)sp & ~0xF); PROC_UNLOCK(p); /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (u_int32_t)(uintptr_t)&sfp->sf_si; sf.sf_ah = (u_int32_t)(uintptr_t)catcher; /* Fill in POSIX parts */ sf.sf_si = siginfo; sf.sf_si.si_signo = sig; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = siginfo.si_code; sf.sf_addr = (u_int32_t)siginfo.si_addr; sf.sf_ah = (u_int32_t)(uintptr_t)catcher; } mtx_unlock(&psp->ps_mtx); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, PTRIN(sf.sf_uc.uc_mcontext.mc_xfpustate), xfpusave_len) != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (uintptr_t)sfp; regs->tf_rip = p->p_sysent->sv_sigcode_base; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); /* XXXKIB leave user %fs and %gs untouched */ PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. */ #ifdef COMPAT_43 int ofreebsd32_sigreturn(struct thread *td, struct ofreebsd32_sigreturn_args *uap) { struct ia32_sigcontext3 sc, *scp; struct trapframe *regs; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_eflags; if (!EFL_SECURE(eflags, regs->tf_rflags)) { return (EINVAL); } if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; regs->tf_gs = scp->sc_gs; regs->tf_rax = scp->sc_eax; regs->tf_rbx = scp->sc_ebx; regs->tf_rcx = scp->sc_ecx; regs->tf_rdx = scp->sc_edx; regs->tf_rsi = scp->sc_esi; regs->tf_rdi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_rbp = scp->sc_ebp; regs->tf_rsp = scp->sc_esp; regs->tf_rip = scp->sc_eip; regs->tf_rflags = eflags; if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL, SIGPROCMASK_OLD); set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } #endif #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_freebsd32_sigreturn(td, uap) struct thread *td; struct freebsd4_freebsd32_sigreturn_args /* { const struct freebsd4_freebsd32_ucontext *sigcntxp; } */ *uap; { struct ia32_ucontext4 uc; struct trapframe *regs; struct ia32_ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_rflags)) { uprintf("pid %d (%s): freebsd4_freebsd32_sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_rdi = ucp->uc_mcontext.mc_edi; regs->tf_rsi = ucp->uc_mcontext.mc_esi; regs->tf_rbp = ucp->uc_mcontext.mc_ebp; regs->tf_rbx = ucp->uc_mcontext.mc_ebx; regs->tf_rdx = ucp->uc_mcontext.mc_edx; regs->tf_rcx = ucp->uc_mcontext.mc_ecx; regs->tf_rax = ucp->uc_mcontext.mc_eax; regs->tf_trapno = ucp->uc_mcontext.mc_trapno; regs->tf_err = ucp->uc_mcontext.mc_err; regs->tf_rip = ucp->uc_mcontext.mc_eip; regs->tf_cs = cs; regs->tf_rflags = ucp->uc_mcontext.mc_eflags; regs->tf_rsp = ucp->uc_mcontext.mc_esp; regs->tf_ss = ucp->uc_mcontext.mc_ss; regs->tf_ds = ucp->uc_mcontext.mc_ds; regs->tf_es = ucp->uc_mcontext.mc_es; regs->tf_fs = ucp->uc_mcontext.mc_fs; regs->tf_gs = ucp->uc_mcontext.mc_gs; kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int freebsd32_sigreturn(td, uap) struct thread *td; struct freebsd32_sigreturn_args /* { const struct freebsd32_ucontext *sigcntxp; } */ *uap; { struct ia32_ucontext uc; struct trapframe *regs; struct ia32_ucontext *ucp; char *xfpustate; size_t xfpustate_len; int cs, eflags, error, ret; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_rflags)) { uprintf("pid %d (%s): freebsd32_sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } if ((ucp->uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(struct savefpu)) { uprintf("pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", td->td_proc->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin(PTRIN(ucp->uc_mcontext.mc_xfpustate), xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", td->td_proc->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = ia32_set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) { uprintf("pid %d (%s): sigreturn set_fpcontext err %d\n", td->td_proc->p_pid, td->td_name, ret); return (ret); } regs->tf_rdi = ucp->uc_mcontext.mc_edi; regs->tf_rsi = ucp->uc_mcontext.mc_esi; regs->tf_rbp = ucp->uc_mcontext.mc_ebp; regs->tf_rbx = ucp->uc_mcontext.mc_ebx; regs->tf_rdx = ucp->uc_mcontext.mc_edx; regs->tf_rcx = ucp->uc_mcontext.mc_ecx; regs->tf_rax = ucp->uc_mcontext.mc_eax; regs->tf_trapno = ucp->uc_mcontext.mc_trapno; regs->tf_err = ucp->uc_mcontext.mc_err; regs->tf_rip = ucp->uc_mcontext.mc_eip; regs->tf_cs = cs; regs->tf_rflags = ucp->uc_mcontext.mc_eflags; regs->tf_rsp = ucp->uc_mcontext.mc_esp; regs->tf_ss = ucp->uc_mcontext.mc_ss; regs->tf_ds = ucp->uc_mcontext.mc_ds; regs->tf_es = ucp->uc_mcontext.mc_es; regs->tf_fs = ucp->uc_mcontext.mc_fs; regs->tf_gs = ucp->uc_mcontext.mc_gs; regs->tf_flags = TF_HASSEGS; kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } /* * Clear registers on exec */ void ia32_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock(&dt_lock); #ifdef COMPAT_43 setup_lcall_gate(); #endif pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; pcb->pcb_initial_fpucw = __INITIAL_FPUCW_I386__; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_cs = _ucode32sel; regs->tf_rbx = imgp->ps_strings; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; fpstate_drop(td); /* Return via doreti so that we can change to a different %cs */ set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); td->td_retval[1] = 0; } Index: head/sys/amd64/linux/linux_sysvec.c =================================================================== --- head/sys/amd64/linux/linux_sysvec.c (revision 294929) +++ head/sys/amd64/linux/linux_sysvec.c (revision 294930) @@ -1,994 +1,994 @@ /*- * Copyright (c) 2013 Dmitry Chagin * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2003 Peter Wemm * Copyright (c) 2002 Doug Rabson * Copyright (c) 1998-1999 Andrew Gallatin * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #define __ELF_WORD_SIZE 64 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux64, 1); #if BYTE_ORDER == LITTLE_ENDIAN #define SHELLMAGIC 0x2123 /* #! */ #else #define SHELLMAGIC 0x2321 #endif #if defined(DEBUG) SYSCTL_PROC(_compat_linux, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, linux_sysctl_debug, "A", "Linux 64 debugging control"); #endif /* * Allow the this functions to use the ldebug() facility * even though they are not syscalls themselves. Map them * to syscall 0. This is slightly less bogus than using * ldebug(sigreturn). */ #define LINUX_SYS_linux_rt_sendsig 0 const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static register_t * linux_copyout_strings(struct image_params *imgp); static int elf_linux_fixup(register_t **stack_base, struct image_params *iparams); static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static void linux_set_syscall_retval(struct thread *td, int error); static int linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa); static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack); static int linux_vsyscall(struct thread *td); /* * Linux syscalls return negative errno's, we do positive and map them * Reference: * FreeBSD: src/sys/sys/errno.h * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h * linux-2.6.17.8/include/asm-generic/errno.h */ static int bsd_to_linux_errno[ELAST + 1] = { -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, -72, -67, -71 }; #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)td_proc; frame = td->td_frame; sa->args[0] = frame->tf_rdi; sa->args[1] = frame->tf_rsi; sa->args[2] = frame->tf_rdx; sa->args[3] = frame->tf_rcx; sa->args[4] = frame->tf_r8; sa->args[5] = frame->tf_r9; sa->code = frame->tf_rax; if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; return (0); } static void linux_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame = td->td_frame; /* * On Linux only %rcx and %r11 values are not preserved across * the syscall. * So, do not clobber %rdx and %r10 */ td->td_retval[1] = frame->tf_rdx; frame->tf_r10 = frame->tf_rcx; cpu_set_syscall_retval(td, error); /* Restore all registers. */ set_pcb_flags(td->td_pcb, PCB_FULL_IRET); } static int elf_linux_fixup(register_t **stack_base, struct image_params *imgp) { Elf_Auxargs *args; Elf_Addr *base; Elf_Addr *pos; struct ps_strings *arginfo; struct proc *p; int issetugid; p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; KASSERT(curthread->td_proc == imgp->proc, ("unsafe elf_linux_fixup(), should be curproc")); base = (Elf64_Addr *)*stack_base; args = (Elf64_Auxargs *)imgp->auxargs; pos = base + (imgp->args->argc + imgp->args->envc + 2); issetugid = p->p_flag & P_SUGID ? 1 : 0; AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; base--; suword(base, (uint64_t)imgp->args->argc); *stack_base = (register_t *)base; return (0); } /* * Copy strings out to the new process address space, constructing new arg * and env vector tables. Return a pointer to the base so that it can be used * as the initial stack pointer. */ static register_t * linux_copyout_strings(struct image_params *imgp) { int argc, envc; char **vectp; char *stringp, *destp; register_t *stack_base; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; /* * Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; p = imgp->proc; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (caddr_t)arginfo - SPARE_USRSPACE - roundup(sizeof(canary), sizeof(char *)) - roundup(execpath_len, sizeof(char *)) - roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); if (execpath_len != 0) { imgp->execpathp = (uintptr_t)arginfo - execpath_len; copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); imgp->canary = (uintptr_t)arginfo - roundup(execpath_len, sizeof(char *)) - roundup(sizeof(canary), sizeof(char *)); copyout(canary, (void *)imgp->canary, sizeof(canary)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (LINUX_AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(char *)); } /* * vectp also becomes our initial stack base */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword(vectp, 0); return (stack_base); } /* * Reset registers to default values on exec. */ static void linux_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock(&dt_lock); pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; clear_pcb_flags(pcb, PCB_32BIT); pcb->pcb_initial_fpucw = __LINUX_NPXCW__; set_pcb_flags(pcb, PCB_FULL_IRET); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } clear_pcb_flags(pcb, PCB_DBREGS); } /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); } /* * Copied from amd64/amd64/machdep.c * * XXX fpu state need? don't think so */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct proc *p; struct l_ucontext uc; struct l_sigcontext *context; struct trapframe *regs; unsigned long rflags; int error; ksiginfo_t ksi; regs = td->td_frame; error = copyin((void *)regs->tf_rbx, &uc, sizeof(uc)); if (error != 0) return (error); p = td->td_proc; context = &uc.uc_mcontext; rflags = context->sc_rflags; /* * Don't allow users to change privileged or reserved flags. */ /* * XXX do allow users to change the privileged flag PSL_RF. * The cpu sets PSL_RF in tf_rflags for faults. Debuggers * should sometimes set it there too. tf_rflags is kept in * the signal context during signal handling and there is no * other place to remember it, so the PSL_RF bit may be * corrupted by the signal handler without us knowing. * Corruption of the PSL_RF bit at worst causes one more or * one less debugger trap, so allowing it is fairly harmless. */ #define RFLAG_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) if (!RFLAG_SECURE(rflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) { printf("linux_rt_sigreturn: rflags = 0x%lx\n", rflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { printf("linux_rt_sigreturn: cs = 0x%x\n", context->sc_cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return (EINVAL); } PROC_LOCK(p); linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); SIG_CANTMASK(td->td_sigmask); signotify(td); PROC_UNLOCK(p); regs->tf_rdi = context->sc_rdi; regs->tf_rsi = context->sc_rsi; regs->tf_rdx = context->sc_rdx; regs->tf_rbp = context->sc_rbp; regs->tf_rbx = context->sc_rbx; regs->tf_rcx = context->sc_rcx; regs->tf_rax = context->sc_rax; regs->tf_rip = context->sc_rip; regs->tf_rsp = context->sc_rsp; regs->tf_r8 = context->sc_r8; regs->tf_r9 = context->sc_r9; regs->tf_r10 = context->sc_r10; regs->tf_r11 = context->sc_r11; regs->tf_r12 = context->sc_r12; regs->tf_r13 = context->sc_r13; regs->tf_r14 = context->sc_r14; regs->tf_r15 = context->sc_r15; regs->tf_cs = context->sc_cs; regs->tf_err = context->sc_err; regs->tf_rflags = rflags; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } /* * copied from amd64/amd64/machdep.c * * Send an interrupt to process. */ static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct l_rt_sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; caddr_t sp; struct trapframe *regs; int sig, code; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; code = ksi->ksi_code; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); LINUX_CTR4(rt_sendsig, "%p, %d, %p, %u", catcher, sig, mask, code); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size - + sp = (caddr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe); } else sp = (caddr_t)regs->tf_rsp - sizeof(struct l_rt_sigframe) - 128; /* Align to 16 bytes. */ sfp = (struct l_rt_sigframe *)((unsigned long)sp & ~0xFul); mtx_unlock(&psp->ps_mtx); /* Translate the signal. */ sig = bsd_to_linux_signal(sig); /* Save user context. */ bzero(&sf, sizeof(sf)); bsd_to_linux_sigset(mask, &sf.sf_sc.uc_sigmask); bsd_to_linux_sigset(mask, &sf.sf_sc.uc_mcontext.sc_mask); sf.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); sf.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); sf.sf_sc.uc_mcontext.sc_rdi = regs->tf_rdi; sf.sf_sc.uc_mcontext.sc_rsi = regs->tf_rsi; sf.sf_sc.uc_mcontext.sc_rdx = regs->tf_rdx; sf.sf_sc.uc_mcontext.sc_rbp = regs->tf_rbp; sf.sf_sc.uc_mcontext.sc_rbx = regs->tf_rbx; sf.sf_sc.uc_mcontext.sc_rcx = regs->tf_rcx; sf.sf_sc.uc_mcontext.sc_rax = regs->tf_rax; sf.sf_sc.uc_mcontext.sc_rip = regs->tf_rip; sf.sf_sc.uc_mcontext.sc_rsp = regs->tf_rsp; sf.sf_sc.uc_mcontext.sc_r8 = regs->tf_r8; sf.sf_sc.uc_mcontext.sc_r9 = regs->tf_r9; sf.sf_sc.uc_mcontext.sc_r10 = regs->tf_r10; sf.sf_sc.uc_mcontext.sc_r11 = regs->tf_r11; sf.sf_sc.uc_mcontext.sc_r12 = regs->tf_r12; sf.sf_sc.uc_mcontext.sc_r13 = regs->tf_r13; sf.sf_sc.uc_mcontext.sc_r14 = regs->tf_r14; sf.sf_sc.uc_mcontext.sc_r15 = regs->tf_r15; sf.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; sf.sf_sc.uc_mcontext.sc_rflags = regs->tf_rflags; sf.sf_sc.uc_mcontext.sc_err = regs->tf_err; sf.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); sf.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; /* Build the argument list for the signal handler. */ regs->tf_rdi = sig; /* arg 1 in %rdi */ regs->tf_rax = 0; regs->tf_rsi = (register_t)&sfp->sf_si; /* arg 2 in %rsi */ regs->tf_rdx = (register_t)&sfp->sf_sc; /* arg 3 in %rdx */ sf.sf_handler = catcher; /* Fill in POSIX parts */ ksiginfo_to_lsiginfo(ksi, &sf.sf_si, sig); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_rsp = (long)sfp; regs->tf_rip = linux_rt_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * If a linux binary is exec'ing something, try this image activator * first. We override standard shell script execution in order to * be able to modify the interpreter path. We only do this if a linux * binary is doing the exec, so we do not create an EXEC module for it. */ static int exec_linux_imgact_try(struct image_params *iparams); static int exec_linux_imgact_try(struct image_params *imgp) { const char *head = (const char *)imgp->image_header; char *rpath; int error = -1, len; /* * The interpreter for shell scripts run from a linux binary needs * to be located in /compat/linux if possible in order to recursively * maintain linux path emulation. */ if (((const short *)head)[0] == SHELLMAGIC) { /* * Run our normal shell image activator. If it succeeds * attempt to use the alternate path for the interpreter. * If an alternate path is found, use our stringspace * to store it. */ if ((error = exec_shell_imgact(imgp)) == 0) { linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD); if (rpath != NULL) { len = strlen(rpath) + 1; if (len <= MAXSHELLCMDLEN) memcpy(imgp->interpreter_name, rpath, len); free(rpath, M_TEMP); } } } return(error); } #define LINUX_VSYSCALL_START (-10UL << 20) #define LINUX_VSYSCALL_SZ 1024 const unsigned long linux_vsyscall_vector[] = { LINUX_SYS_gettimeofday, LINUX_SYS_linux_time, /* getcpu not implemented */ }; static int linux_vsyscall(struct thread *td) { struct trapframe *frame; uint64_t retqaddr; int code, traced; int error; frame = td->td_frame; /* Check %rip for vsyscall area */ if (__predict_true(frame->tf_rip < LINUX_VSYSCALL_START)) return (EINVAL); if ((frame->tf_rip & (LINUX_VSYSCALL_SZ - 1)) != 0) return (EINVAL); code = (frame->tf_rip - LINUX_VSYSCALL_START) / LINUX_VSYSCALL_SZ; if (code >= nitems(linux_vsyscall_vector)) return (EINVAL); /* * vsyscall called as callq *(%rax), so we must * use return address from %rsp and also fixup %rsp */ error = copyin((void *)frame->tf_rsp, &retqaddr, sizeof(retqaddr)); if (error) return (error); frame->tf_rip = retqaddr; frame->tf_rax = linux_vsyscall_vector[code]; frame->tf_rsp += 8; traced = (frame->tf_flags & PSL_T); amd64_syscall(td, traced); return (0); } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = bsd_to_linux_errno, .sv_transtrap = translate_traps, .sv_fixup = elf_linux_fixup, .sv_sendsig = linux_rt_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF64", .sv_coredump = elf64_coredump, .sv_imgact_try = exec_linux_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = linux_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_LP64 | SV_SHP, .sv_set_syscall_retval = linux_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = linux_vsyscall, }; static void linux_vdso_install(void *param) { linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec, SHAREDPAGE); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)SHAREDPAGE); } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); }; SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)linux_vdso_deinstall, NULL); static char GNULINUX_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (FALSE); /* * For linux we encode osrel as follows (see linux_mib.c): * VVVMMMIII (version, major, minor), see linux_mib.c. */ *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; return (TRUE); } static Elf_Brandnote linux64_brandnote = { .hdr.n_namesz = sizeof(GNULINUX_ABI_VENDOR), .hdr.n_descsz = 16, .hdr.n_type = 1, .vendor = GNULINUX_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf64_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib64/ld-linux-x86-64.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf64_Brandinfo linux_glibc2brandshort = { .brand = ELFOSABI_LINUX, .machine = EM_X86_64, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib64/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux64_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf64_Brandinfo *linux_brandlist[] = { &linux_glibc2brand, &linux_glibc2brandshort, NULL }; static int linux64_elf_modevent(module_t mod, int type, void *data) { Elf64_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk64", NULL, MTX_DEF); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux x86-64 ELF exec handler installed\n"); } else printf("cannot insert Linux x86-64 ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf64_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); mtx_destroy(&futex_mtx); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux64_elf_mod = { "linux64elf", linux64_elf_modevent, 0 }; DECLARE_MODULE_TIED(linux64elf, linux64_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linux64elf, linux_common, 1, 1, 1); Index: head/sys/amd64/linux32/linux32_sysvec.c =================================================================== --- head/sys/amd64/linux32/linux32_sysvec.c (revision 294929) +++ head/sys/amd64/linux32/linux32_sysvec.c (revision 294930) @@ -1,1207 +1,1207 @@ /*- * Copyright (c) 2004 Tim J. Robbins * Copyright (c) 2003 Peter Wemm * Copyright (c) 2002 Doug Rabson * Copyright (c) 1998-1999 Andrew Gallatin * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #ifndef COMPAT_FREEBSD32 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" #endif #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux, 1); #define AUXARGS_ENTRY_32(pos, id, val) \ do { \ suword32(pos++, id); \ suword32(pos++, val); \ } while (0) #if BYTE_ORDER == LITTLE_ENDIAN #define SHELLMAGIC 0x2123 /* #! */ #else #define SHELLMAGIC 0x2321 #endif /* * Allow the sendsig functions to use the ldebug() facility * even though they are not syscalls themselves. Map them * to syscall 0. This is slightly less bogus than using * ldebug(sigreturn). */ #define LINUX32_SYS_linux_rt_sendsig 0 #define LINUX32_SYS_linux_sendsig 0 const char *linux_kplatform; static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux32_locore_o_start; extern char _binary_linux32_locore_o_end; extern struct sysent linux32_sysent[LINUX32_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int elf_linux_fixup(register_t **stack_base, struct image_params *iparams); static register_t *linux_copyout_strings(struct image_params *imgp); static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static void exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack); static void linux32_fixlimit(struct rlimit *rl, int which); static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); /* * Linux syscalls return negative errno's, we do positive and map them * Reference: * FreeBSD: src/sys/sys/errno.h * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h * linux-2.6.17.8/include/asm-generic/errno.h */ static int bsd_to_linux_errno[ELAST + 1] = { -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, -72, -67, -71 }; #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)td_proc == imgp->proc, ("unsafe elf_linux_fixup(), should be curproc")); base = (Elf32_Addr *)*stack_base; args = (Elf32_Auxargs *)imgp->auxargs; pos = base + (imgp->args->argc + imgp->args->envc + 2); issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0; AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY_32(pos, LINUX_AT_SYSINFO, linux32_vsyscall); AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); /* * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, * as it has appeared in the 2.4.0-rc7 first time. * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), * glibc falls back to the hard-coded CLK_TCK value when aux entry * is not present. * Also see linux_times() implementation. */ if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY_32(pos, AT_BASE, args->base); AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, PTROUT(imgp->canary)); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, PTROUT(imgp->execpathp)); if (args->execfd != -1) AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY_32(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; base--; suword32(base, (uint32_t)imgp->args->argc); *stack_base = (register_t *)base; return (0); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_rt_sigframe *fp, frame; int oonstack; int sig; int code; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* * Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + + fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); } else fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; mtx_unlock(&psp->ps_mtx); /* * Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = PTROUT(catcher); frame.sf_sig = sig; frame.sf_siginfo = PTROUT(&fp->sf_si); frame.sf_ucontext = PTROUT(&fp->sf_sc); /* Fill in POSIX parts */ ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); /* * Build the signal context to be used by sigreturn * and libgcc unwind. */ frame.sf_sc.uc_flags = 0; /* XXX ??? */ frame.sf_sc.uc_link = 0; /* XXX ??? */ frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; frame.sf_sc.uc_mcontext.sc_esp = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); #endif if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), fp, oonstack); #endif PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); regs->tf_rip = linux32_rt_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_sigframe *fp, frame; l_sigset_t lmask; int oonstack; int sig, code; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ linux_rt_sendsig(catcher, ksi, mask); return; } regs = td->td_frame; oonstack = sigonstack(regs->tf_rsp); #ifdef DEBUG if (ldebug(sendsig)) printf(ARGS(sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* * Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + + fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_sigframe)); } else fp = (struct l_sigframe *)regs->tf_rsp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = PTROUT(catcher); frame.sf_sig = sig; bsd_to_linux_sigset(mask, &lmask); /* * Build the signal context to be used by sigreturn. */ frame.sf_sc.sc_mask = lmask.__mask; frame.sf_sc.sc_gs = regs->tf_gs; frame.sf_sc.sc_fs = regs->tf_fs; frame.sf_sc.sc_es = regs->tf_es; frame.sf_sc.sc_ds = regs->tf_ds; frame.sf_sc.sc_edi = regs->tf_rdi; frame.sf_sc.sc_esi = regs->tf_rsi; frame.sf_sc.sc_ebp = regs->tf_rbp; frame.sf_sc.sc_ebx = regs->tf_rbx; frame.sf_sc.sc_esp = regs->tf_rsp; frame.sf_sc.sc_edx = regs->tf_rdx; frame.sf_sc.sc_ecx = regs->tf_rcx; frame.sf_sc.sc_eax = regs->tf_rax; frame.sf_sc.sc_eip = regs->tf_rip; frame.sf_sc.sc_cs = regs->tf_cs; frame.sf_sc.sc_eflags = regs->tf_rflags; frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; frame.sf_sc.sc_ss = regs->tf_ss; frame.sf_sc.sc_err = regs->tf_err; frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); frame.sf_extramask[0] = lmask.__mask; if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. */ regs->tf_rsp = PTROUT(fp); regs->tf_rip = linux32_sigcode; regs->tf_rflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucode32sel; regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) { struct l_sigframe frame; struct trapframe *regs; sigset_t bmask; l_sigset_t lmask; int eflags; ksiginfo_t ksi; regs = td->td_frame; #ifdef DEBUG if (ldebug(sigreturn)) printf(ARGS(sigreturn, "%p"), (void *)args->sfp); #endif /* * The trampoline code hands us the sigframe. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->sfp, &frame, sizeof(frame)) != 0) return (EFAULT); /* * Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = frame.sf_sc.sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) return(EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(frame.sf_sc.sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return(EINVAL); } lmask.__mask = frame.sf_sc.sc_mask; lmask.__mask = frame.sf_extramask[0]; linux_to_bsd_sigset(&lmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* * Restore signal context. */ regs->tf_rdi = frame.sf_sc.sc_edi; regs->tf_rsi = frame.sf_sc.sc_esi; regs->tf_rbp = frame.sf_sc.sc_ebp; regs->tf_rbx = frame.sf_sc.sc_ebx; regs->tf_rdx = frame.sf_sc.sc_edx; regs->tf_rcx = frame.sf_sc.sc_ecx; regs->tf_rax = frame.sf_sc.sc_eax; regs->tf_rip = frame.sf_sc.sc_eip; regs->tf_cs = frame.sf_sc.sc_cs; regs->tf_ds = frame.sf_sc.sc_ds; regs->tf_es = frame.sf_sc.sc_es; regs->tf_fs = frame.sf_sc.sc_fs; regs->tf_gs = frame.sf_sc.sc_gs; regs->tf_rflags = eflags; regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; regs->tf_ss = frame.sf_sc.sc_ss; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); return (EJUSTRETURN); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by rt_sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct l_ucontext uc; struct l_sigcontext *context; sigset_t bmask; l_stack_t *lss; stack_t ss; struct trapframe *regs; int eflags; ksiginfo_t ksi; regs = td->td_frame; #ifdef DEBUG if (ldebug(rt_sigreturn)) printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); #endif /* * The trampoline code hands us the ucontext. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->ucp, &uc, sizeof(uc)) != 0) return (EFAULT); context = &uc.uc_mcontext; /* * Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = context->sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_rflags)) return(EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_rip; trapsignal(td, &ksi); return(EINVAL); } linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* * Restore signal context */ regs->tf_gs = context->sc_gs; regs->tf_fs = context->sc_fs; regs->tf_es = context->sc_es; regs->tf_ds = context->sc_ds; regs->tf_rdi = context->sc_edi; regs->tf_rsi = context->sc_esi; regs->tf_rbp = context->sc_ebp; regs->tf_rbx = context->sc_ebx; regs->tf_rdx = context->sc_edx; regs->tf_rcx = context->sc_ecx; regs->tf_rax = context->sc_eax; regs->tf_rip = context->sc_eip; regs->tf_cs = context->sc_cs; regs->tf_rflags = eflags; regs->tf_rsp = context->sc_esp_at_signal; regs->tf_ss = context->sc_ss; set_pcb_flags(td->td_pcb, PCB_FULL_IRET); /* * call sigaltstack & ignore results.. */ lss = &uc.uc_stack; ss.ss_sp = PTRIN(lss->ss_sp); ss.ss_size = lss->ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); #ifdef DEBUG if (ldebug(rt_sigreturn)) printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); #endif (void)kern_sigaltstack(td, &ss, NULL); return (EJUSTRETURN); } static int linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; p = td->td_proc; frame = td->td_frame; sa->args[0] = frame->tf_rbx; sa->args[1] = frame->tf_rcx; sa->args[2] = frame->tf_rdx; sa->args[3] = frame->tf_rsi; sa->args[4] = frame->tf_rdi; sa->args[5] = frame->tf_rbp; /* Unconfirmed */ sa->code = frame->tf_rax; if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; td->td_retval[1] = frame->tf_rdx; return (0); } /* * If a linux binary is exec'ing something, try this image activator * first. We override standard shell script execution in order to * be able to modify the interpreter path. We only do this if a linux * binary is doing the exec, so we do not create an EXEC module for it. */ static int exec_linux_imgact_try(struct image_params *iparams); static int exec_linux_imgact_try(struct image_params *imgp) { const char *head = (const char *)imgp->image_header; char *rpath; int error = -1; /* * The interpreter for shell scripts run from a linux binary needs * to be located in /compat/linux if possible in order to recursively * maintain linux path emulation. */ if (((const short *)head)[0] == SHELLMAGIC) { /* * Run our normal shell image activator. If it succeeds attempt * to use the alternate path for the interpreter. If an * alternate * path is found, use our stringspace to store it. */ if ((error = exec_shell_imgact(imgp)) == 0) { linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD); if (rpath != NULL) imgp->args->fname_buf = imgp->interpreter_name = rpath; } } return (error); } /* * Clear registers on exec * XXX copied from ia32_signal.c. */ static void exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; mtx_lock(&dt_lock); if (td->td_proc->p_md.md_ldt != NULL) user_ldt_free(td); else mtx_unlock(&dt_lock); critical_enter(); wrmsr(MSR_FSBASE, 0); wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ pcb->pcb_fsbase = 0; pcb->pcb_gsbase = 0; critical_exit(); pcb->pcb_initial_fpucw = __LINUX_NPXCW__; bzero((char *)regs, sizeof(struct trapframe)); regs->tf_rip = imgp->entry_addr; regs->tf_rsp = stack; regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); regs->tf_gs = _ugssel; regs->tf_fs = _ufssel; regs->tf_es = _udatasel; regs->tf_ds = _udatasel; regs->tf_ss = _udatasel; regs->tf_flags = TF_HASSEGS; regs->tf_cs = _ucode32sel; regs->tf_rbx = imgp->ps_strings; fpstate_drop(td); /* Do full restore on return so that we can change to a different %cs */ set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); td->td_retval[1] = 0; } /* * XXX copied from ia32_sysvec.c. */ static register_t * linux_copyout_strings(struct image_params *imgp) { int argc, envc; u_int32_t *vectp; char *stringp, *destp; u_int32_t *stack_base; struct linux32_ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; /* * Calculate string base and vector table pointers. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; destp = (caddr_t)arginfo - SPARE_USRSPACE - roundup(sizeof(canary), sizeof(char *)) - roundup(execpath_len, sizeof(char *)) - roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); if (execpath_len != 0) { imgp->execpathp = (uintptr_t)arginfo - execpath_len; copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); imgp->canary = (uintptr_t)arginfo - roundup(execpath_len, sizeof(char *)) - roundup(sizeof(canary), sizeof(char *)); copyout(canary, (void *)imgp->canary, sizeof(canary)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (LINUX_AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(u_int32_t)); } else /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (u_int32_t *)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); /* * vectp also becomes our initial stack base */ stack_base = vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword32(vectp++, (uint32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword32(vectp++, 0); suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword32(vectp++, (uint32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword32(vectp, 0); return ((register_t *)stack_base); } static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, "32-bit Linux emulation"); static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, &linux32_maxdsiz, 0, ""); static u_long linux32_maxssiz = LINUX32_MAXSSIZ; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, &linux32_maxssiz, 0, ""); static u_long linux32_maxvmem = LINUX32_MAXVMEM; SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, &linux32_maxvmem, 0, ""); #if defined(DEBUG) SYSCTL_PROC(_compat_linux32, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, linux_sysctl_debug, "A", "Linux debugging control"); #endif static void linux32_fixlimit(struct rlimit *rl, int which) { switch (which) { case RLIMIT_DATA: if (linux32_maxdsiz != 0) { if (rl->rlim_cur > linux32_maxdsiz) rl->rlim_cur = linux32_maxdsiz; if (rl->rlim_max > linux32_maxdsiz) rl->rlim_max = linux32_maxdsiz; } break; case RLIMIT_STACK: if (linux32_maxssiz != 0) { if (rl->rlim_cur > linux32_maxssiz) rl->rlim_cur = linux32_maxssiz; if (rl->rlim_max > linux32_maxssiz) rl->rlim_max = linux32_maxssiz; } break; case RLIMIT_VMEM: if (linux32_maxvmem != 0) { if (rl->rlim_cur > linux32_maxvmem) rl->rlim_cur = linux32_maxvmem; if (rl->rlim_max > linux32_maxvmem) rl->rlim_max = linux32_maxvmem; } break; } } struct sysentvec elf_linux_sysvec = { .sv_size = LINUX32_SYS_MAXSYSCALL, .sv_table = linux32_sysent, .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = bsd_to_linux_errno, .sv_transtrap = translate_traps, .sv_fixup = elf_linux_fixup, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux32_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF32", .sv_coredump = elf32_coredump, .sv_imgact_try = exec_linux_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = LINUX32_MAXUSER, .sv_usrstack = LINUX32_USRSTACK, .sv_psstrings = LINUX32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = exec_linux_setregs, .sv_fixlimit = linux32_fixlimit, .sv_maxssiz = &linux32_maxssiz, .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = linux32_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX32_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, }; static void linux_vdso_install(void *param) { linux_szsigcode = (&_binary_linux32_locore_o_end - &_binary_linux32_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX32_SHAREDPAGE); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; linux_kplatform = linux_shared_page_mapping + (linux_platform - (caddr_t)LINUX32_SHAREDPAGE); } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); }; SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (FALSE); /* * For linux we encode osrel as follows (see linux_mib.c): * VVVMMMIII (version, major, minor), see linux_mib.c. */ *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; return (TRUE); } static Elf_Brandnote linux32_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux32_trans_osrel }; static Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-linux.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux32_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, NULL }; static int linux_elf_modevent(module_t mod, int type, void *data) { Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux ELF exec handler installed\n"); } else printf("cannot insert Linux ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); mtx_destroy(&futex_mtx); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux_elf_mod = { "linuxelf", linux_elf_modevent, 0 }; DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); MODULE_DEPEND(linuxelf, linux_common, 1, 1, 1); Index: head/sys/arm/arm/machdep.c =================================================================== --- head/sys/arm/arm/machdep.c (revision 294929) +++ head/sys/arm/arm/machdep.c (revision 294930) @@ -1,1920 +1,1920 @@ /* $NetBSD: arm32_machdep.c,v 1.44 2004/03/24 15:34:47 atatat Exp $ */ /*- * Copyright (c) 2004 Olivier Houchard * Copyright (c) 1994-1998 Mark Brinicombe. * Copyright (c) 1994 Brini. * All rights reserved. * * This code is derived from software written for Brini by Mark Brinicombe * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by Mark Brinicombe * for the NetBSD Project. * 4. The name of the company nor the name of the author may be used to * endorse or promote products derived from this software without specific * prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * Machine dependant functions for kernel setup * * Created : 17/09/94 * Updated : 18/04/01 updated for new wscons */ #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include "opt_sched.h" #include "opt_timer.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #endif #ifdef DDB #include #if __ARM_ARCH >= 6 #include DB_SHOW_COMMAND(cp15, db_show_cp15) { u_int reg; reg = cp15_midr_get(); db_printf("Cpu ID: 0x%08x\n", reg); reg = cp15_ctr_get(); db_printf("Current Cache Lvl ID: 0x%08x\n",reg); reg = cp15_sctlr_get(); db_printf("Ctrl: 0x%08x\n",reg); reg = cp15_actlr_get(); db_printf("Aux Ctrl: 0x%08x\n",reg); reg = cp15_id_pfr0_get(); db_printf("Processor Feat 0: 0x%08x\n", reg); reg = cp15_id_pfr1_get(); db_printf("Processor Feat 1: 0x%08x\n", reg); reg = cp15_id_dfr0_get(); db_printf("Debug Feat 0: 0x%08x\n", reg); reg = cp15_id_afr0_get(); db_printf("Auxiliary Feat 0: 0x%08x\n", reg); reg = cp15_id_mmfr0_get(); db_printf("Memory Model Feat 0: 0x%08x\n", reg); reg = cp15_id_mmfr1_get(); db_printf("Memory Model Feat 1: 0x%08x\n", reg); reg = cp15_id_mmfr2_get(); db_printf("Memory Model Feat 2: 0x%08x\n", reg); reg = cp15_id_mmfr3_get(); db_printf("Memory Model Feat 3: 0x%08x\n", reg); reg = cp15_ttbr_get(); db_printf("TTB0: 0x%08x\n", reg); } DB_SHOW_COMMAND(vtop, db_show_vtop) { u_int reg; if (have_addr) { cp15_ats1cpr_set(addr); reg = cp15_par_get(); db_printf("Physical address reg: 0x%08x\n",reg); } else db_printf("show vtop \n"); } #endif /* __ARM_ARCH >= 6 */ #endif /* DDB */ #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else #define debugf(fmt, args...) #endif struct pcpu __pcpu[MAXCPU]; struct pcpu *pcpup = &__pcpu[0]; static struct trapframe proc0_tf; uint32_t cpu_reset_address = 0; int cold = 1; vm_offset_t vector_page; int (*_arm_memcpy)(void *, void *, int, int) = NULL; int (*_arm_bzero)(void *, int, int) = NULL; int _min_memcpy_size = 0; int _min_bzero_size = 0; extern int *end; #ifdef FDT static char *loader_envp; vm_paddr_t pmap_pa; #ifdef ARM_NEW_PMAP vm_offset_t systempage; vm_offset_t irqstack; vm_offset_t undstack; vm_offset_t abtstack; #else /* * This is the number of L2 page tables required for covering max * (hypothetical) memsize of 4GB and all kernel mappings (vectors, msgbuf, * stacks etc.), uprounded to be divisible by 4. */ #define KERNEL_PT_MAX 78 static struct pv_addr kernel_pt_table[KERNEL_PT_MAX]; struct pv_addr systempage; static struct pv_addr msgbufpv; struct pv_addr irqstack; struct pv_addr undstack; struct pv_addr abtstack; static struct pv_addr kernelstack; #endif #endif #if defined(LINUX_BOOT_ABI) #define LBABI_MAX_BANKS 10 uint32_t board_id; struct arm_lbabi_tag *atag_list; char linux_command_line[LBABI_MAX_COMMAND_LINE + 1]; char atags[LBABI_MAX_COMMAND_LINE * 2]; uint32_t memstart[LBABI_MAX_BANKS]; uint32_t memsize[LBABI_MAX_BANKS]; uint32_t membanks; #endif static uint32_t board_revision; /* hex representation of uint64_t */ static char board_serial[32]; SYSCTL_NODE(_hw, OID_AUTO, board, CTLFLAG_RD, 0, "Board attributes"); SYSCTL_UINT(_hw_board, OID_AUTO, revision, CTLFLAG_RD, &board_revision, 0, "Board revision"); SYSCTL_STRING(_hw_board, OID_AUTO, serial, CTLFLAG_RD, board_serial, 0, "Board serial"); int vfp_exists; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &vfp_exists, 0, "Floating point support enabled"); void board_set_serial(uint64_t serial) { snprintf(board_serial, sizeof(board_serial)-1, "%016jx", serial); } void board_set_revision(uint32_t revision) { board_revision = revision; } void sendsig(catcher, ksi, mask) sig_t catcher; ksiginfo_t *ksi; sigset_t *mask; { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; struct sysentvec *sysent; int onstack; int sig; int code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_usr_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) && SIGISMEMBER(psp->ps_sigonstack, sig)) { - fp = (struct sigframe *)(td->td_sigstk.ss_sp + + fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct sigframe *)td->td_frame->tf_usr_sp; /* make room on the stack */ fp--; /* make the stack aligned */ fp = (struct sigframe *)STACKALIGN(fp); /* Populate the siginfo frame. */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK ) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. We invoke the handler * directly, only returning via the trampoline. Note the * trampoline version numbers are coordinated with machine- * dependent code in libc. */ tf->tf_r0 = sig; tf->tf_r1 = (register_t)&fp->sf_si; tf->tf_r2 = (register_t)&fp->sf_uc; /* the trampoline uses r5 as the uc address */ tf->tf_r5 = (register_t)&fp->sf_uc; tf->tf_pc = (register_t)catcher; tf->tf_usr_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_usr_lr = (register_t)sysent->sv_sigcode_base; else tf->tf_usr_lr = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); /* Set the mode to enter in the signal handler */ #if __ARM_ARCH >= 7 if ((register_t)catcher & 1) tf->tf_spsr |= PSR_T; else tf->tf_spsr &= ~PSR_T; #endif CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_usr_lr, tf->tf_usr_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } struct kva_md_info kmi; /* * arm32_vector_init: * * Initialize the vector page, and select whether or not to * relocate the vectors. * * NOTE: We expect the vector page to be mapped at its expected * destination. */ extern unsigned int page0[], page0_data[]; void arm_vector_init(vm_offset_t va, int which) { unsigned int *vectors = (int *) va; unsigned int *vectors_data = vectors + (page0_data - page0); int vec; /* * Loop through the vectors we're taking over, and copy the * vector's insn and data word. */ for (vec = 0; vec < ARM_NVEC; vec++) { if ((which & (1 << vec)) == 0) { /* Don't want to take over this vector. */ continue; } vectors[vec] = page0[vec]; vectors_data[vec] = page0_data[vec]; } /* Now sync the vectors. */ cpu_icache_sync_range(va, (ARM_NVEC * 2) * sizeof(u_int)); vector_page = va; if (va == ARM_VECTORS_HIGH) { /* * Assume the MD caller knows what it's doing here, and * really does want the vector page relocated. * * Note: This has to be done here (and not just in * cpu_setup()) because the vector page needs to be * accessible *before* cpu_startup() is called. * Think ddb(9) ... * * NOTE: If the CPU control register is not readable, * this will totally fail! We'll just assume that * any system that has high vector support has a * readable CPU control register, for now. If we * ever encounter one that does not, we'll have to * rethink this. */ cpu_control(CPU_CONTROL_VECRELOC, CPU_CONTROL_VECRELOC); } } static void cpu_startup(void *dummy) { struct pcb *pcb = thread0.td_pcb; const unsigned int mbyte = 1024 * 1024; #ifdef ARM_TP_ADDRESS #ifndef ARM_CACHE_LOCK_ENABLE vm_page_t m; #endif #endif identify_arm_cpu(); vm_ksubmap_init(&kmi); /* * Display the RAM layout. */ printf("real memory = %ju (%ju MB)\n", (uintmax_t)arm32_ptob(realmem), (uintmax_t)arm32_ptob(realmem) / mbyte); printf("avail memory = %ju (%ju MB)\n", (uintmax_t)arm32_ptob(vm_cnt.v_free_count), (uintmax_t)arm32_ptob(vm_cnt.v_free_count) / mbyte); if (bootverbose) { arm_physmem_print_tables(); arm_devmap_print_table(); } bufinit(); vm_pager_bufferinit(); pcb->pcb_regs.sf_sp = (u_int)thread0.td_kstack + USPACE_SVC_STACK_TOP; pmap_set_pcb_pagedir(pmap_kernel(), pcb); #ifndef ARM_NEW_PMAP vector_page_setprot(VM_PROT_READ); pmap_postinit(); #endif #ifdef ARM_TP_ADDRESS #ifdef ARM_CACHE_LOCK_ENABLE pmap_kenter_user(ARM_TP_ADDRESS, ARM_TP_ADDRESS); arm_lock_cache_line(ARM_TP_ADDRESS); #else m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_ZERO); pmap_kenter_user(ARM_TP_ADDRESS, VM_PAGE_TO_PHYS(m)); #endif *(uint32_t *)ARM_RAS_START = 0; *(uint32_t *)ARM_RAS_END = 0xffffffff; #endif } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { cpu_dcache_wb_range((uintptr_t)ptr, len); #ifdef ARM_L2_PIPT cpu_l2cache_wb_range((uintptr_t)vtophys(ptr), len); #else cpu_l2cache_wb_range((uintptr_t)ptr, len); #endif } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } void cpu_idle(int busy) { CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); spinlock_enter(); #ifndef NO_EVENTTIMERS if (!busy) cpu_idleclock(); #endif if (!sched_runnable()) cpu_sleep(0); #ifndef NO_EVENTTIMERS if (!busy) cpu_activeclock(); #endif spinlock_exit(); CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { return (0); } /* * Most ARM platforms don't need to do anything special to init their clocks * (they get intialized during normal device attachment), and by not defining a * cpu_initclocks() function they get this generic one. Any platform that needs * to do something special can just provide their own implementation, which will * override this one due to the weak linkage. */ void arm_generic_initclocks(void) { #ifndef NO_EVENTTIMERS #ifdef SMP if (PCPU_GET(cpuid) == 0) cpu_initclocks_bsp(); else cpu_initclocks_ap(); #else cpu_initclocks_bsp(); #endif #endif } __weak_reference(arm_generic_initclocks, cpu_initclocks); int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(&tf->tf_r0, regs->r, sizeof(regs->r)); regs->r_sp = tf->tf_usr_sp; regs->r_lr = tf->tf_usr_lr; regs->r_pc = tf->tf_pc; regs->r_cpsr = tf->tf_spsr; return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { bzero(regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(regs->r, &tf->tf_r0, sizeof(regs->r)); tf->tf_usr_sp = regs->r_sp; tf->tf_usr_lr = regs->r_lr; tf->tf_pc = regs->r_pc; tf->tf_spsr &= ~PSR_FLAGS; tf->tf_spsr |= regs->r_cpsr & PSR_FLAGS; return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { return (0); } int set_dbregs(struct thread *td, struct dbreg *regs) { return (0); } static int ptrace_read_int(struct thread *td, vm_offset_t addr, uint32_t *v) { if (proc_readmem(td, td->td_proc, addr, v, sizeof(*v)) != sizeof(*v)) return (ENOMEM); return (0); } static int ptrace_write_int(struct thread *td, vm_offset_t addr, uint32_t v) { if (proc_writemem(td, td->td_proc, addr, &v, sizeof(v)) != sizeof(v)) return (ENOMEM); return (0); } static u_int ptrace_get_usr_reg(void *cookie, int reg) { int ret; struct thread *td = cookie; KASSERT(((reg >= 0) && (reg <= ARM_REG_NUM_PC)), ("reg is outside range")); switch(reg) { case ARM_REG_NUM_PC: ret = td->td_frame->tf_pc; break; case ARM_REG_NUM_LR: ret = td->td_frame->tf_usr_lr; break; case ARM_REG_NUM_SP: ret = td->td_frame->tf_usr_sp; break; default: ret = *((register_t*)&td->td_frame->tf_r0 + reg); break; } return (ret); } static u_int ptrace_get_usr_int(void* cookie, vm_offset_t offset, u_int* val) { struct thread *td = cookie; u_int error; error = ptrace_read_int(td, offset, val); return (error); } /** * This function parses current instruction opcode and decodes * any possible jump (change in PC) which might occur after * the instruction is executed. * * @param td Thread structure of analysed task * @param cur_instr Currently executed instruction * @param alt_next_address Pointer to the variable where * the destination address of the * jump instruction shall be stored. * * @return <0> when jump is possible * otherwise */ static int ptrace_get_alternative_next(struct thread *td, uint32_t cur_instr, uint32_t *alt_next_address) { int error; if (inst_branch(cur_instr) || inst_call(cur_instr) || inst_return(cur_instr)) { error = arm_predict_branch(td, cur_instr, td->td_frame->tf_pc, alt_next_address, ptrace_get_usr_reg, ptrace_get_usr_int); return (error); } return (EINVAL); } int ptrace_single_step(struct thread *td) { struct proc *p; int error, error_alt; uint32_t cur_instr, alt_next = 0; /* TODO: This needs to be updated for Thumb-2 */ if ((td->td_frame->tf_spsr & PSR_T) != 0) return (EINVAL); KASSERT(td->td_md.md_ptrace_instr == 0, ("Didn't clear single step")); KASSERT(td->td_md.md_ptrace_instr_alt == 0, ("Didn't clear alternative single step")); p = td->td_proc; PROC_UNLOCK(p); error = ptrace_read_int(td, td->td_frame->tf_pc, &cur_instr); if (error) goto out; error = ptrace_read_int(td, td->td_frame->tf_pc + INSN_SIZE, &td->td_md.md_ptrace_instr); if (error == 0) { error = ptrace_write_int(td, td->td_frame->tf_pc + INSN_SIZE, PTRACE_BREAKPOINT); if (error) { td->td_md.md_ptrace_instr = 0; } else { td->td_md.md_ptrace_addr = td->td_frame->tf_pc + INSN_SIZE; } } error_alt = ptrace_get_alternative_next(td, cur_instr, &alt_next); if (error_alt == 0) { error_alt = ptrace_read_int(td, alt_next, &td->td_md.md_ptrace_instr_alt); if (error_alt) { td->td_md.md_ptrace_instr_alt = 0; } else { error_alt = ptrace_write_int(td, alt_next, PTRACE_BREAKPOINT); if (error_alt) td->td_md.md_ptrace_instr_alt = 0; else td->td_md.md_ptrace_addr_alt = alt_next; } } out: PROC_LOCK(p); return ((error != 0) && (error_alt != 0)); } int ptrace_clear_single_step(struct thread *td) { struct proc *p; /* TODO: This needs to be updated for Thumb-2 */ if ((td->td_frame->tf_spsr & PSR_T) != 0) return (EINVAL); if (td->td_md.md_ptrace_instr != 0) { p = td->td_proc; PROC_UNLOCK(p); ptrace_write_int(td, td->td_md.md_ptrace_addr, td->td_md.md_ptrace_instr); PROC_LOCK(p); td->td_md.md_ptrace_instr = 0; } if (td->td_md.md_ptrace_instr_alt != 0) { p = td->td_proc; PROC_UNLOCK(p); ptrace_write_int(td, td->td_md.md_ptrace_addr_alt, td->td_md.md_ptrace_instr_alt); PROC_LOCK(p); td->td_md.md_ptrace_instr_alt = 0; } return (0); } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_pc = addr; return (0); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; register_t cspr; td = curthread; if (td->td_md.md_spinlock_count == 0) { cspr = disable_interrupts(PSR_I | PSR_F); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_cspr = cspr; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t cspr; td = curthread; critical_exit(); cspr = td->td_md.md_saved_cspr; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) restore_interrupts(cspr); } /* * Clear registers on exec */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(*tf)); tf->tf_usr_sp = stack; tf->tf_usr_lr = imgp->entry_addr; tf->tf_svc_lr = 0x77777777; tf->tf_pc = imgp->entry_addr; tf->tf_spsr = PSR_USR32_MODE; } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; __greg_t *gr = mcp->__gregs; if (clear_ret & GET_MC_CLEAR_RET) { gr[_REG_R0] = 0; gr[_REG_CPSR] = tf->tf_spsr & ~PSR_C; } else { gr[_REG_R0] = tf->tf_r0; gr[_REG_CPSR] = tf->tf_spsr; } gr[_REG_R1] = tf->tf_r1; gr[_REG_R2] = tf->tf_r2; gr[_REG_R3] = tf->tf_r3; gr[_REG_R4] = tf->tf_r4; gr[_REG_R5] = tf->tf_r5; gr[_REG_R6] = tf->tf_r6; gr[_REG_R7] = tf->tf_r7; gr[_REG_R8] = tf->tf_r8; gr[_REG_R9] = tf->tf_r9; gr[_REG_R10] = tf->tf_r10; gr[_REG_R11] = tf->tf_r11; gr[_REG_R12] = tf->tf_r12; gr[_REG_SP] = tf->tf_usr_sp; gr[_REG_LR] = tf->tf_usr_lr; gr[_REG_PC] = tf->tf_pc; return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf = td->td_frame; const __greg_t *gr = mcp->__gregs; tf->tf_r0 = gr[_REG_R0]; tf->tf_r1 = gr[_REG_R1]; tf->tf_r2 = gr[_REG_R2]; tf->tf_r3 = gr[_REG_R3]; tf->tf_r4 = gr[_REG_R4]; tf->tf_r5 = gr[_REG_R5]; tf->tf_r6 = gr[_REG_R6]; tf->tf_r7 = gr[_REG_R7]; tf->tf_r8 = gr[_REG_R8]; tf->tf_r9 = gr[_REG_R9]; tf->tf_r10 = gr[_REG_R10]; tf->tf_r11 = gr[_REG_R11]; tf->tf_r12 = gr[_REG_R12]; tf->tf_usr_sp = gr[_REG_SP]; tf->tf_usr_lr = gr[_REG_LR]; tf->tf_pc = gr[_REG_PC]; tf->tf_spsr = gr[_REG_CPSR]; return (0); } /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; int spsr; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); /* * Make sure the processor mode has not been tampered with and * interrupts have not been disabled. */ spsr = uc.uc_mcontext.__gregs[_REG_CPSR]; if ((spsr & PSR_MODE) != PSR_USR32_MODE || (spsr & (PSR_I | PSR_F)) != 0) return (EINVAL); /* Restore register context. */ set_mcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_regs.sf_r4 = tf->tf_r4; pcb->pcb_regs.sf_r5 = tf->tf_r5; pcb->pcb_regs.sf_r6 = tf->tf_r6; pcb->pcb_regs.sf_r7 = tf->tf_r7; pcb->pcb_regs.sf_r8 = tf->tf_r8; pcb->pcb_regs.sf_r9 = tf->tf_r9; pcb->pcb_regs.sf_r10 = tf->tf_r10; pcb->pcb_regs.sf_r11 = tf->tf_r11; pcb->pcb_regs.sf_r12 = tf->tf_r12; pcb->pcb_regs.sf_pc = tf->tf_pc; pcb->pcb_regs.sf_lr = tf->tf_usr_lr; pcb->pcb_regs.sf_sp = tf->tf_usr_sp; } /* * Fake up a boot descriptor table */ vm_offset_t fake_preload_metadata(struct arm_boot_params *abp __unused) { #ifdef DDB vm_offset_t zstart = 0, zend = 0; #endif vm_offset_t lastaddr; int i = 0; static uint32_t fake_preload[35]; fake_preload[i++] = MODINFO_NAME; fake_preload[i++] = strlen("kernel") + 1; strcpy((char*)&fake_preload[i++], "kernel"); i += 1; fake_preload[i++] = MODINFO_TYPE; fake_preload[i++] = strlen("elf kernel") + 1; strcpy((char*)&fake_preload[i++], "elf kernel"); i += 2; fake_preload[i++] = MODINFO_ADDR; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = KERNVIRTADDR; fake_preload[i++] = MODINFO_SIZE; fake_preload[i++] = sizeof(uint32_t); fake_preload[i++] = (uint32_t)&end - KERNVIRTADDR; #ifdef DDB if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) { fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4); fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8); lastaddr = *(uint32_t *)(KERNVIRTADDR + 8); zend = lastaddr; zstart = *(uint32_t *)(KERNVIRTADDR + 4); db_fetch_ksymtab(zstart, zend); } else #endif lastaddr = (vm_offset_t)&end; fake_preload[i++] = 0; fake_preload[i] = 0; preload_metadata = (void *)fake_preload; init_static_kenv(NULL, 0); return (lastaddr); } void pcpu0_init(void) { #if __ARM_ARCH >= 6 set_curthread(&thread0); #endif pcpu_init(pcpup, 0, sizeof(struct pcpu)); PCPU_SET(curthread, &thread0); } #if defined(LINUX_BOOT_ABI) vm_offset_t linux_parse_boot_param(struct arm_boot_params *abp) { struct arm_lbabi_tag *walker; uint32_t revision; uint64_t serial; /* * Linux boot ABI: r0 = 0, r1 is the board type (!= 0) and r2 * is atags or dtb pointer. If all of these aren't satisfied, * then punt. */ if (!(abp->abp_r0 == 0 && abp->abp_r1 != 0 && abp->abp_r2 != 0)) return 0; board_id = abp->abp_r1; walker = (struct arm_lbabi_tag *) (abp->abp_r2 + KERNVIRTADDR - abp->abp_physaddr); /* xxx - Need to also look for binary device tree */ if (ATAG_TAG(walker) != ATAG_CORE) return 0; atag_list = walker; while (ATAG_TAG(walker) != ATAG_NONE) { switch (ATAG_TAG(walker)) { case ATAG_CORE: break; case ATAG_MEM: arm_physmem_hardware_region(walker->u.tag_mem.start, walker->u.tag_mem.size); break; case ATAG_INITRD2: break; case ATAG_SERIAL: serial = walker->u.tag_sn.low | ((uint64_t)walker->u.tag_sn.high << 32); board_set_serial(serial); break; case ATAG_REVISION: revision = walker->u.tag_rev.rev; board_set_revision(revision); break; case ATAG_CMDLINE: /* XXX open question: Parse this for boothowto? */ bcopy(walker->u.tag_cmd.command, linux_command_line, ATAG_SIZE(walker)); break; default: break; } walker = ATAG_NEXT(walker); } /* Save a copy for later */ bcopy(atag_list, atags, (char *)walker - (char *)atag_list + ATAG_SIZE(walker)); init_static_kenv(NULL, 0); return fake_preload_metadata(abp); } #endif #if defined(FREEBSD_BOOT_LOADER) vm_offset_t freebsd_parse_boot_param(struct arm_boot_params *abp) { vm_offset_t lastaddr = 0; void *mdp; void *kmdp; #ifdef DDB vm_offset_t ksym_start; vm_offset_t ksym_end; #endif /* * Mask metadata pointer: it is supposed to be on page boundary. If * the first argument (mdp) doesn't point to a valid address the * bootloader must have passed us something else than the metadata * ptr, so we give up. Also give up if we cannot find metadta section * the loader creates that we get all this data out of. */ if ((mdp = (void *)(abp->abp_r0 & ~PAGE_MASK)) == NULL) return 0; preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) return 0; boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); loader_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); init_static_kenv(loader_envp, 0); lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); db_fetch_ksymtab(ksym_start, ksym_end); #endif return lastaddr; } #endif vm_offset_t default_parse_boot_param(struct arm_boot_params *abp) { vm_offset_t lastaddr; #if defined(LINUX_BOOT_ABI) if ((lastaddr = linux_parse_boot_param(abp)) != 0) return lastaddr; #endif #if defined(FREEBSD_BOOT_LOADER) if ((lastaddr = freebsd_parse_boot_param(abp)) != 0) return lastaddr; #endif /* Fall back to hardcoded metadata. */ lastaddr = fake_preload_metadata(abp); return lastaddr; } /* * Stub version of the boot parameter parsing routine. We are * called early in initarm, before even VM has been initialized. * This routine needs to preserve any data that the boot loader * has passed in before the kernel starts to grow past the end * of the BSS, traditionally the place boot-loaders put this data. * * Since this is called so early, things that depend on the vm system * being setup (including access to some SoC's serial ports), about * all that can be done in this routine is to copy the arguments. * * This is the default boot parameter parsing routine. Individual * kernels/boards can override this weak function with one of their * own. We just fake metadata... */ __weak_reference(default_parse_boot_param, parse_boot_param); /* * Initialize proc0 */ void init_proc0(vm_offset_t kstack) { proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + kstack_pages * PAGE_SIZE) - 1; thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_vfpcpu = -1; thread0.td_pcb->pcb_vfpstate.fpscr = VFPSCR_DN; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } int arm_predict_branch(void *cookie, u_int insn, register_t pc, register_t *new_pc, u_int (*fetch_reg)(void*, int), u_int (*read_int)(void*, vm_offset_t, u_int*)) { u_int addr, nregs, offset = 0; int error = 0; switch ((insn >> 24) & 0xf) { case 0x2: /* add pc, reg1, #value */ case 0x0: /* add pc, reg1, reg2, lsl #offset */ addr = fetch_reg(cookie, (insn >> 16) & 0xf); if (((insn >> 16) & 0xf) == 15) addr += 8; if (insn & 0x0200000) { offset = (insn >> 7) & 0x1e; offset = (insn & 0xff) << (32 - offset) | (insn & 0xff) >> offset; } else { offset = fetch_reg(cookie, insn & 0x0f); if ((insn & 0x0000ff0) != 0x00000000) { if (insn & 0x10) nregs = fetch_reg(cookie, (insn >> 8) & 0xf); else nregs = (insn >> 7) & 0x1f; switch ((insn >> 5) & 3) { case 0: /* lsl */ offset = offset << nregs; break; case 1: /* lsr */ offset = offset >> nregs; break; default: break; /* XXX */ } } *new_pc = addr + offset; return (0); } case 0xa: /* b ... */ case 0xb: /* bl ... */ addr = ((insn << 2) & 0x03ffffff); if (addr & 0x02000000) addr |= 0xfc000000; *new_pc = (pc + 8 + addr); return (0); case 0x7: /* ldr pc, [pc, reg, lsl #2] */ addr = fetch_reg(cookie, insn & 0xf); addr = pc + 8 + (addr << 2); error = read_int(cookie, addr, &addr); *new_pc = addr; return (error); case 0x1: /* mov pc, reg */ *new_pc = fetch_reg(cookie, insn & 0xf); return (0); case 0x4: case 0x5: /* ldr pc, [reg] */ addr = fetch_reg(cookie, (insn >> 16) & 0xf); /* ldr pc, [reg, #offset] */ if (insn & (1 << 24)) offset = insn & 0xfff; if (insn & 0x00800000) addr += offset; else addr -= offset; error = read_int(cookie, addr, &addr); *new_pc = addr; return (error); case 0x8: /* ldmxx reg, {..., pc} */ case 0x9: addr = fetch_reg(cookie, (insn >> 16) & 0xf); nregs = (insn & 0x5555) + ((insn >> 1) & 0x5555); nregs = (nregs & 0x3333) + ((nregs >> 2) & 0x3333); nregs = (nregs + (nregs >> 4)) & 0x0f0f; nregs = (nregs + (nregs >> 8)) & 0x001f; switch ((insn >> 23) & 0x3) { case 0x0: /* ldmda */ addr = addr - 0; break; case 0x1: /* ldmia */ addr = addr + 0 + ((nregs - 1) << 2); break; case 0x2: /* ldmdb */ addr = addr - 4; break; case 0x3: /* ldmib */ addr = addr + 4 + ((nregs - 1) << 2); break; } error = read_int(cookie, addr, &addr); *new_pc = addr; return (error); default: return (EINVAL); } } #ifdef ARM_NEW_PMAP void set_stackptrs(int cpu) { set_stackptr(PSR_IRQ32_MODE, irqstack + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_ABT32_MODE, abtstack + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_UND32_MODE, undstack + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); } #else void set_stackptrs(int cpu) { set_stackptr(PSR_IRQ32_MODE, irqstack.pv_va + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_ABT32_MODE, abtstack.pv_va + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_UND32_MODE, undstack.pv_va + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); } #endif #ifdef EFI #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *) ptr) + size)) static void add_efi_map_entries(struct efi_map_header *efihdr, struct mem_region *mr, int *mrcnt, uint32_t *memsize) { struct efi_md *map, *p; const char *type; size_t efisz, memory_size; int ndesc, i, j; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode" }; *mrcnt = 0; *memsize = 0; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. */ efisz = roundup2(sizeof(struct efi_map_header), 0x10); map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; if (boothowto & RB_VERBOSE) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); memory_size = 0; for (i = 0, j = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { if (p->md_type <= EFI_MD_TYPE_PALCODE) type = types[p->md_type]; else type = ""; printf("%23s %012llx %12p %08llx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. */ break; default: continue; } j++; if (j >= FDT_MEM_REGIONS) break; mr[j].mr_start = p->md_phys; mr[j].mr_size = p->md_pages * PAGE_SIZE; memory_size += mr[j].mr_size; } *mrcnt = j; *memsize = memory_size; } #endif /* EFI */ #ifdef FDT static char * kenv_next(char *cp) { if (cp != NULL) { while (*cp != 0) cp++; cp++; if (*cp == 0) cp = NULL; } return (cp); } static void print_kenv(void) { char *cp; debugf("loader passed (static) kenv:\n"); if (loader_envp == NULL) { debugf(" no env, null ptr\n"); return; } debugf(" loader_envp = 0x%08x\n", (uint32_t)loader_envp); for (cp = loader_envp; cp != NULL; cp = kenv_next(cp)) debugf(" %x %s\n", (uint32_t)cp, cp); } #ifndef ARM_NEW_PMAP void * initarm(struct arm_boot_params *abp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; struct pv_addr kernel_l1pt; struct pv_addr dpcpu; vm_offset_t dtbp, freemempos, l2_start, lastaddr; uint32_t memsize, l2size; char *env; void *kmdp; u_int l1pagetable; int i, j, err_devmap, mem_regions_sz; lastaddr = parse_boot_param(abp); arm_physmem_kernaddr = abp->abp_physaddr; memsize = 0; cpuinfo_init(); set_cpufuncs(); /* * Find the dtb passed in by the boot loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); else dtbp = (vm_offset_t)NULL; #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, &memsize) != 0) panic("Cannot get physical memory regions"); arm_physmem_hardware_regions(mem_regions, mem_regions_sz); /* Grab reserved memory regions information from device tree. */ if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0) arm_physmem_exclude_regions(mem_regions, mem_regions_sz, EXFLAG_NODUMP | EXFLAG_NOALLOC); /* Platform-specific initialisation */ platform_probe_and_attach(); pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); /* Calculate number of L2 tables needed for mapping vm_page_array */ l2size = (memsize / PAGE_SIZE) * sizeof(struct vm_page); l2size = (l2size >> L1_S_SHIFT) + 1; /* * Add one table for end of kernel map, one for stacks, msgbuf and * L1 and L2 tables map and one for vectors map. */ l2size += 3; /* Make it divisible by 4 */ l2size = (l2size + 3) & ~3; freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK; /* Define a macro to simplify memory allocation */ #define valloc_pages(var, np) \ alloc_pages((var).pv_va, (np)); \ (var).pv_pa = (var).pv_va + (abp->abp_physaddr - KERNVIRTADDR); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0) freemempos += PAGE_SIZE; valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); for (i = 0, j = 0; i < l2size; ++i) { if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { valloc_pages(kernel_pt_table[i], L2_TABLE_SIZE / PAGE_SIZE); j = i; } else { kernel_pt_table[i].pv_va = kernel_pt_table[j].pv_va + L2_TABLE_SIZE_REAL * (i - j); kernel_pt_table[i].pv_pa = kernel_pt_table[i].pv_va - KERNVIRTADDR + abp->abp_physaddr; } } /* * Allocate a page for the system page mapped to 0x00000000 * or 0xffff0000. This page will just contain the system vectors * and can be shared by all processes. */ valloc_pages(systempage, 1); /* Allocate dynamic per-cpu area. */ valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu.pv_va, 0); /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU); valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU); valloc_pages(undstack, UND_STACK_SIZE * MAXCPU); valloc_pages(kernelstack, kstack_pages * MAXCPU); valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); /* * Now we start construction of the L1 page table * We start by mapping the L2 page tables into the L1. * This means that we can replace L1 mappings later on if necessary */ l1pagetable = kernel_l1pt.pv_va; /* * Try to map as much as possible of kernel text and data using * 1MB section mapping and for the rest of initial kernel address * space use L2 coarse tables. * * Link L2 tables for mapping remainder of kernel (modulo 1MB) * and kernel structures */ l2_start = lastaddr & ~(L1_S_OFFSET); for (i = 0 ; i < l2size - 1; i++) pmap_link_l2pt(l1pagetable, l2_start + i * L1_S_SIZE, &kernel_pt_table[i]); pmap_curmaxkvaddr = l2_start + (l2size - 1) * L1_S_SIZE; /* Map kernel code and data */ pmap_map_chunk(l1pagetable, KERNVIRTADDR, abp->abp_physaddr, (((uint32_t)(lastaddr) - KERNVIRTADDR) + PAGE_MASK) & ~PAGE_MASK, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map L1 directory and allocated L2 page tables */ pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); pmap_map_chunk(l1pagetable, kernel_pt_table[0].pv_va, kernel_pt_table[0].pv_pa, L2_TABLE_SIZE_REAL * l2size, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); /* Map allocated DPCPU, stacks and msgbuf */ pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa, freemempos - dpcpu.pv_va, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Link and map the vector page */ pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH, &kernel_pt_table[l2size - 1]); pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, PTE_CACHE); /* Establish static device mappings. */ err_devmap = platform_devmap_init(); arm_devmap_bootstrap(l1pagetable, NULL); vm_max_kernel_address = platform_lastaddr(); cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT); pmap_pa = kernel_l1pt.pv_pa; setttb(kernel_l1pt.pv_pa); cpu_tlb_flushID(); cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)); /* * Now that proper page tables are installed, call cpu_setup() to enable * instruction and data caches and other chip-specific features. */ cpu_setup(); /* * Only after the SOC registers block is mapped we can perform device * tree fixups, as they may attempt to read parameters from hardware. */ OF_interpret("perform-fixup", 0); platform_gpio_init(); cninit(); debugf("initarm: console initialized\n"); debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); debugf(" boothowto = 0x%08x\n", boothowto); debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp); print_kenv(); env = kern_getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } if (err_devmap != 0) printf("WARNING: could not fully configure devmap, error=%d\n", err_devmap); platform_late_init(); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE); set_stackptrs(0); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). */ cpu_idcache_wbinv_all(); undefined_init(); init_proc0(kernelstack.pv_va); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); pmap_bootstrap(freemempos, &kernel_l1pt); msgbufp = (void *)msgbufpv.pv_va; msgbufinit(msgbufp, msgbufsize); mutex_init(); /* * Exclude the kernel (and all the things we allocated which immediately * follow the kernel) from the VM allocation pool but not from crash * dumps. virtual_avail is a global variable which tracks the kva we've * "allocated" while setting up pmaps. * * Prepare the list of physical memory available to the vm subsystem. */ arm_physmem_exclude_region(abp->abp_physaddr, (virtual_avail - KERNVIRTADDR), EXFLAG_NOALLOC); arm_physmem_init_kernel_globals(); init_param2(physmem); dbg_monitor_init(); kdb_init(); return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb))); } #else /* !ARM_NEW_PMAP */ void * initarm(struct arm_boot_params *abp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; vm_paddr_t lastaddr; vm_offset_t dtbp, kernelstack, dpcpu; uint32_t memsize; char *env; void *kmdp; int err_devmap, mem_regions_sz; #ifdef EFI struct efi_map_header *efihdr; #endif /* get last allocated physical address */ arm_physmem_kernaddr = abp->abp_physaddr; lastaddr = parse_boot_param(abp) - KERNVIRTADDR + arm_physmem_kernaddr; memsize = 0; set_cpufuncs(); cpuinfo_init(); /* * Find the dtb passed in by the boot loader. */ kmdp = preload_search_by_type("elf kernel"); dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); #ifdef EFI efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr != NULL) { add_efi_map_entries(efihdr, mem_regions, &mem_regions_sz, &memsize); } else #endif { /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, &memsize) != 0) panic("Cannot get physical memory regions"); } arm_physmem_hardware_regions(mem_regions, mem_regions_sz); /* Grab reserved memory regions information from device tree. */ if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0) arm_physmem_exclude_regions(mem_regions, mem_regions_sz, EXFLAG_NODUMP | EXFLAG_NOALLOC); /* * Set TEX remapping registers. * Setup kernel page tables and switch to kernel L1 page table. */ pmap_set_tex(); pmap_bootstrap_prepare(lastaddr); /* * Now that proper page tables are installed, call cpu_setup() to enable * instruction and data caches and other chip-specific features. */ cpu_setup(); /* Platform-specific initialisation */ platform_probe_and_attach(); pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); /* * Allocate a page for the system page mapped to 0xffff0000 * This page will just contain the system vectors and can be * shared by all processes. */ systempage = pmap_preboot_get_pages(1); /* Map the vector page. */ pmap_preboot_map_pages(systempage, ARM_VECTORS_HIGH, 1); if (virtual_end >= ARM_VECTORS_HIGH) virtual_end = ARM_VECTORS_HIGH - 1; /* Allocate dynamic per-cpu area. */ dpcpu = pmap_preboot_get_vpages(DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate stacks for all modes */ irqstack = pmap_preboot_get_vpages(IRQ_STACK_SIZE * MAXCPU); abtstack = pmap_preboot_get_vpages(ABT_STACK_SIZE * MAXCPU); undstack = pmap_preboot_get_vpages(UND_STACK_SIZE * MAXCPU ); kernelstack = pmap_preboot_get_vpages(kstack_pages * MAXCPU); /* Allocate message buffer. */ msgbufp = (void *)pmap_preboot_get_vpages( round_page(msgbufsize) / PAGE_SIZE); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ set_stackptrs(0); mutex_init(); /* Establish static device mappings. */ err_devmap = platform_devmap_init(); arm_devmap_bootstrap(0, NULL); vm_max_kernel_address = platform_lastaddr(); /* * Only after the SOC registers block is mapped we can perform device * tree fixups, as they may attempt to read parameters from hardware. */ OF_interpret("perform-fixup", 0); platform_gpio_init(); cninit(); debugf("initarm: console initialized\n"); debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); debugf(" boothowto = 0x%08x\n", boothowto); debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp); debugf(" lastaddr1: 0x%08x\n", lastaddr); print_kenv(); env = kern_getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); if (err_devmap != 0) printf("WARNING: could not fully configure devmap, error=%d\n", err_devmap); platform_late_init(); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). */ /* Set stack for exception handlers */ undefined_init(); init_proc0(kernelstack); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); enable_interrupts(PSR_A); pmap_bootstrap(0); /* Exclude the kernel (and all the things we allocated which immediately * follow the kernel) from the VM allocation pool but not from crash * dumps. virtual_avail is a global variable which tracks the kva we've * "allocated" while setting up pmaps. * * Prepare the list of physical memory available to the vm subsystem. */ arm_physmem_exclude_region(abp->abp_physaddr, pmap_preboot_get_pages(0) - abp->abp_physaddr, EXFLAG_NOALLOC); arm_physmem_init_kernel_globals(); init_param2(physmem); /* Init message buffer. */ msgbufinit(msgbufp, msgbufsize); dbg_monitor_init(); kdb_init(); return ((void *)STACKALIGN(thread0.td_pcb)); } #endif /* !ARM_NEW_PMAP */ #endif /* FDT */ uint32_t (*arm_cpu_fill_vdso_timehands)(struct vdso_timehands *, struct timecounter *); uint32_t cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) { return (arm_cpu_fill_vdso_timehands != NULL ? arm_cpu_fill_vdso_timehands(vdso_th, tc) : 0); } Index: head/sys/arm64/arm64/machdep.c =================================================================== --- head/sys/arm64/arm64/machdep.c (revision 294929) +++ head/sys/arm64/arm64/machdep.c (revision 294930) @@ -1,984 +1,984 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include "opt_platform.h" #include "opt_ddb.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #ifdef FDT #include #endif struct pcpu __pcpu[MAXCPU]; static struct trapframe proc0_tf; vm_paddr_t phys_avail[PHYS_AVAIL_SIZE + 2]; vm_paddr_t dump_avail[PHYS_AVAIL_SIZE + 2]; int early_boot = 1; int cold = 1; long realmem = 0; long Maxmem = 0; #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t physmap[PHYSMAP_SIZE]; u_int physmap_idx; struct kva_md_info kmi; int64_t dcache_line_size; /* The minimum D cache line size */ int64_t icache_line_size; /* The minimum I cache line size */ int64_t idcache_line_size; /* The minimum cache line size */ static void cpu_startup(void *dummy) { identify_cpu(); vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); int cpu_idle_wakeup(int cpu) { return (0); } void bzero(void *buf, size_t len) { uint8_t *p; p = buf; while(len-- > 0) *p++ = 0; } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sp = frame->tf_sp; regs->lr = frame->tf_lr; regs->elr = frame->tf_elr; regs->spsr = frame->tf_spsr; memcpy(regs->x, frame->tf_x, sizeof(regs->x)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sp = regs->sp; frame->tf_lr = regs->lr; frame->tf_elr = regs->elr; frame->tf_spsr = regs->spsr; memcpy(frame->tf_x, regs->x, sizeof(frame->tf_x)); return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. */ vfp_save_state(td, pcb); memcpy(regs->fp_q, pcb->pcb_vfp, sizeof(regs->fp_q)); regs->fp_cr = pcb->pcb_fpcr; regs->fp_sr = pcb->pcb_fpsr; } else #endif memset(regs->fp_q, 0, sizeof(regs->fp_q)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; memcpy(pcb->pcb_vfp, regs->fp_q, sizeof(regs->fp_q)); pcb->pcb_fpcr = regs->fp_cr; pcb->pcb_fpsr = regs->fp_sr; #endif return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { panic("ARM64TODO: fill_dbregs"); } int set_dbregs(struct thread *td, struct dbreg *regs) { panic("ARM64TODO: set_dbregs"); } int ptrace_set_pc(struct thread *td, u_long addr) { panic("ARM64TODO: ptrace_set_pc"); return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (0); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (0); } void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set x0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. */ tf->tf_x[0] = td->td_retval[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_lr = imgp->entry_addr; tf->tf_elr = imgp->entry_addr; } /* Sanity check these are the same size, they will be memcpy'd to and fro */ CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct gpregs *)0)->gp_x); CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct reg *)0)->x); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_x[0] = 0; mcp->mc_gpregs.gp_spsr = tf->tf_spsr & ~PSR_C; } else { mcp->mc_gpregs.gp_x[0] = tf->tf_x[0]; mcp->mc_gpregs.gp_spsr = tf->tf_spsr; } memcpy(&mcp->mc_gpregs.gp_x[1], &tf->tf_x[1], sizeof(mcp->mc_gpregs.gp_x[1]) * (nitems(mcp->mc_gpregs.gp_x) - 1)); mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_lr = tf->tf_lr; mcp->mc_gpregs.gp_elr = tf->tf_elr; return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf = td->td_frame; memcpy(tf->tf_x, mcp->mc_gpregs.gp_x, sizeof(tf->tf_x)); tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_lr = mcp->mc_gpregs.gp_lr; tf->tf_elr = mcp->mc_gpregs.gp_elr; tf->tf_spsr = mcp->mc_gpregs.gp_spsr; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); curpcb = curthread->td_pcb; if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. */ vfp_save_state(td, curpcb); memcpy(mcp->mc_fpregs.fp_q, curpcb->pcb_vfp, sizeof(mcp->mc_fpregs)); mcp->mc_fpregs.fp_cr = curpcb->pcb_fpcr; mcp->mc_fpregs.fp_sr = curpcb->pcb_fpsr; mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags; mcp->mc_flags |= _MC_FP_VALID; } critical_exit(); #endif } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); if ((mcp->mc_flags & _MC_FP_VALID) != 0) { curpcb = curthread->td_pcb; /* * Discard any vfp state for the current thread, we * are about to override it. */ vfp_discard(td); memcpy(curpcb->pcb_vfp, mcp->mc_fpregs.fp_q, sizeof(mcp->mc_fpregs)); curpcb->pcb_fpcr = mcp->mc_fpregs.fp_cr; curpcb->pcb_fpsr = mcp->mc_fpregs.fp_sr; curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags; } critical_exit(); #endif } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "dsb sy \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { /* We should have shutdown by now, if not enter a low power sleep */ intr_disable(); while (1) { __asm __volatile("wfi"); } } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* ARM64TODO TBD */ } /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { panic("ARM64TODO: cpu_est_clockrate"); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; register_t daif; td = curthread; if (td->td_md.md_spinlock_count == 0) { daif = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_daif = daif; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t daif; td = curthread; critical_exit(); daif = td->td_md.md_saved_daif; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(daif); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; uint32_t spsr; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); spsr = uc.uc_mcontext.mc_gpregs.gp_spsr; if ((spsr & PSR_M_MASK) != PSR_M_EL0t || (spsr & (PSR_F | PSR_I | PSR_A | PSR_D)) != 0) return (EINVAL); set_mcontext(td, &uc.uc_mcontext); set_fpcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { int i; for (i = 0; i < PCB_LR; i++) pcb->pcb_x[i] = tf->tf_x[i]; pcb->pcb_x[PCB_LR] = tf->tf_lr; pcb->pcb_pc = tf->tf_elr; pcb->pcb_sp = tf->tf_sp; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; struct sysentvec *sysent; int code, onstack, sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - fp = (struct sigframe *)(td->td_sigstk.ss_sp + + fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); get_fpcontext(td, &frame.sf_uc.uc_mcontext); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_x[0]= sig; tf->tf_x[1] = (register_t)&fp->sf_si; tf->tf_x[2] = (register_t)&fp->sf_uc; tf->tf_elr = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_lr = (register_t)sysent->sv_sigcode_base; else tf->tf_lr = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } static void init_proc0(vm_offset_t kstack) { struct pcpu *pcpup = &__pcpu[0]; proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *)(thread0.td_kstack) - 1; thread0.td_pcb->pcb_fpflags = 0; thread0.td_pcb->pcb_vfpcpu = UINT_MAX; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } typedef struct { uint32_t type; uint64_t phys_start; uint64_t virt_start; uint64_t num_pages; uint64_t attr; } EFI_MEMORY_DESCRIPTOR; static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, u_int *physmap_idxp) { u_int i, insert_idx, _physmap_idx; _physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = _physmap_idx; for (i = 0; i <= _physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= _physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } _physmap_idx += 2; *physmap_idxp = _physmap_idx; if (_physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = _physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *) ptr) + size)) static void add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, u_int *physmap_idxp) { struct efi_md *map, *p; const char *type; size_t efisz; int ndesc, i; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode" }; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. */ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; if (boothowto & RB_VERBOSE) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { if (p->md_type <= EFI_MD_TYPE_PALCODE) type = types[p->md_type]; else type = ""; printf("%23s %012lx %12p %08lx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. */ break; default: continue; } if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE), physmap, physmap_idxp)) break; } } #ifdef FDT static void try_load_dtb(caddr_t kmdp) { vm_offset_t dtbp; dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); if (dtbp == (vm_offset_t)NULL) { printf("ERROR loading DTB\n"); return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); } #endif static void cache_setup(void) { int dcache_line_shift, icache_line_shift; uint32_t ctr_el0; ctr_el0 = READ_SPECIALREG(ctr_el0); /* Read the log2 words in each D cache line */ dcache_line_shift = CTR_DLINE_SIZE(ctr_el0); /* Get the D cache line size */ dcache_line_size = sizeof(int) << dcache_line_shift; /* And the same for the I cache */ icache_line_shift = CTR_ILINE_SIZE(ctr_el0); icache_line_size = sizeof(int) << icache_line_shift; idcache_line_size = MIN(dcache_line_size, icache_line_size); } void initarm(struct arm64_bootparams *abp) { struct efi_map_header *efihdr; struct pcpu *pcpup; vm_offset_t lastaddr; caddr_t kmdp; vm_paddr_t mem_len; int i; /* Set the module data location */ preload_metadata = (caddr_t)(uintptr_t)(abp->modulep); /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *), 0); #ifdef FDT try_load_dtb(kmdp); #endif /* Find the address to start allocating from */ lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); /* Load the physical memory ranges */ physmap_idx = 0; efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); add_efi_map_entries(efihdr, physmap, &physmap_idx); /* Print the memory map */ mem_len = 0; for (i = 0; i < physmap_idx; i += 2) { dump_avail[i] = physmap[i]; dump_avail[i + 1] = physmap[i + 1]; mem_len += physmap[i + 1] - physmap[i]; } dump_avail[i] = 0; dump_avail[i + 1] = 0; /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. */ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); PCPU_SET(curthread, &thread0); /* Do basic tuning, hz etc */ init_param1(); cache_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ pmap_bootstrap(abp->kern_l1pt, KERNBASE - abp->kern_delta, lastaddr - KERNBASE); arm_devmap_bootstrap(0, NULL); cninit(); init_proc0(abp->kern_stack); msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); dbg_monitor_init(); kdb_init(); early_boot = 0; } uint32_t (*arm_cpu_fill_vdso_timehands)(struct vdso_timehands *, struct timecounter *); uint32_t cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) { return (arm_cpu_fill_vdso_timehands != NULL ? arm_cpu_fill_vdso_timehands(vdso_th, tc) : 0); } #ifdef DDB #include DB_SHOW_COMMAND(specialregs, db_show_spregs) { #define PRINT_REG(reg) \ db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg)) PRINT_REG(actlr_el1); PRINT_REG(afsr0_el1); PRINT_REG(afsr1_el1); PRINT_REG(aidr_el1); PRINT_REG(amair_el1); PRINT_REG(ccsidr_el1); PRINT_REG(clidr_el1); PRINT_REG(contextidr_el1); PRINT_REG(cpacr_el1); PRINT_REG(csselr_el1); PRINT_REG(ctr_el0); PRINT_REG(currentel); PRINT_REG(daif); PRINT_REG(dczid_el0); PRINT_REG(elr_el1); PRINT_REG(esr_el1); PRINT_REG(far_el1); #if 0 /* ARM64TODO: Enable VFP before reading floating-point registers */ PRINT_REG(fpcr); PRINT_REG(fpsr); #endif PRINT_REG(id_aa64afr0_el1); PRINT_REG(id_aa64afr1_el1); PRINT_REG(id_aa64dfr0_el1); PRINT_REG(id_aa64dfr1_el1); PRINT_REG(id_aa64isar0_el1); PRINT_REG(id_aa64isar1_el1); PRINT_REG(id_aa64pfr0_el1); PRINT_REG(id_aa64pfr1_el1); PRINT_REG(id_afr0_el1); PRINT_REG(id_dfr0_el1); PRINT_REG(id_isar0_el1); PRINT_REG(id_isar1_el1); PRINT_REG(id_isar2_el1); PRINT_REG(id_isar3_el1); PRINT_REG(id_isar4_el1); PRINT_REG(id_isar5_el1); PRINT_REG(id_mmfr0_el1); PRINT_REG(id_mmfr1_el1); PRINT_REG(id_mmfr2_el1); PRINT_REG(id_mmfr3_el1); #if 0 /* Missing from llvm */ PRINT_REG(id_mmfr4_el1); #endif PRINT_REG(id_pfr0_el1); PRINT_REG(id_pfr1_el1); PRINT_REG(isr_el1); PRINT_REG(mair_el1); PRINT_REG(midr_el1); PRINT_REG(mpidr_el1); PRINT_REG(mvfr0_el1); PRINT_REG(mvfr1_el1); PRINT_REG(mvfr2_el1); PRINT_REG(revidr_el1); PRINT_REG(sctlr_el1); PRINT_REG(sp_el0); PRINT_REG(spsel); PRINT_REG(spsr_el1); PRINT_REG(tcr_el1); PRINT_REG(tpidr_el0); PRINT_REG(tpidr_el1); PRINT_REG(tpidrro_el0); PRINT_REG(ttbr0_el1); PRINT_REG(ttbr1_el1); PRINT_REG(vbar_el1); #undef PRINT_REG } DB_SHOW_COMMAND(vtop, db_show_vtop) { uint64_t phys; if (have_addr) { phys = arm64_address_translate_s1e1r(addr); db_printf("Physical address reg: 0x%016lx\n", phys); } else db_printf("show vtop \n"); } #endif Index: head/sys/arm64/arm64/vm_machdep.c =================================================================== --- head/sys/arm64/arm64/vm_machdep.c (revision 294929) +++ head/sys/arm64/arm64/vm_machdep.c (revision 294930) @@ -1,260 +1,260 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct pcb *pcb2; struct trapframe *tf; if ((flags & RFPROC) == 0) return; if (td1 == curthread) { /* * Save the tpidr_el0 and the vfp state, these normally happen * in cpu_switch, but if userland changes these then forks * this may not have happened. */ td1->td_pcb->pcb_tpidr_el0 = READ_SPECIALREG(tpidr_el0); #ifdef VFP if ((td1->td_pcb->pcb_fpflags & PCB_FP_STARTED) != 0) vfp_save_state(td1, td1->td_pcb); #endif } pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; td2->td_pcb = pcb2; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); td2->td_pcb->pcb_l1addr = vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l1); tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1); bcopy(td1->td_frame, tf, sizeof(*tf)); tf->tf_x[0] = 0; tf->tf_x[1] = 0; tf->tf_spsr = 0; td2->td_frame = tf; /* Set the return value registers for fork() */ td2->td_pcb->pcb_x[8] = (uintptr_t)fork_return; td2->td_pcb->pcb_x[9] = (uintptr_t)td2; td2->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline; td2->td_pcb->pcb_sp = (uintptr_t)td2->td_frame; td2->td_pcb->pcb_vfpcpu = UINT_MAX; /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; td2->td_md.md_saved_daif = 0; } void cpu_reset(void) { printf("cpu_reset"); while(1) __asm volatile("wfi" ::: "memory"); } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; switch (error) { case 0: frame->tf_x[0] = td->td_retval[0]; frame->tf_x[1] = td->td_retval[1]; frame->tf_spsr &= ~PSR_C; /* carry bit */ break; case ERESTART: frame->tf_elr -= 4; break; case EJUSTRETURN: break; default: frame->tf_spsr |= PSR_C; /* carry bit */ frame->tf_x[0] = error; break; } } /* * Initialize machine state (pcb and trap frame) for a new thread about to * upcall. Put enough state in the new thread's PCB to get it to go back * userret(), where we can intercept it again to set the return (upcall) * Address and stack, along with those from upcals that are from other sources * such as those generated in thread_userret() itself. */ void cpu_set_upcall(struct thread *td, struct thread *td0) { bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb)); td->td_pcb->pcb_x[8] = (uintptr_t)fork_return; td->td_pcb->pcb_x[9] = (uintptr_t)td; td->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline; td->td_pcb->pcb_sp = (uintptr_t)td->td_frame; td->td_pcb->pcb_vfpcpu = UINT_MAX; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_daif = 0; } /* * Set that machine state for performing an upcall that has to * be done in thread_userret() so that those upcalls generated * in thread_userret() itself can be done as well. */ void cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf = td->td_frame; - tf->tf_sp = STACKALIGN(stack->ss_sp + stack->ss_size); + tf->tf_sp = STACKALIGN((uintptr_t)stack->ss_sp + stack->ss_size); tf->tf_elr = (register_t)entry; tf->tf_x[0] = (register_t)arg; } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct pcb *pcb; if ((uintptr_t)tls_base >= VM_MAXUSER_ADDRESS) return (EINVAL); pcb = td->td_pcb; pcb->pcb_tpidr_el0 = (register_t)tls_base; return (0); } void cpu_thread_exit(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)STACKALIGN( td->td_pcb - 1); } void cpu_thread_free(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_set_fork_handler(struct thread *td, void (*func)(void *), void *arg) { td->td_pcb->pcb_x[8] = (uintptr_t)func; td->td_pcb->pcb_x[9] = (uintptr_t)arg; td->td_pcb->pcb_x[PCB_LR] = (uintptr_t)fork_trampoline; td->td_pcb->pcb_sp = (uintptr_t)td->td_frame; td->td_pcb->pcb_vfpcpu = UINT_MAX; } void cpu_exit(struct thread *td) { } void swi_vm(void *v) { /* Nothing to do here - busdma bounce buffers are not implemented. */ } Index: head/sys/i386/i386/machdep.c =================================================================== --- head/sys/i386/i386/machdep.c (revision 294929) +++ head/sys/i386/i386/machdep.c (revision 294930) @@ -1,3402 +1,3402 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_atpic.h" #include "opt_compat.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_platform.h" #include "opt_xbox.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef DDB #ifndef KDB #error KDB must be enabled in order for DDB to work! #endif #include #include #endif #ifdef PC98 #include #else #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef PERFMON #include #endif #ifdef SMP #include #endif #ifdef FDT #include #endif #ifdef DEV_APIC #include #endif #ifdef DEV_ISA #include #endif #ifdef XBOX #include int arch_i386_is_xbox = 0; uint32_t arch_i386_xbox_memsize = 0; #endif /* Sanity check for __curthread() */ CTASSERT(offsetof(struct pcpu, pc_curthread) == 0); extern register_t init386(int first); extern void dblfault_handler(void); #if !defined(CPU_DISABLE_SSE) && defined(I686_CPU) #define CPU_ENABLE_SSE #endif static void cpu_startup(void *); static void fpstate_drop(struct thread *td); static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len); static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* Intel ICH registers */ #define ICH_PMBASE 0x400 #define ICH_SMI_EN ICH_PMBASE + 0x30 int _udatasel, _ucodesel; u_int basemem; #ifdef PC98 int need_pre_dma_flush; /* If 1, use wbinvd befor DMA transfer. */ int need_post_dma_flush; /* If 1, use invd after DMA transfer. */ static int ispc98 = 1; SYSCTL_INT(_machdep, OID_AUTO, ispc98, CTLFLAG_RD, &ispc98, 0, ""); #endif int cold = 1; #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *, sigset_t *mask); #endif long Maxmem = 0; long realmem = 0; #ifdef PAE FEATURE(pae, "Physical Address Extensions"); #endif /* * The number of PHYSMAP entries must be one less than the number of * PHYSSEG entries because the PHYSMAP entry that spans the largest * physical address that is accessible by ISA DMA is split into two * PHYSSEG entries. */ #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t phys_avail[PHYSMAP_SIZE + 2]; vm_paddr_t dump_avail[PHYSMAP_SIZE + 2]; /* must be 2 less so 0 0 can signal end of chunks */ #define PHYS_AVAIL_ARRAY_END ((sizeof(phys_avail) / sizeof(phys_avail[0])) - 2) #define DUMP_AVAIL_ARRAY_END ((sizeof(dump_avail) / sizeof(dump_avail[0])) - 2) struct kva_md_info kmi; static struct trapframe proc0_tf; struct pcpu __pcpu[MAXCPU]; struct mtx icu_lock; struct mem_range_softc mem_range_softc; /* Default init_ops implementation. */ struct init_ops init_ops = { .early_clock_source_init = i8254_init, .early_delay = i8254_delay, #ifdef DEV_APIC .msi_init = msi_init, #endif }; static void cpu_startup(dummy) void *dummy; { uintmax_t memsize; char *sysenv; #ifndef PC98 /* * On MacBooks, we need to disallow the legacy USB circuit to * generate an SMI# because this can cause several problems, * namely: incorrect CPU frequency detection and failure to * start the APs. * We do this by disabling a bit in the SMI_EN (SMI Control and * Enable register) of the Intel ICH LPC Interface Bridge. */ sysenv = kern_getenv("smbios.system.product"); if (sysenv != NULL) { if (strncmp(sysenv, "MacBook1,1", 10) == 0 || strncmp(sysenv, "MacBook3,1", 10) == 0 || strncmp(sysenv, "MacBook4,1", 10) == 0 || strncmp(sysenv, "MacBookPro1,1", 13) == 0 || strncmp(sysenv, "MacBookPro1,2", 13) == 0 || strncmp(sysenv, "MacBookPro3,1", 13) == 0 || strncmp(sysenv, "MacBookPro4,1", 13) == 0 || strncmp(sysenv, "Macmini1,1", 10) == 0) { if (bootverbose) printf("Disabling LEGACY_USB_EN bit on " "Intel ICH.\n"); outl(ICH_SMI_EN, inl(ICH_SMI_EN) & ~0x8); } freeenv(sysenv); } #endif /* !PC98 */ /* * Good {morning,afternoon,evening,night}. */ startrtclock(); printcpuinfo(); panicifcpuunsupported(); #ifdef PERFMON perfmon_init(); #endif /* * Display physical memory if SMBIOS reports reasonable amount. */ memsize = 0; sysenv = kern_getenv("smbios.memory.enabled"); if (sysenv != NULL) { memsize = (uintmax_t)strtoul(sysenv, (char **)NULL, 10) << 10; freeenv(sysenv); } if (memsize < ptoa((uintmax_t)vm_cnt.v_free_count)) memsize = ptoa((uintmax_t)Maxmem); printf("real memory = %ju (%ju MB)\n", memsize, memsize >> 20); realmem = atop(memsize); /* * Display any holes after the first chunk of extended memory. */ if (bootverbose) { int indx; printf("Physical memory chunk(s):\n"); for (indx = 0; phys_avail[indx + 1] != 0; indx += 2) { vm_paddr_t size; size = phys_avail[indx + 1] - phys_avail[indx]; printf( "0x%016jx - 0x%016jx, %ju bytes (%ju pages)\n", (uintmax_t)phys_avail[indx], (uintmax_t)phys_avail[indx + 1] - 1, (uintmax_t)size, (uintmax_t)size / PAGE_SIZE); } } vm_ksubmap_init(&kmi); printf("avail memory = %ju (%ju MB)\n", ptoa((uintmax_t)vm_cnt.v_free_count), ptoa((uintmax_t)vm_cnt.v_free_count) / 1048576); /* * Set up buffers, so they can be used to read disk labels. */ bufinit(); vm_pager_bufferinit(); cpu_setregs(); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by call * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ #ifdef COMPAT_43 static void osendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct osigframe sf, *fp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - fp = (struct osigframe *)(td->td_sigstk.ss_sp + + fp = (struct osigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct osigframe)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct osigframe *)regs->tf_esp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_scp = (register_t)&fp->sf_siginfo.si_sc; bzero(&sf.sf_siginfo, sizeof(sf.sf_siginfo)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_arg2 = (register_t)&fp->sf_siginfo; sf.sf_siginfo.si_signo = sig; sf.sf_siginfo.si_code = ksi->ksi_code; sf.sf_ahu.sf_action = (__osiginfohandler_t *)catcher; sf.sf_addr = 0; } else { /* Old FreeBSD-style arguments. */ sf.sf_arg2 = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* Save most if not all of trap frame. */ sf.sf_siginfo.si_sc.sc_eax = regs->tf_eax; sf.sf_siginfo.si_sc.sc_ebx = regs->tf_ebx; sf.sf_siginfo.si_sc.sc_ecx = regs->tf_ecx; sf.sf_siginfo.si_sc.sc_edx = regs->tf_edx; sf.sf_siginfo.si_sc.sc_esi = regs->tf_esi; sf.sf_siginfo.si_sc.sc_edi = regs->tf_edi; sf.sf_siginfo.si_sc.sc_cs = regs->tf_cs; sf.sf_siginfo.si_sc.sc_ds = regs->tf_ds; sf.sf_siginfo.si_sc.sc_ss = regs->tf_ss; sf.sf_siginfo.si_sc.sc_es = regs->tf_es; sf.sf_siginfo.si_sc.sc_fs = regs->tf_fs; sf.sf_siginfo.si_sc.sc_gs = rgs(); sf.sf_siginfo.si_sc.sc_isp = regs->tf_isp; /* Build the signal context to be used by osigreturn(). */ sf.sf_siginfo.si_sc.sc_onstack = (oonstack) ? 1 : 0; SIG2OSIG(*mask, sf.sf_siginfo.si_sc.sc_mask); sf.sf_siginfo.si_sc.sc_sp = regs->tf_esp; sf.sf_siginfo.si_sc.sc_fp = regs->tf_ebp; sf.sf_siginfo.si_sc.sc_pc = regs->tf_eip; sf.sf_siginfo.si_sc.sc_ps = regs->tf_eflags; sf.sf_siginfo.si_sc.sc_trapno = regs->tf_trapno; sf.sf_siginfo.si_sc.sc_err = regs->tf_err; /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { /* XXX confusing names: `tf' isn't a trapframe; `regs' is. */ struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_siginfo.si_sc.sc_gs = tf->tf_vm86_gs; sf.sf_siginfo.si_sc.sc_fs = tf->tf_vm86_fs; sf.sf_siginfo.si_sc.sc_es = tf->tf_vm86_es; sf.sf_siginfo.si_sc.sc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_siginfo.si_sc.sc_ps = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* See sendsig() for comments. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, fp, sizeof(*fp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)fp; if (p->p_sysent->sv_sigcode_base != 0) { regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szosigcode; } else { /* a.out sysentvec does not use shared page */ regs->tf_eip = p->p_sysent->sv_psstrings - szosigcode; } regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; load_gs(_udatasel); regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 static void freebsd4_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe4 sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; struct trapframe *regs; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); bzero(sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); bzero(sf.sf_uc.uc_mcontext.__spare__, sizeof(sf.sf_uc.uc_mcontext.__spare__)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - sfp = (struct sigframe4 *)(td->td_sigstk.ss_sp + + sfp = (struct sigframe4 *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe4)); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sfp = (struct sigframe4 *)regs->tf_esp - 1; /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = ksi->ksi_addr; } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base + szsigcode - szfreebsd4_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #endif /* COMPAT_FREEBSD4 */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe sf, *sfp; struct proc *p; struct thread *td; struct sigacts *psp; char *sp; struct trapframe *regs; struct segment_descriptor *sdp; char *xfpusave; size_t xfpusave_len; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); #ifdef COMPAT_FREEBSD4 if (SIGISMEMBER(psp->ps_freebsd4, sig)) { freebsd4_sendsig(catcher, ksi, mask); return; } #endif #ifdef COMPAT_43 if (SIGISMEMBER(psp->ps_osigset, sig)) { osendsig(catcher, ksi, mask); return; } #endif regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); #ifdef CPU_ENABLE_SSE if (cpu_max_ext_state_size > sizeof(union savefpu) && use_xsave) { xfpusave_len = cpu_max_ext_state_size - sizeof(union savefpu); xfpusave = __builtin_alloca(xfpusave_len); } else { #else { #endif xfpusave_len = 0; xfpusave = NULL; } /* Save user context. */ bzero(&sf, sizeof(sf)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_gs = rgs(); bcopy(regs, &sf.sf_uc.uc_mcontext.mc_fs, sizeof(*regs)); sf.sf_uc.uc_mcontext.mc_len = sizeof(sf.sf_uc.uc_mcontext); /* magic */ get_fpcontext(td, &sf.sf_uc.uc_mcontext, xfpusave, xfpusave_len); fpstate_drop(td); /* * Unconditionally fill the fsbase and gsbase into the mcontext. */ sdp = &td->td_pcb->pcb_fsd; sf.sf_uc.uc_mcontext.mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; sf.sf_uc.uc_mcontext.mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; bzero(sf.sf_uc.uc_mcontext.mc_spare2, sizeof(sf.sf_uc.uc_mcontext.mc_spare2)); bzero(sf.sf_uc.__spare__, sizeof(sf.sf_uc.__spare__)); /* Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - sp = td->td_sigstk.ss_sp + td->td_sigstk.ss_size; + sp = (char *)td->td_sigstk.ss_sp + td->td_sigstk.ss_size; #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else sp = (char *)regs->tf_esp - 128; if (xfpusave != NULL) { sp -= xfpusave_len; sp = (char *)((unsigned int)sp & ~0x3F); sf.sf_uc.uc_mcontext.mc_xfpustate = (register_t)sp; } sp -= sizeof(struct sigframe); /* Align to 16 bytes. */ sfp = (struct sigframe *)((unsigned int)sp & ~0xF); /* Build the argument list for the signal handler. */ sf.sf_signum = sig; sf.sf_ucontext = (register_t)&sfp->sf_uc; bzero(&sf.sf_si, sizeof(sf.sf_si)); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ sf.sf_siginfo = (register_t)&sfp->sf_si; sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; /* Fill in POSIX parts */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ sf.sf_siginfo = ksi->ksi_code; sf.sf_addr = (register_t)ksi->ksi_addr; sf.sf_ahu.sf_handler = catcher; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * If we're a vm86 process, we want to save the segment registers. * We also change eflags to be our emulated eflags, not the actual * eflags. */ if (regs->tf_eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86 = &td->td_pcb->pcb_ext->ext_vm86; sf.sf_uc.uc_mcontext.mc_gs = tf->tf_vm86_gs; sf.sf_uc.uc_mcontext.mc_fs = tf->tf_vm86_fs; sf.sf_uc.uc_mcontext.mc_es = tf->tf_vm86_es; sf.sf_uc.uc_mcontext.mc_ds = tf->tf_vm86_ds; if (vm86->vm86_has_vme == 0) sf.sf_uc.uc_mcontext.mc_eflags = (tf->tf_eflags & ~(PSL_VIF | PSL_VIP)) | (vm86->vm86_eflags & (PSL_VIF | PSL_VIP)); /* * Clear PSL_NT to inhibit T_TSSFLT faults on return from * syscalls made by the signal handler. This just avoids * wasting time for our lazy fixup of such faults. PSL_NT * does nothing in vm86 mode, but vm86 programs can set it * almost legitimately in probes for old cpu types. */ tf->tf_eflags &= ~(PSL_VM | PSL_NT | PSL_VIF | PSL_VIP); } /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(*sfp)) != 0 || (xfpusave != NULL && copyout(xfpusave, (void *)sf.sf_uc.uc_mcontext.mc_xfpustate, xfpusave_len) != 0)) { #ifdef DEBUG printf("process %ld has trashed its stack\n", (long)p->p_pid); #endif PROC_LOCK(p); sigexit(td, SIGILL); } regs->tf_esp = (int)sfp; regs->tf_eip = p->p_sysent->sv_sigcode_base; if (regs->tf_eip == 0) regs->tf_eip = p->p_sysent->sv_psstrings - szsigcode; regs->tf_eflags &= ~(PSL_T | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * state to gain improper privileges. * * MPSAFE */ #ifdef COMPAT_43 int osigreturn(td, uap) struct thread *td; struct osigreturn_args /* { struct osigcontext *sigcntxp; } */ *uap; { struct osigcontext sc; struct trapframe *regs; struct osigcontext *scp; int eflags, error; ksiginfo_t ksi; regs = td->td_frame; error = copyin(uap->sigcntxp, &sc, sizeof(sc)); if (error != 0) return (error); scp = ≻ eflags = scp->sc_ps; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } tf->tf_vm86_ds = scp->sc_ds; tf->tf_vm86_es = scp->sc_es; tf->tf_vm86_fs = scp->sc_fs; tf->tf_vm86_gs = scp->sc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ if (!CS_SECURE(scp->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } regs->tf_ds = scp->sc_ds; regs->tf_es = scp->sc_es; regs->tf_fs = scp->sc_fs; } /* Restore remaining registers. */ regs->tf_eax = scp->sc_eax; regs->tf_ebx = scp->sc_ebx; regs->tf_ecx = scp->sc_ecx; regs->tf_edx = scp->sc_edx; regs->tf_esi = scp->sc_esi; regs->tf_edi = scp->sc_edi; regs->tf_cs = scp->sc_cs; regs->tf_ss = scp->sc_ss; regs->tf_isp = scp->sc_isp; regs->tf_ebp = scp->sc_fp; regs->tf_esp = scp->sc_sp; regs->tf_eip = scp->sc_pc; regs->tf_eflags = eflags; #if defined(COMPAT_43) if (scp->sc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, (sigset_t *)&scp->sc_mask, NULL, SIGPROCMASK_OLD); return (EJUSTRETURN); } #endif /* COMPAT_43 */ #ifdef COMPAT_FREEBSD4 /* * MPSAFE */ int freebsd4_sigreturn(td, uap) struct thread *td; struct freebsd4_sigreturn_args /* { const ucontext4 *sigcntxp; } */ *uap; { struct ucontext4 uc; struct trapframe *regs; struct ucontext4 *ucp; int cs, eflags, error; ksiginfo_t ksi; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): freebsd4_sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): freebsd4_sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } #endif /* COMPAT_FREEBSD4 */ /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; struct proc *p; struct trapframe *regs; ucontext_t *ucp; char *xfpustate; size_t xfpustate_len; int cs, eflags, error, ret; ksiginfo_t ksi; p = td->td_proc; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); ucp = &uc; if ((ucp->uc_mcontext.mc_flags & ~_MC_FLAG_MASK) != 0) { uprintf("pid %d (%s): sigreturn mc_flags %x\n", p->p_pid, td->td_name, ucp->uc_mcontext.mc_flags); return (EINVAL); } regs = td->td_frame; eflags = ucp->uc_mcontext.mc_eflags; if (eflags & PSL_VM) { struct trapframe_vm86 *tf = (struct trapframe_vm86 *)regs; struct vm86_kernel *vm86; /* * if pcb_ext == 0 or vm86_inited == 0, the user hasn't * set up the vm86 area, and we can't enter vm86 mode. */ if (td->td_pcb->pcb_ext == 0) return (EINVAL); vm86 = &td->td_pcb->pcb_ext->ext_vm86; if (vm86->vm86_inited == 0) return (EINVAL); /* Go back to user mode if both flags are set. */ if ((eflags & PSL_VIP) && (eflags & PSL_VIF)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); } if (vm86->vm86_has_vme) { eflags = (tf->tf_eflags & ~VME_USERCHANGE) | (eflags & VME_USERCHANGE) | PSL_VM; } else { vm86->vm86_eflags = eflags; /* save VIF, VIP */ eflags = (tf->tf_eflags & ~VM_USERCHANGE) | (eflags & VM_USERCHANGE) | PSL_VM; } bcopy(&ucp->uc_mcontext.mc_fs, tf, sizeof(struct trapframe)); tf->tf_eflags = eflags; tf->tf_vm86_ds = tf->tf_ds; tf->tf_vm86_es = tf->tf_es; tf->tf_vm86_fs = tf->tf_fs; tf->tf_vm86_gs = ucp->uc_mcontext.mc_gs; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; } else { /* * Don't allow users to change privileged or reserved flags. */ if (!EFL_SECURE(eflags, regs->tf_eflags)) { uprintf("pid %d (%s): sigreturn eflags = 0x%x\n", td->td_proc->p_pid, td->td_name, eflags); return (EINVAL); } /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ cs = ucp->uc_mcontext.mc_cs; if (!CS_SECURE(cs)) { uprintf("pid %d (%s): sigreturn cs = 0x%x\n", td->td_proc->p_pid, td->td_name, cs); ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } if ((uc.uc_mcontext.mc_flags & _MC_HASFPXSTATE) != 0) { xfpustate_len = uc.uc_mcontext.mc_xfpustate_len; if (xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) { uprintf( "pid %d (%s): sigreturn xfpusave_len = 0x%zx\n", p->p_pid, td->td_name, xfpustate_len); return (EINVAL); } xfpustate = __builtin_alloca(xfpustate_len); error = copyin((const void *)uc.uc_mcontext.mc_xfpustate, xfpustate, xfpustate_len); if (error != 0) { uprintf( "pid %d (%s): sigreturn copying xfpustate failed\n", p->p_pid, td->td_name); return (error); } } else { xfpustate = NULL; xfpustate_len = 0; } ret = set_fpcontext(td, &ucp->uc_mcontext, xfpustate, xfpustate_len); if (ret != 0) return (ret); bcopy(&ucp->uc_mcontext.mc_fs, regs, sizeof(*regs)); } #if defined(COMPAT_43) if (ucp->uc_mcontext.mc_onstack & 1) td->td_sigstk.ss_flags |= SS_ONSTACK; else td->td_sigstk.ss_flags &= ~SS_ONSTACK; #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Reset registers to default values on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *regs = td->td_frame; struct pcb *pcb = td->td_pcb; /* Reset pc->pcb_gs and %gs before possibly invalidating it. */ pcb->pcb_gs = _udatasel; load_gs(_udatasel); mtx_lock_spin(&dt_lock); if (td->td_proc->p_md.md_ldt) user_ldt_free(td); else mtx_unlock_spin(&dt_lock); bzero((char *)regs, sizeof(struct trapframe)); regs->tf_eip = imgp->entry_addr; regs->tf_esp = stack; regs->tf_eflags = PSL_USER | (regs->tf_eflags & PSL_T); regs->tf_ss = _udatasel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_cs = _ucodesel; /* PS_STRINGS value for BSD/OS binaries. It is 0 for non-BSD/OS. */ regs->tf_ebx = imgp->ps_strings; /* * Reset the hardware debug registers if they were in use. * They won't have any meaning for the newly exec'd process. */ if (pcb->pcb_flags & PCB_DBREGS) { pcb->pcb_dr0 = 0; pcb->pcb_dr1 = 0; pcb->pcb_dr2 = 0; pcb->pcb_dr3 = 0; pcb->pcb_dr6 = 0; pcb->pcb_dr7 = 0; if (pcb == curpcb) { /* * Clear the debug registers on the running * CPU, otherwise they will end up affecting * the next process we switch to. */ reset_dbregs(); } pcb->pcb_flags &= ~PCB_DBREGS; } pcb->pcb_initial_npxcw = __INITIAL_NPXCW__; /* * Drop the FP state if we hold it, so that the process gets a * clean FP state if it uses the FPU again. */ fpstate_drop(td); /* * XXX - Linux emulator * Make sure sure edx is 0x0 on entry. Linux binaries depend * on it. */ td->td_retval[1] = 0; } void cpu_setregs(void) { unsigned int cr0; cr0 = rcr0(); /* * CR0_MP, CR0_NE and CR0_TS are set for NPX (FPU) support: * * Prepare to trap all ESC (i.e., NPX) instructions and all WAIT * instructions. We must set the CR0_MP bit and use the CR0_TS * bit to control the trap, because setting the CR0_EM bit does * not cause WAIT instructions to trap. It's important to trap * WAIT instructions - otherwise the "wait" variants of no-wait * control instructions would degenerate to the "no-wait" variants * after FP context switches but work correctly otherwise. It's * particularly important to trap WAITs when there is no NPX - * otherwise the "wait" variants would always degenerate. * * Try setting CR0_NE to get correct error reporting on 486DX's. * Setting it should fail or do nothing on lesser processors. */ cr0 |= CR0_MP | CR0_NE | CR0_TS | CR0_WP | CR0_AM; load_cr0(cr0); load_gs(_udatasel); } u_long bootdev; /* not a struct cdev *- encoding is different */ SYSCTL_ULONG(_machdep, OID_AUTO, guessed_bootdev, CTLFLAG_RD, &bootdev, 0, "Maybe the Boot device (not in struct cdev *format)"); static char bootmethod[16] = "BIOS"; SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0, "System firmware boot method"); /* * Initialize 386 and configure to run kernel */ /* * Initialize segments & interrupt table */ int _default_ldt; union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */ union descriptor ldt[NLDT]; /* local descriptor table */ static struct gate_descriptor idt0[NIDT]; struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */ struct region_descriptor r_gdt, r_idt; /* table descriptors */ struct mtx dt_lock; /* lock for GDT and LDT */ static struct i386tss dblfault_tss; static char dblfault_stack[PAGE_SIZE]; extern vm_offset_t proc0kstack; /* * software prototypes -- in more palatable form. * * GCODE_SEL through GUDATA_SEL must be in this order for syscall/sysret * GUFS_SEL and GUGS_SEL must be in this order (swtch.s knows it) */ struct soft_segment_descriptor gdt_segs[] = { /* GNULL_SEL 0 Null Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = SEL_KPL, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPRIV_SEL 1 SMP Per-Processor Private Data Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUFS_SEL 2 %fs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUGS_SEL 3 %gs Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GCODE_SEL 4 Code Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GDATA_SEL 5 Data Descriptor for kernel */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUCODE_SEL 6 Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GUDATA_SEL 7 Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSLOWMEM_SEL 8 BIOS access to realmode segment 0x40, must be #8 in GDT */ { .ssd_base = 0x400, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_KPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GPROC0_SEL 9 Proc 0 Tss Descriptor */ { .ssd_base = 0x0, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GLDT_SEL 10 LDT Descriptor */ { .ssd_base = (int) ldt, .ssd_limit = sizeof(ldt)-1, .ssd_type = SDT_SYSLDT, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GUSERLDT_SEL 11 User LDT Descriptor per process */ { .ssd_base = (int) ldt, .ssd_limit = (512 * sizeof(union descriptor)-1), .ssd_type = SDT_SYSLDT, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GPANIC_SEL 12 Panic Tss Descriptor */ { .ssd_base = (int) &dblfault_tss, .ssd_limit = sizeof(struct i386tss)-1, .ssd_type = SDT_SYS386TSS, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* GBIOSCODE32_SEL 13 BIOS 32-bit interface (32bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSCODE16_SEL 14 BIOS 32-bit interface (16bit Code) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSDATA_SEL 15 BIOS 32-bit interface (Data) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* GBIOSUTIL_SEL 16 BIOS 16-bit interface (Utility) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GBIOSARGS_SEL 17 BIOS 16-bit interface (Arguments) */ { .ssd_base = 0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = 0, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 1 }, /* GNDIS_SEL 18 NDIS Descriptor */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, }; static struct soft_segment_descriptor ldt_segs[] = { /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Code Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMERA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, /* Null Descriptor - overwritten by call gate */ { .ssd_base = 0x0, .ssd_limit = 0x0, .ssd_type = 0, .ssd_dpl = 0, .ssd_p = 0, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 0, .ssd_gran = 0 }, /* Data Descriptor for user */ { .ssd_base = 0x0, .ssd_limit = 0xfffff, .ssd_type = SDT_MEMRWA, .ssd_dpl = SEL_UPL, .ssd_p = 1, .ssd_xx = 0, .ssd_xx1 = 0, .ssd_def32 = 1, .ssd_gran = 1 }, }; void setidt(idx, func, typ, dpl, selec) int idx; inthand_t *func; int typ; int dpl; int selec; { struct gate_descriptor *ip; ip = idt + idx; ip->gd_looffset = (int)func; ip->gd_selector = selec; ip->gd_stkcpy = 0; ip->gd_xx = 0; ip->gd_type = typ; ip->gd_dpl = dpl; ip->gd_p = 1; ip->gd_hioffset = ((int)func)>>16 ; } extern inthand_t IDTVEC(div), IDTVEC(dbg), IDTVEC(nmi), IDTVEC(bpt), IDTVEC(ofl), IDTVEC(bnd), IDTVEC(ill), IDTVEC(dna), IDTVEC(fpusegm), IDTVEC(tss), IDTVEC(missing), IDTVEC(stk), IDTVEC(prot), IDTVEC(page), IDTVEC(mchk), IDTVEC(rsvd), IDTVEC(fpu), IDTVEC(align), IDTVEC(xmm), #ifdef KDTRACE_HOOKS IDTVEC(dtrace_ret), #endif #ifdef XENHVM IDTVEC(xen_intr_upcall), #endif IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall); #ifdef DDB /* * Display the index and function name of any IDT entries that don't use * the default 'rsvd' entry point. */ DB_SHOW_COMMAND(idt, db_show_idt) { struct gate_descriptor *ip; int idx; uintptr_t func; ip = idt; for (idx = 0; idx < NIDT && !db_pager_quit; idx++) { func = (ip->gd_hioffset << 16 | ip->gd_looffset); if (func != (uintptr_t)&IDTVEC(rsvd)) { db_printf("%3d\t", idx); db_printsym(func, DB_STGY_PROC); db_printf("\n"); } ip++; } } /* Show privileged registers. */ DB_SHOW_COMMAND(sysregs, db_show_sysregs) { uint64_t idtr, gdtr; idtr = ridt(); db_printf("idtr\t0x%08x/%04x\n", (u_int)(idtr >> 16), (u_int)idtr & 0xffff); gdtr = rgdt(); db_printf("gdtr\t0x%08x/%04x\n", (u_int)(gdtr >> 16), (u_int)gdtr & 0xffff); db_printf("ldtr\t0x%04x\n", rldt()); db_printf("tr\t0x%04x\n", rtr()); db_printf("cr0\t0x%08x\n", rcr0()); db_printf("cr2\t0x%08x\n", rcr2()); db_printf("cr3\t0x%08x\n", rcr3()); db_printf("cr4\t0x%08x\n", rcr4()); if (rcr4() & CR4_XSAVE) db_printf("xcr0\t0x%016llx\n", rxcr(0)); if (amd_feature & (AMDID_NX | AMDID_LM)) db_printf("EFER\t0x%016llx\n", rdmsr(MSR_EFER)); if (cpu_feature2 & (CPUID2_VMX | CPUID2_SMX)) db_printf("FEATURES_CTL\t0x%016llx\n", rdmsr(MSR_IA32_FEATURE_CONTROL)); if ((cpu_vendor_id == CPU_VENDOR_INTEL || cpu_vendor_id == CPU_VENDOR_AMD) && CPUID_TO_FAMILY(cpu_id) >= 6) db_printf("DEBUG_CTL\t0x%016llx\n", rdmsr(MSR_DEBUGCTLMSR)); if (cpu_feature & CPUID_PAT) db_printf("PAT\t0x%016llx\n", rdmsr(MSR_PAT)); } DB_SHOW_COMMAND(dbregs, db_show_dbregs) { db_printf("dr0\t0x%08x\n", rdr0()); db_printf("dr1\t0x%08x\n", rdr1()); db_printf("dr2\t0x%08x\n", rdr2()); db_printf("dr3\t0x%08x\n", rdr3()); db_printf("dr6\t0x%08x\n", rdr6()); db_printf("dr7\t0x%08x\n", rdr7()); } #endif void sdtossd(sd, ssd) struct segment_descriptor *sd; struct soft_segment_descriptor *ssd; { ssd->ssd_base = (sd->sd_hibase << 24) | sd->sd_lobase; ssd->ssd_limit = (sd->sd_hilimit << 16) | sd->sd_lolimit; ssd->ssd_type = sd->sd_type; ssd->ssd_dpl = sd->sd_dpl; ssd->ssd_p = sd->sd_p; ssd->ssd_def32 = sd->sd_def32; ssd->ssd_gran = sd->sd_gran; } #if !defined(PC98) static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, int *physmap_idxp) { int i, insert_idx, physmap_idx; physmap_idx = *physmap_idxp; if (length == 0) return (1); #ifndef PAE if (base > 0xffffffff) { printf("%uK of memory above 4GB ignored\n", (u_int)(length / 1024)); return (1); } #endif /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = physmap_idx + 2; for (i = 0; i <= physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } physmap_idx += 2; *physmap_idxp = physmap_idx; if (physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } static int add_smap_entry(struct bios_smap *smap, vm_paddr_t *physmap, int *physmap_idxp) { if (boothowto & RB_VERBOSE) printf("SMAP type=%02x base=%016llx len=%016llx\n", smap->type, smap->base, smap->length); if (smap->type != SMAP_TYPE_MEMORY) return (1); return (add_physmap_entry(smap->base, smap->length, physmap, physmap_idxp)); } static void add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap, int *physmap_idxp) { struct bios_smap *smap, *smapend; u_int32_t smapsize; /* * Memory map from INT 15:E820. * * subr_module.c says: * "Consumer may safely assume that size value precedes data." * ie: an int32_t immediately precedes SMAP. */ smapsize = *((u_int32_t *)smapbase - 1); smapend = (struct bios_smap *)((uintptr_t)smapbase + smapsize); for (smap = smapbase; smap < smapend; smap++) if (!add_smap_entry(smap, physmap, physmap_idxp)) break; } #endif /* !PC98 */ static void basemem_setup(void) { vm_paddr_t pa; pt_entry_t *pte; int i; if (basemem > 640) { printf("Preposterous BIOS basemem of %uK, truncating to 640K\n", basemem); basemem = 640; } /* * XXX if biosbasemem is now < 640, there is a `hole' * between the end of base memory and the start of * ISA memory. The hole may be empty or it may * contain BIOS code or data. Map it read/write so * that the BIOS can write to it. (Memory from 0 to * the physical end of the kernel is mapped read-only * to begin with and then parts of it are remapped. * The parts that aren't remapped form holes that * remain read-only and are unused by the kernel. * The base memory area is below the physical end of * the kernel and right now forms a read-only hole. * The part of it from PAGE_SIZE to * (trunc_page(biosbasemem * 1024) - 1) will be * remapped and used by the kernel later.) * * This code is similar to the code used in * pmap_mapdev, but since no memory needs to be * allocated we simply change the mapping. */ for (pa = trunc_page(basemem * 1024); pa < ISA_HOLE_START; pa += PAGE_SIZE) pmap_kenter(KERNBASE + pa, pa); /* * Map pages between basemem and ISA_HOLE_START, if any, r/w into * the vm86 page table so that vm86 can scribble on them using * the vm86 map too. XXX: why 2 ways for this and only 1 way for * page 0, at least as initialized here? */ pte = (pt_entry_t *)vm86paddr; for (i = basemem / 4; i < 160; i++) pte[i] = (i << PAGE_SHIFT) | PG_V | PG_RW | PG_U; } /* * Populate the (physmap) array with base/bound pairs describing the * available physical memory in the system, then test this memory and * build the phys_avail array describing the actually-available memory. * * If we cannot accurately determine the physical memory map, then use * value from the 0xE801 call, and failing that, the RTC. * * Total memory size may be set by the kernel environment variable * hw.physmem or the compile-time define MAXMEM. * * XXX first should be vm_paddr_t. */ #ifdef PC98 static void getmemsize(int first) { int off, physmap_idx, pa_indx, da_indx; u_long physmem_tunable, memtest; vm_paddr_t physmap[PHYSMAP_SIZE]; pt_entry_t *pte; quad_t dcons_addr, dcons_size; int i; int pg_n; u_int extmem; u_int under16; vm_paddr_t pa; bzero(physmap, sizeof(physmap)); /* XXX - some of EPSON machines can't use PG_N */ pg_n = PG_N; if (pc98_machine_type & M_EPSON_PC98) { switch (epson_machine_id) { #ifdef WB_CACHE default: #endif case EPSON_PC486_HX: case EPSON_PC486_HG: case EPSON_PC486_HA: pg_n = 0; break; } } under16 = pc98_getmemsize(&basemem, &extmem); basemem_setup(); physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_ULONG_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * By default keep the memtest enabled. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* * We need to divide chunk if Maxmem is larger than 16MB and * under 16MB area is not full of memory. * (1) system area (15-16MB region) is cut off * (2) extended memory is only over 16MB area (ex. Melco "HYPERMEMORY") */ if ((under16 != 16 * 1024) && (extmem > 15 * 1024)) { /* 15M - 16M region is cut off, so need to divide chunk */ physmap[physmap_idx + 1] = under16 * 1024; physmap_idx += 2; physmap[physmap_idx] = 0x1000000; physmap[physmap_idx + 1] = physmap[2] + extmem * 1024; } /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP3; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR3; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | pg_n; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } #else /* PC98 */ static void getmemsize(int first) { int has_smap, off, physmap_idx, pa_indx, da_indx; u_long memtest; vm_paddr_t physmap[PHYSMAP_SIZE]; pt_entry_t *pte; quad_t dcons_addr, dcons_size, physmem_tunable; int hasbrokenint12, i, res; u_int extmem; struct vm86frame vmf; struct vm86context vmc; vm_paddr_t pa; struct bios_smap *smap, *smapbase; caddr_t kmdp; has_smap = 0; #ifdef XBOX if (arch_i386_is_xbox) { /* * We queried the memory size before, so chop off 4MB for * the framebuffer and inform the OS of this. */ physmap[0] = 0; physmap[1] = (arch_i386_xbox_memsize * 1024 * 1024) - XBOX_FB_SIZE; physmap_idx = 0; goto physmap_done; } #endif bzero(&vmf, sizeof(vmf)); bzero(physmap, sizeof(physmap)); basemem = 0; /* * Check if the loader supplied an SMAP memory map. If so, * use that and do not make any VM86 calls. */ physmap_idx = 0; smapbase = NULL; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase != NULL) { add_smap_entries(smapbase, physmap, &physmap_idx); has_smap = 1; goto have_smap; } /* * Some newer BIOSes have a broken INT 12H implementation * which causes a kernel panic immediately. In this case, we * need use the SMAP to determine the base memory size. */ hasbrokenint12 = 0; TUNABLE_INT_FETCH("hw.hasbrokenint12", &hasbrokenint12); if (hasbrokenint12 == 0) { /* Use INT12 to determine base memory size. */ vm86_intcall(0x12, &vmf); basemem = vmf.vmf_ax; basemem_setup(); } /* * Fetch the memory map with INT 15:E820. Map page 1 R/W into * the kernel page table so we can use it as a buffer. The * kernel will unmap this page later. */ pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT); vmc.npages = 0; smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT)); res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di); KASSERT(res != 0, ("vm86_getptr() failed: address not found")); vmf.vmf_ebx = 0; do { vmf.vmf_eax = 0xE820; vmf.vmf_edx = SMAP_SIG; vmf.vmf_ecx = sizeof(struct bios_smap); i = vm86_datacall(0x15, &vmf, &vmc); if (i || vmf.vmf_eax != SMAP_SIG) break; has_smap = 1; if (!add_smap_entry(smap, physmap, &physmap_idx)) break; } while (vmf.vmf_ebx != 0); have_smap: /* * If we didn't fetch the "base memory" size from INT12, * figure it out from the SMAP (or just guess). */ if (basemem == 0) { for (i = 0; i <= physmap_idx; i += 2) { if (physmap[i] == 0x00000000) { basemem = physmap[i + 1] / 1024; break; } } /* XXX: If we couldn't find basemem from SMAP, just guess. */ if (basemem == 0) basemem = 640; basemem_setup(); } if (physmap[1] != 0) goto physmap_done; /* * If we failed to find an SMAP, figure out the extended * memory size. We will then build a simple memory map with * two segments, one for "base memory" and the second for * "extended memory". Note that "extended memory" starts at a * physical address of 1MB and that both basemem and extmem * are in units of 1KB. * * First, try to fetch the extended memory size via INT 15:E801. */ vmf.vmf_ax = 0xE801; if (vm86_intcall(0x15, &vmf) == 0) { extmem = vmf.vmf_cx + vmf.vmf_dx * 64; } else { /* * If INT15:E801 fails, this is our last ditch effort * to determine the extended memory size. Currently * we prefer the RTC value over INT15:88. */ #if 0 vmf.vmf_ah = 0x88; vm86_intcall(0x15, &vmf); extmem = vmf.vmf_ax; #else extmem = rtcin(RTC_EXTLO) + (rtcin(RTC_EXTHI) << 8); #endif } /* * Special hack for chipsets that still remap the 384k hole when * there's 16MB of memory - this really confuses people that * are trying to use bus mastering ISA controllers with the * "16MB limit"; they only have 16MB, but the remapping puts * them beyond the limit. * * If extended memory is between 15-16MB (16-17MB phys address range), * chop it to 15MB. */ if ((extmem > 15 * 1024) && (extmem < 16 * 1024)) extmem = 15 * 1024; physmap[0] = 0; physmap[1] = basemem * 1024; physmap_idx = 2; physmap[physmap_idx] = 0x100000; physmap[physmap_idx + 1] = physmap[physmap_idx] + extmem * 1024; physmap_done: /* * Now, physmap contains a map of physical memory. */ #ifdef SMP /* make hole for AP bootstrap code */ physmap[1] = mp_bootaddress(physmap[1]); #endif /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". We may adjust this * based on ``hw.physmem'' and the results of the memory test. */ Maxmem = atop(physmap[physmap_idx + 1]); #ifdef MAXMEM Maxmem = MAXMEM / 4; #endif if (TUNABLE_QUAD_FETCH("hw.physmem", &physmem_tunable)) Maxmem = atop(physmem_tunable); /* * If we have an SMAP, don't allow MAXMEM or hw.physmem to extend * the amount of memory in the system. */ if (has_smap && Maxmem > atop(physmap[physmap_idx + 1])) Maxmem = atop(physmap[physmap_idx + 1]); /* * By default enable the memory test on real hardware, and disable * it if we appear to be running in a VM. This avoids touching all * pages unnecessarily, which doesn't matter on real hardware but is * bad for shared VM hosts. Use a general name so that * one could eventually do more with the code than just disable it. */ memtest = (vm_guest > VM_GUEST_NO) ? 0 : 1; TUNABLE_ULONG_FETCH("hw.memtest.tests", &memtest); if (atop(physmap[physmap_idx + 1]) != Maxmem && (boothowto & RB_VERBOSE)) printf("Physical memory use set to %ldK\n", Maxmem * 4); /* * If Maxmem has been increased beyond what the system has detected, * extend the last memory segment to the new limit. */ if (atop(physmap[physmap_idx + 1]) < Maxmem) physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem); /* call pmap initialization to make new kernel address space */ pmap_bootstrap(first); /* * Size up each available chunk of physical memory. */ physmap[0] = PAGE_SIZE; /* mask off page 0 */ pa_indx = 0; da_indx = 1; phys_avail[pa_indx++] = physmap[0]; phys_avail[pa_indx] = physmap[0]; dump_avail[da_indx] = physmap[0]; pte = CMAP3; /* * Get dcons buffer address */ if (getenv_quad("dcons.addr", &dcons_addr) == 0 || getenv_quad("dcons.size", &dcons_size) == 0) dcons_addr = 0; /* * physmap is in bytes, so when converting to page boundaries, * round up the start address and round down the end address. */ for (i = 0; i <= physmap_idx; i += 2) { vm_paddr_t end; end = ptoa((vm_paddr_t)Maxmem); if (physmap[i + 1] < end) end = trunc_page(physmap[i + 1]); for (pa = round_page(physmap[i]); pa < end; pa += PAGE_SIZE) { int tmp, page_bad, full; int *ptr = (int *)CADDR3; full = FALSE; /* * block out kernel memory as not available. */ if (pa >= KERNLOAD && pa < first) goto do_dump_avail; /* * block out dcons buffer */ if (dcons_addr > 0 && pa >= trunc_page(dcons_addr) && pa < dcons_addr + dcons_size) goto do_dump_avail; page_bad = FALSE; if (memtest == 0) goto skip_memtest; /* * map page into kernel: valid, read/write,non-cacheable */ *pte = pa | PG_V | PG_RW | PG_N; invltlb(); tmp = *(int *)ptr; /* * Test for alternating 1's and 0's */ *(volatile int *)ptr = 0xaaaaaaaa; if (*(volatile int *)ptr != 0xaaaaaaaa) page_bad = TRUE; /* * Test for alternating 0's and 1's */ *(volatile int *)ptr = 0x55555555; if (*(volatile int *)ptr != 0x55555555) page_bad = TRUE; /* * Test for all 1's */ *(volatile int *)ptr = 0xffffffff; if (*(volatile int *)ptr != 0xffffffff) page_bad = TRUE; /* * Test for all 0's */ *(volatile int *)ptr = 0x0; if (*(volatile int *)ptr != 0x0) page_bad = TRUE; /* * Restore original value. */ *(int *)ptr = tmp; skip_memtest: /* * Adjust array of valid/good pages. */ if (page_bad == TRUE) continue; /* * If this good page is a continuation of the * previous set of good pages, then just increase * the end pointer. Otherwise start a new chunk. * Note that "end" points one higher than end, * making the range >= start and < end. * If we're also doing a speculative memory * test and we at or past the end, bump up Maxmem * so that we keep going. The first bad page * will terminate the loop. */ if (phys_avail[pa_indx] == pa) { phys_avail[pa_indx] += PAGE_SIZE; } else { pa_indx++; if (pa_indx == PHYS_AVAIL_ARRAY_END) { printf( "Too many holes in the physical address space, giving up\n"); pa_indx--; full = TRUE; goto do_dump_avail; } phys_avail[pa_indx++] = pa; /* start */ phys_avail[pa_indx] = pa + PAGE_SIZE; /* end */ } physmem++; do_dump_avail: if (dump_avail[da_indx] == pa) { dump_avail[da_indx] += PAGE_SIZE; } else { da_indx++; if (da_indx == DUMP_AVAIL_ARRAY_END) { da_indx--; goto do_next; } dump_avail[da_indx++] = pa; /* start */ dump_avail[da_indx] = pa + PAGE_SIZE; /* end */ } do_next: if (full) break; } } *pte = 0; invltlb(); /* * XXX * The last chunk must contain at least one page plus the message * buffer to avoid complicating other code (message buffer address * calculation, etc.). */ while (phys_avail[pa_indx - 1] + PAGE_SIZE + round_page(msgbufsize) >= phys_avail[pa_indx]) { physmem -= atop(phys_avail[pa_indx] - phys_avail[pa_indx - 1]); phys_avail[pa_indx--] = 0; phys_avail[pa_indx--] = 0; } Maxmem = atop(phys_avail[pa_indx]); /* Trim off space for the message buffer. */ phys_avail[pa_indx] -= round_page(msgbufsize); /* Map the message buffer. */ for (off = 0; off < round_page(msgbufsize); off += PAGE_SIZE) pmap_kenter((vm_offset_t)msgbufp + off, phys_avail[pa_indx] + off); } #endif /* PC98 */ register_t init386(first) int first; { struct gate_descriptor *gdp; int gsel_tss, metadata_missing, x, pa; struct pcpu *pc; #ifdef CPU_ENABLE_SSE struct xstate_hdr *xhdr; #endif thread0.td_kstack = proc0kstack; thread0.td_kstack_pages = TD0_KSTACK_PAGES; /* * This may be done better later if it gets more high level * components in it. If so just link td->td_proc here. */ proc_linkup0(&proc0, &thread0); #ifdef PC98 /* * Initialize DMAC */ pc98_init_dmac(); #endif metadata_missing = 0; if (bootinfo.bi_modulep) { preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE; preload_bootstrap_relocate(KERNBASE); } else { metadata_missing = 1; } if (bootinfo.bi_envp) init_static_kenv((caddr_t)bootinfo.bi_envp + KERNBASE, 0); else init_static_kenv(NULL, 0); /* Init basic tunables, hz etc */ init_param1(); /* * Make gdt memory segments. All segments cover the full 4GB * of address space and permissions are enforced at page level. */ gdt_segs[GCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUCODE_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUDATA_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUFS_SEL].ssd_limit = atop(0 - 1); gdt_segs[GUGS_SEL].ssd_limit = atop(0 - 1); pc = &__pcpu[0]; gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1); gdt_segs[GPRIV_SEL].ssd_base = (int) pc; gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss; for (x = 0; x < NGDT; x++) ssdtosd(&gdt_segs[x], &gdt[x].sd); r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1; r_gdt.rd_base = (int) gdt; mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN); lgdt(&r_gdt); pcpu_init(pc, 0, sizeof(struct pcpu)); for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE) pmap_kenter(pa + KERNBASE, pa); dpcpu_init((void *)(first + KERNBASE), 0); first += DPCPU_SIZE; PCPU_SET(prvspace, pc); PCPU_SET(curthread, &thread0); /* * Initialize mutexes. * * icu_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we * must be able to get the icu lock, so it can't be * under witness. */ mutex_init(); mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE); /* make ldt memory segments */ ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1); ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1); for (x = 0; x < sizeof ldt_segs / sizeof ldt_segs[0]; x++) ssdtosd(&ldt_segs[x], &ldt[x].sd); _default_ldt = GSEL(GLDT_SEL, SEL_KPL); lldt(_default_ldt); PCPU_SET(currentldt, _default_ldt); /* exceptions */ for (x = 0; x < NIDT; x++) setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL , GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL)); setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #ifdef KDTRACE_HOOKS setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #ifdef XENHVM setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); #endif r_idt.rd_limit = sizeof(idt0) - 1; r_idt.rd_base = (int) idt; lidt(&r_idt); #ifdef XBOX /* * The following code queries the PCI ID of 0:0:0. For the XBOX, * This should be 0x10de / 0x02a5. * * This is exactly what Linux does. */ outl(0xcf8, 0x80000000); if (inl(0xcfc) == 0x02a510de) { arch_i386_is_xbox = 1; pic16l_setled(XBOX_LED_GREEN); /* * We are an XBOX, but we may have either 64MB or 128MB of * memory. The PCI host bridge should be programmed for this, * so we just query it. */ outl(0xcf8, 0x80000084); arch_i386_xbox_memsize = (inl(0xcfc) == 0x7FFFFFF) ? 128 : 64; } #endif /* XBOX */ /* * Initialize the clock before the console so that console * initialization can use DELAY(). */ clock_init(); finishidentcpu(); /* Final stage of CPU initialization */ setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); initializecpu(); /* Initialize CPU registers */ initializecpucache(); /* pointer to selector slot for %fs/%gs */ PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd); dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 = dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)]; dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 = dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL); #if defined(PAE) || defined(PAE_TABLES) dblfault_tss.tss_cr3 = (int)IdlePDPT; #else dblfault_tss.tss_cr3 = (int)IdlePTD; #endif dblfault_tss.tss_eip = (int)dblfault_handler; dblfault_tss.tss_eflags = PSL_KERNEL; dblfault_tss.tss_ds = dblfault_tss.tss_es = dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL); dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL); dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL); dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL); vm86_initialize(); getmemsize(first); init_param2(physmem); /* now running on new page tables, configured,and u/iom is accessible */ /* * Initialize the console before we print anything out. */ cninit(); if (metadata_missing) printf("WARNING: loader(8) metadata is missing!\n"); #ifdef DEV_ISA #ifdef DEV_ATPIC #ifndef PC98 elcr_probe(); #endif atpic_startup(); #else /* Reset and mask the atpics and leave them shut down. */ atpic_reset(); /* * Point the ICU spurious interrupt vectors at the APIC spurious * interrupt handler. */ setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); #endif #endif #ifdef DDB db_fetch_ksymtab(bootinfo.bi_symtab, bootinfo.bi_esymtab); #endif kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif msgbufinit(msgbufp, msgbufsize); #ifdef DEV_NPX npxinit(true); #endif /* * Set up thread0 pcb after npxinit calculated pcb + fpu save * area size. Zero out the extended state header in fpu save * area. */ thread0.td_pcb = get_pcb_td(&thread0); bzero(get_pcb_user_save_td(&thread0), cpu_max_ext_state_size); #ifdef CPU_ENABLE_SSE if (use_xsave) { xhdr = (struct xstate_hdr *)(get_pcb_user_save_td(&thread0) + 1); xhdr->xstate_bv = xsave_mask; } #endif PCPU_SET(curpcb, thread0.td_pcb); /* make an initial tss so cpu can get interrupt stack on syscall! */ /* Note: -16 is so we can grow the trapframe if we came from vm86 */ PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16); PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL)); gsel_tss = GSEL(GPROC0_SEL, SEL_KPL); PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd); PCPU_SET(common_tssd, *PCPU_GET(tss_gdt)); PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16); ltr(gsel_tss); /* make a call gate to reenter kernel with */ gdp = &ldt[LSYS5CALLS_SEL].gd; x = (int) &IDTVEC(lcall_syscall); gdp->gd_looffset = x; gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL); gdp->gd_stkcpy = 1; gdp->gd_type = SDT_SYS386CGT; gdp->gd_dpl = SEL_UPL; gdp->gd_p = 1; gdp->gd_hioffset = x >> 16; /* XXX does this work? */ /* XXX yes! */ ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; ldt[LSOL26CALLS_SEL] = ldt[LSYS5CALLS_SEL]; /* transfer to user mode */ _ucodesel = GSEL(GUCODE_SEL, SEL_UPL); _udatasel = GSEL(GUDATA_SEL, SEL_UPL); /* setup proc 0's pcb */ thread0.td_pcb->pcb_flags = 0; #if defined(PAE) || defined(PAE_TABLES) thread0.td_pcb->pcb_cr3 = (int)IdlePDPT; #else thread0.td_pcb->pcb_cr3 = (int)IdlePTD; #endif thread0.td_pcb->pcb_ext = 0; thread0.td_frame = &proc0_tf; cpu_probe_amdc1e(); #ifdef FDT x86_init_fdt(); #endif /* Location of kernel stack for locore */ return ((register_t)thread0.td_pcb); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } #ifndef PC98 static int smap_sysctl_handler(SYSCTL_HANDLER_ARGS) { struct bios_smap *smapbase; struct bios_smap_xattr smap; caddr_t kmdp; uint32_t *smapattr; int count, error, i; /* Retrieve the system memory map from the loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf32 kernel"); smapbase = (struct bios_smap *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP); if (smapbase == NULL) return (0); smapattr = (uint32_t *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_SMAP_XATTR); count = *((u_int32_t *)smapbase - 1) / sizeof(*smapbase); error = 0; for (i = 0; i < count; i++) { smap.base = smapbase[i].base; smap.length = smapbase[i].length; smap.type = smapbase[i].type; if (smapattr != NULL) smap.xattr = smapattr[i]; else smap.xattr = 0; error = SYSCTL_OUT(req, &smap, sizeof(smap)); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, smap, CTLTYPE_OPAQUE|CTLFLAG_RD, NULL, 0, smap_sysctl_handler, "S,bios_smap_xattr", "Raw BIOS SMAP data"); #endif /* !PC98 */ void spinlock_enter(void) { struct thread *td; register_t flags; td = curthread; if (td->td_md.md_spinlock_count == 0) { flags = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_flags = flags; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t flags; td = curthread; critical_exit(); flags = td->td_md.md_saved_flags; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(flags); } #if defined(I586_CPU) && !defined(NO_F00F_HACK) static void f00f_hack(void *unused); SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL); static void f00f_hack(void *unused) { struct gate_descriptor *new_idt; vm_offset_t tmp; if (!has_f00f_bug) return; GIANT_REQUIRED; printf("Intel Pentium detected, installing workaround for F00F bug\n"); tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO); if (tmp == 0) panic("kmem_malloc returned 0"); /* Put the problematic entry (#6) at the end of the lower page. */ new_idt = (struct gate_descriptor*) (tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor)); bcopy(idt, new_idt, sizeof(idt0)); r_idt.rd_base = (u_int)new_idt; lidt(&r_idt); idt = new_idt; pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ); } #endif /* defined(I586_CPU) && !NO_F00F_HACK */ /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_edi = tf->tf_edi; pcb->pcb_esi = tf->tf_esi; pcb->pcb_ebp = tf->tf_ebp; pcb->pcb_ebx = tf->tf_ebx; pcb->pcb_eip = tf->tf_eip; pcb->pcb_esp = (ISPL(tf->tf_cs)) ? tf->tf_esp : (int)(tf + 1) - 8; pcb->pcb_gs = rgs(); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_eip = addr; return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_eflags |= PSL_T; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_eflags &= ~PSL_T; return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; pcb = td->td_pcb; regs->r_gs = pcb->pcb_gs; return (fill_frame_regs(tp, regs)); } int fill_frame_regs(struct trapframe *tp, struct reg *regs) { regs->r_fs = tp->tf_fs; regs->r_es = tp->tf_es; regs->r_ds = tp->tf_ds; regs->r_edi = tp->tf_edi; regs->r_esi = tp->tf_esi; regs->r_ebp = tp->tf_ebp; regs->r_ebx = tp->tf_ebx; regs->r_edx = tp->tf_edx; regs->r_ecx = tp->tf_ecx; regs->r_eax = tp->tf_eax; regs->r_eip = tp->tf_eip; regs->r_cs = tp->tf_cs; regs->r_eflags = tp->tf_eflags; regs->r_esp = tp->tf_esp; regs->r_ss = tp->tf_ss; return (0); } int set_regs(struct thread *td, struct reg *regs) { struct pcb *pcb; struct trapframe *tp; tp = td->td_frame; if (!EFL_SECURE(regs->r_eflags, tp->tf_eflags) || !CS_SECURE(regs->r_cs)) return (EINVAL); pcb = td->td_pcb; tp->tf_fs = regs->r_fs; tp->tf_es = regs->r_es; tp->tf_ds = regs->r_ds; tp->tf_edi = regs->r_edi; tp->tf_esi = regs->r_esi; tp->tf_ebp = regs->r_ebp; tp->tf_ebx = regs->r_ebx; tp->tf_edx = regs->r_edx; tp->tf_ecx = regs->r_ecx; tp->tf_eax = regs->r_eax; tp->tf_eip = regs->r_eip; tp->tf_cs = regs->r_cs; tp->tf_eflags = regs->r_eflags; tp->tf_esp = regs->r_esp; tp->tf_ss = regs->r_ss; pcb->pcb_gs = regs->r_gs; return (0); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { KASSERT(td == curthread || TD_IS_SUSPENDED(td) || P_SHOULDSTOP(td->td_proc), ("not suspended thread %p", td)); #ifdef DEV_NPX npxgetregs(td); #else bzero(fpregs, sizeof(*fpregs)); #endif #ifdef CPU_ENABLE_SSE if (cpu_fxsr) npx_fill_fpregs_xmm(&get_pcb_user_save_td(td)->sv_xmm, (struct save87 *)fpregs); else #endif /* CPU_ENABLE_SSE */ bcopy(&get_pcb_user_save_td(td)->sv_87, fpregs, sizeof(*fpregs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { #ifdef CPU_ENABLE_SSE if (cpu_fxsr) npx_set_fpregs_xmm((struct save87 *)fpregs, &get_pcb_user_save_td(td)->sv_xmm); else #endif /* CPU_ENABLE_SSE */ bcopy(fpregs, &get_pcb_user_save_td(td)->sv_87, sizeof(*fpregs)); #ifdef DEV_NPX npxuserinited(td); #endif return (0); } /* * Get machine context. */ int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; struct segment_descriptor *sdp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->tf_esp); PROC_UNLOCK(curthread->td_proc); mcp->mc_gs = td->td_pcb->pcb_gs; mcp->mc_fs = tp->tf_fs; mcp->mc_es = tp->tf_es; mcp->mc_ds = tp->tf_ds; mcp->mc_edi = tp->tf_edi; mcp->mc_esi = tp->tf_esi; mcp->mc_ebp = tp->tf_ebp; mcp->mc_isp = tp->tf_isp; mcp->mc_eflags = tp->tf_eflags; if (flags & GET_MC_CLEAR_RET) { mcp->mc_eax = 0; mcp->mc_edx = 0; mcp->mc_eflags &= ~PSL_C; } else { mcp->mc_eax = tp->tf_eax; mcp->mc_edx = tp->tf_edx; } mcp->mc_ebx = tp->tf_ebx; mcp->mc_ecx = tp->tf_ecx; mcp->mc_eip = tp->tf_eip; mcp->mc_cs = tp->tf_cs; mcp->mc_esp = tp->tf_esp; mcp->mc_ss = tp->tf_ss; mcp->mc_len = sizeof(*mcp); get_fpcontext(td, mcp, NULL, 0); sdp = &td->td_pcb->pcb_fsd; mcp->mc_fsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; sdp = &td->td_pcb->pcb_gsd; mcp->mc_gsbase = sdp->sd_hibase << 24 | sdp->sd_lobase; mcp->mc_flags = 0; mcp->mc_xfpustate = 0; mcp->mc_xfpustate_len = 0; bzero(mcp->mc_spare2, sizeof(mcp->mc_spare2)); return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; char *xfpustate; int eflags, ret; tp = td->td_frame; if (mcp->mc_len != sizeof(*mcp) || (mcp->mc_flags & ~_MC_FLAG_MASK) != 0) return (EINVAL); eflags = (mcp->mc_eflags & PSL_USERCHANGE) | (tp->tf_eflags & ~PSL_USERCHANGE); if (mcp->mc_flags & _MC_HASFPXSTATE) { if (mcp->mc_xfpustate_len > cpu_max_ext_state_size - sizeof(union savefpu)) return (EINVAL); xfpustate = __builtin_alloca(mcp->mc_xfpustate_len); ret = copyin((void *)mcp->mc_xfpustate, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); } else xfpustate = NULL; ret = set_fpcontext(td, mcp, xfpustate, mcp->mc_xfpustate_len); if (ret != 0) return (ret); tp->tf_fs = mcp->mc_fs; tp->tf_es = mcp->mc_es; tp->tf_ds = mcp->mc_ds; tp->tf_edi = mcp->mc_edi; tp->tf_esi = mcp->mc_esi; tp->tf_ebp = mcp->mc_ebp; tp->tf_ebx = mcp->mc_ebx; tp->tf_edx = mcp->mc_edx; tp->tf_ecx = mcp->mc_ecx; tp->tf_eax = mcp->mc_eax; tp->tf_eip = mcp->mc_eip; tp->tf_eflags = eflags; tp->tf_esp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; td->td_pcb->pcb_gs = mcp->mc_gs; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpusave, size_t xfpusave_len) { #ifdef CPU_ENABLE_SSE size_t max_len, len; #endif #ifndef DEV_NPX mcp->mc_fpformat = _MC_FPFMT_NODEV; mcp->mc_ownedfp = _MC_FPOWNED_NONE; bzero(mcp->mc_fpstate, sizeof(mcp->mc_fpstate)); #else mcp->mc_ownedfp = npxgetregs(td); bcopy(get_pcb_user_save_td(td), &mcp->mc_fpstate[0], sizeof(mcp->mc_fpstate)); mcp->mc_fpformat = npxformat(); #ifdef CPU_ENABLE_SSE if (!use_xsave || xfpusave_len == 0) return; max_len = cpu_max_ext_state_size - sizeof(union savefpu); len = xfpusave_len; if (len > max_len) { len = max_len; bzero(xfpusave + max_len, len - max_len); } mcp->mc_flags |= _MC_HASFPXSTATE; mcp->mc_xfpustate_len = len; bcopy(get_pcb_user_save_td(td) + 1, xfpusave, len); #endif #endif } static int set_fpcontext(struct thread *td, mcontext_t *mcp, char *xfpustate, size_t xfpustate_len) { union savefpu *fpstate; int error; if (mcp->mc_fpformat == _MC_FPFMT_NODEV) return (0); else if (mcp->mc_fpformat != _MC_FPFMT_387 && mcp->mc_fpformat != _MC_FPFMT_XMM) return (EINVAL); else if (mcp->mc_ownedfp == _MC_FPOWNED_NONE) { /* We don't care what state is left in the FPU or PCB. */ fpstate_drop(td); error = 0; } else if (mcp->mc_ownedfp == _MC_FPOWNED_FPU || mcp->mc_ownedfp == _MC_FPOWNED_PCB) { #ifdef DEV_NPX fpstate = (union savefpu *)&mcp->mc_fpstate; #ifdef CPU_ENABLE_SSE if (cpu_fxsr) fpstate->sv_xmm.sv_env.en_mxcsr &= cpu_mxcsr_mask; #endif error = npxsetregs(td, fpstate, xfpustate, xfpustate_len); #else error = EINVAL; #endif } else return (EINVAL); return (error); } static void fpstate_drop(struct thread *td) { KASSERT(PCB_USER_FPU(td->td_pcb), ("fpstate_drop: kernel-owned fpu")); critical_enter(); #ifdef DEV_NPX if (PCPU_GET(fpcurthread) == td) npxdrop(); #endif /* * XXX force a full drop of the npx. The above only drops it if we * owned it. npxgetregs() has the same bug in the !cpu_fxsr case. * * XXX I don't much like npxgetregs()'s semantics of doing a full * drop. Dropping only to the pcb matches fnsave's behaviour. * We only need to drop to !PCB_INITDONE in sendsig(). But * sendsig() is the only caller of npxgetregs()... perhaps we just * have too many layers. */ curthread->td_pcb->pcb_flags &= ~(PCB_NPXINITDONE | PCB_NPXUSERINITDONE); critical_exit(); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; if (td == NULL) { dbregs->dr[0] = rdr0(); dbregs->dr[1] = rdr1(); dbregs->dr[2] = rdr2(); dbregs->dr[3] = rdr3(); dbregs->dr[4] = rdr4(); dbregs->dr[5] = rdr5(); dbregs->dr[6] = rdr6(); dbregs->dr[7] = rdr7(); } else { pcb = td->td_pcb; dbregs->dr[0] = pcb->pcb_dr0; dbregs->dr[1] = pcb->pcb_dr1; dbregs->dr[2] = pcb->pcb_dr2; dbregs->dr[3] = pcb->pcb_dr3; dbregs->dr[4] = 0; dbregs->dr[5] = 0; dbregs->dr[6] = pcb->pcb_dr6; dbregs->dr[7] = pcb->pcb_dr7; } return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { struct pcb *pcb; int i; if (td == NULL) { load_dr0(dbregs->dr[0]); load_dr1(dbregs->dr[1]); load_dr2(dbregs->dr[2]); load_dr3(dbregs->dr[3]); load_dr4(dbregs->dr[4]); load_dr5(dbregs->dr[5]); load_dr6(dbregs->dr[6]); load_dr7(dbregs->dr[7]); } else { /* * Don't let an illegal value for dr7 get set. Specifically, * check for undefined settings. Setting these bit patterns * result in undefined behaviour and can lead to an unexpected * TRCTRAP. */ for (i = 0; i < 4; i++) { if (DBREG_DR7_ACCESS(dbregs->dr[7], i) == 0x02) return (EINVAL); if (DBREG_DR7_LEN(dbregs->dr[7], i) == 0x02) return (EINVAL); } pcb = td->td_pcb; /* * Don't let a process set a breakpoint that is not within the * process's address space. If a process could do this, it * could halt the system by setting a breakpoint in the kernel * (if ddb was enabled). Thus, we need to check to make sure * that no breakpoints are being enabled for addresses outside * process's address space. * * XXX - what about when the watched area of the user's * address space is written into from within the kernel * ... wouldn't that still cause a breakpoint to be generated * from within kernel mode? */ if (DBREG_DR7_ENABLED(dbregs->dr[7], 0)) { /* dr0 is enabled */ if (dbregs->dr[0] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 1)) { /* dr1 is enabled */ if (dbregs->dr[1] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 2)) { /* dr2 is enabled */ if (dbregs->dr[2] >= VM_MAXUSER_ADDRESS) return (EINVAL); } if (DBREG_DR7_ENABLED(dbregs->dr[7], 3)) { /* dr3 is enabled */ if (dbregs->dr[3] >= VM_MAXUSER_ADDRESS) return (EINVAL); } pcb->pcb_dr0 = dbregs->dr[0]; pcb->pcb_dr1 = dbregs->dr[1]; pcb->pcb_dr2 = dbregs->dr[2]; pcb->pcb_dr3 = dbregs->dr[3]; pcb->pcb_dr6 = dbregs->dr[6]; pcb->pcb_dr7 = dbregs->dr[7]; pcb->pcb_flags |= PCB_DBREGS; } return (0); } /* * Return > 0 if a hardware breakpoint has been hit, and the * breakpoint was in user space. Return 0, otherwise. */ int user_dbreg_trap(void) { u_int32_t dr7, dr6; /* debug registers dr6 and dr7 */ u_int32_t bp; /* breakpoint bits extracted from dr6 */ int nbp; /* number of breakpoints that triggered */ caddr_t addr[4]; /* breakpoint addresses */ int i; dr7 = rdr7(); if ((dr7 & 0x000000ff) == 0) { /* * all GE and LE bits in the dr7 register are zero, * thus the trap couldn't have been caused by the * hardware debug registers */ return 0; } nbp = 0; dr6 = rdr6(); bp = dr6 & 0x0000000f; if (!bp) { /* * None of the breakpoint bits are set meaning this * trap was not caused by any of the debug registers */ return 0; } /* * at least one of the breakpoints were hit, check to see * which ones and if any of them are user space addresses */ if (bp & 0x01) { addr[nbp++] = (caddr_t)rdr0(); } if (bp & 0x02) { addr[nbp++] = (caddr_t)rdr1(); } if (bp & 0x04) { addr[nbp++] = (caddr_t)rdr2(); } if (bp & 0x08) { addr[nbp++] = (caddr_t)rdr3(); } for (i = 0; i < nbp; i++) { if (addr[i] < (caddr_t)VM_MAXUSER_ADDRESS) { /* * addr[i] is in user space */ return nbp; } } /* * None of the breakpoints are in user space. */ return 0; } #ifdef KDB /* * Provide inb() and outb() as functions. They are normally only available as * inline functions, thus cannot be called from the debugger. */ /* silence compiler warnings */ u_char inb_(u_short); void outb_(u_short, u_char); u_char inb_(u_short port) { return inb(port); } void outb_(u_short port, u_char data) { outb(port, data); } #endif /* KDB */ Index: head/sys/i386/linux/linux_sysvec.c =================================================================== --- head/sys/i386/linux/linux_sysvec.c (revision 294929) +++ head/sys/i386/linux/linux_sysvec.c (revision 294930) @@ -1,1199 +1,1199 @@ /*- * Copyright (c) 1994-1996 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include MODULE_VERSION(linux, 1); #if BYTE_ORDER == LITTLE_ENDIAN #define SHELLMAGIC 0x2123 /* #! */ #else #define SHELLMAGIC 0x2321 #endif #if defined(DEBUG) SYSCTL_PROC(_compat_linux, OID_AUTO, debug, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, linux_sysctl_debug, "A", "Linux debugging control"); #endif /* * Allow the sendsig functions to use the ldebug() facility * even though they are not syscalls themselves. Map them * to syscall 0. This is slightly less bogus than using * ldebug(sigreturn). */ #define LINUX_SYS_linux_rt_sendsig 0 #define LINUX_SYS_linux_sendsig 0 #define LINUX_PS_STRINGS (LINUX_USRSTACK - sizeof(struct ps_strings)) static int linux_szsigcode; static vm_object_t linux_shared_page_obj; static char *linux_shared_page_mapping; extern char _binary_linux_locore_o_start; extern char _binary_linux_locore_o_end; extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); static int linux_fixup(register_t **stack_base, struct image_params *iparams); static int elf_linux_fixup(register_t **stack_base, struct image_params *iparams); static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); static void exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack); static register_t *linux_copyout_strings(struct image_params *imgp); static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel); static void linux_vdso_install(void *param); static void linux_vdso_deinstall(void *param); static int linux_szplatform; const char *linux_kplatform; static eventhandler_tag linux_exit_tag; static eventhandler_tag linux_exec_tag; static eventhandler_tag linux_thread_dtor_tag; /* * Linux syscalls return negative errno's, we do positive and map them * Reference: * FreeBSD: src/sys/sys/errno.h * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h * linux-2.6.17.8/include/asm-generic/errno.h */ static int bsd_to_linux_errno[ELAST + 1] = { -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, -72, -67, -71 }; #define LINUX_T_UNKNOWN 255 static int _bsd_to_linux_trapcode[] = { LINUX_T_UNKNOWN, /* 0 */ 6, /* 1 T_PRIVINFLT */ LINUX_T_UNKNOWN, /* 2 */ 3, /* 3 T_BPTFLT */ LINUX_T_UNKNOWN, /* 4 */ LINUX_T_UNKNOWN, /* 5 */ 16, /* 6 T_ARITHTRAP */ 254, /* 7 T_ASTFLT */ LINUX_T_UNKNOWN, /* 8 */ 13, /* 9 T_PROTFLT */ 1, /* 10 T_TRCTRAP */ LINUX_T_UNKNOWN, /* 11 */ 14, /* 12 T_PAGEFLT */ LINUX_T_UNKNOWN, /* 13 */ 17, /* 14 T_ALIGNFLT */ LINUX_T_UNKNOWN, /* 15 */ LINUX_T_UNKNOWN, /* 16 */ LINUX_T_UNKNOWN, /* 17 */ 0, /* 18 T_DIVIDE */ 2, /* 19 T_NMI */ 4, /* 20 T_OFLOW */ 5, /* 21 T_BOUND */ 7, /* 22 T_DNA */ 8, /* 23 T_DOUBLEFLT */ 9, /* 24 T_FPOPFLT */ 10, /* 25 T_TSSFLT */ 11, /* 26 T_SEGNPFLT */ 12, /* 27 T_STKFLT */ 18, /* 28 T_MCHK */ 19, /* 29 T_XMMFLT */ 15 /* 30 T_RESERVED */ }; #define bsd_to_linux_trapcode(code) \ ((code)args->argc + 1); (*stack_base)--; suword(*stack_base, (intptr_t)(void *)envp); (*stack_base)--; suword(*stack_base, (intptr_t)(void *)argv); (*stack_base)--; suword(*stack_base, imgp->args->argc); return (0); } static int elf_linux_fixup(register_t **stack_base, struct image_params *imgp) { struct proc *p; Elf32_Auxargs *args; Elf32_Addr *uplatform; struct ps_strings *arginfo; register_t *pos; int issetugid; KASSERT(curthread->td_proc == imgp->proc, ("unsafe elf_linux_fixup(), should be curproc")); p = imgp->proc; issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform); args = (Elf32_Auxargs *)imgp->auxargs; pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, imgp->proc->p_sysent->sv_shared_page_base); AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall); AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); /* * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, * as it has appeared in the 2.4.0-rc7 first time. * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), * glibc falls back to the hard-coded CLK_TCK value when aux entry * is not present. * Also see linux_times() implementation. */ if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); AUXARGS_ENTRY(pos, AT_PHENT, args->phent); AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); AUXARGS_ENTRY(pos, AT_BASE, args->base); AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); AUXARGS_ENTRY(pos, LINUX_AT_RANDOM, imgp->canary); if (imgp->execpathp != 0) AUXARGS_ENTRY(pos, LINUX_AT_EXECFN, imgp->execpathp); if (args->execfd != -1) AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); AUXARGS_ENTRY(pos, AT_NULL, 0); free(imgp->auxargs, M_TEMP); imgp->auxargs = NULL; (*stack_base)--; suword(*stack_base, (register_t)imgp->args->argc); return (0); } /* * Copied from kern/kern_exec.c */ static register_t * linux_copyout_strings(struct image_params *imgp) { int argc, envc; char **vectp; char *stringp, *destp; register_t *stack_base; struct ps_strings *arginfo; char canary[LINUX_AT_RANDOM_LEN]; size_t execpath_len; struct proc *p; /* * Calculate string base and vector table pointers. */ p = imgp->proc; if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform - roundup(sizeof(canary), sizeof(char *)) - roundup(execpath_len, sizeof(char *)) - roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); /* * install LINUX_PLATFORM */ copyout(linux_kplatform, ((caddr_t)arginfo - linux_szplatform), linux_szplatform); if (execpath_len != 0) { imgp->execpathp = (uintptr_t)arginfo - linux_szplatform - execpath_len; copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); imgp->canary = (uintptr_t)arginfo - linux_szplatform - roundup(execpath_len, sizeof(char *)) - roundup(sizeof(canary), sizeof(char *)); copyout(canary, (void *)imgp->canary, sizeof(canary)); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (LINUX_AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size) * sizeof(char *)); } else { /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (char **)(destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(char *)); } /* * vectp also becomes our initial stack base */ stack_base = (register_t *)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword(vectp++, 0); suword(&arginfo->ps_envstr, (long)(intptr_t)vectp); suword(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword(vectp++, (long)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword(vectp, 0); return (stack_base); } static void linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_rt_sigframe *fp, frame; int sig, code; int oonstack; sig = ksi->ksi_signo; code = ksi->ksi_code; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* * Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + + fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); } else fp = (struct l_rt_sigframe *)regs->tf_esp - 1; mtx_unlock(&psp->ps_mtx); /* * Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = catcher; frame.sf_sig = sig; frame.sf_siginfo = &fp->sf_si; frame.sf_ucontext = &fp->sf_sc; /* Fill in POSIX parts */ ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); /* * Build the signal context to be used by sigreturn. */ frame.sf_sc.uc_flags = 0; /* XXX ??? */ frame.sf_sc.uc_link = NULL; /* XXX ??? */ frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; PROC_UNLOCK(p); bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; frame.sf_sc.uc_mcontext.sc_gs = rgs(); frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp; frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); #endif if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ #ifdef DEBUG if (ldebug(rt_sendsig)) printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), fp, oonstack); #endif PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. */ regs->tf_esp = (int)fp; regs->tf_eip = linux_rt_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td = curthread; struct proc *p = td->td_proc; struct sigacts *psp; struct trapframe *regs; struct l_sigframe *fp, frame; l_sigset_t lmask; int sig, code; int oonstack; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; sig = ksi->ksi_signo; code = ksi->ksi_code; mtx_assert(&psp->ps_mtx, MA_OWNED); if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ linux_rt_sendsig(catcher, ksi, mask); return; } regs = td->td_frame; oonstack = sigonstack(regs->tf_esp); #ifdef DEBUG if (ldebug(sendsig)) printf(ARGS(sendsig, "%p, %d, %p, %u"), catcher, sig, (void*)mask, code); #endif /* * Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + + fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct l_sigframe)); } else fp = (struct l_sigframe *)regs->tf_esp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Build the argument list for the signal handler. */ sig = bsd_to_linux_signal(sig); bzero(&frame, sizeof(frame)); frame.sf_handler = catcher; frame.sf_sig = sig; bsd_to_linux_sigset(mask, &lmask); /* * Build the signal context to be used by sigreturn. */ frame.sf_sc.sc_mask = lmask.__mask; frame.sf_sc.sc_gs = rgs(); frame.sf_sc.sc_fs = regs->tf_fs; frame.sf_sc.sc_es = regs->tf_es; frame.sf_sc.sc_ds = regs->tf_ds; frame.sf_sc.sc_edi = regs->tf_edi; frame.sf_sc.sc_esi = regs->tf_esi; frame.sf_sc.sc_ebp = regs->tf_ebp; frame.sf_sc.sc_ebx = regs->tf_ebx; frame.sf_sc.sc_esp = regs->tf_esp; frame.sf_sc.sc_edx = regs->tf_edx; frame.sf_sc.sc_ecx = regs->tf_ecx; frame.sf_sc.sc_eax = regs->tf_eax; frame.sf_sc.sc_eip = regs->tf_eip; frame.sf_sc.sc_cs = regs->tf_cs; frame.sf_sc.sc_eflags = regs->tf_eflags; frame.sf_sc.sc_esp_at_signal = regs->tf_esp; frame.sf_sc.sc_ss = regs->tf_ss; frame.sf_sc.sc_err = regs->tf_err; frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); frame.sf_extramask[0] = lmask.__mask; if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. */ regs->tf_esp = (int)fp; regs->tf_eip = linux_sigcode; regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); regs->tf_cs = _ucodesel; regs->tf_ds = _udatasel; regs->tf_es = _udatasel; regs->tf_fs = _udatasel; regs->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) { struct l_sigframe frame; struct trapframe *regs; l_sigset_t lmask; sigset_t bmask; int eflags; ksiginfo_t ksi; regs = td->td_frame; #ifdef DEBUG if (ldebug(sigreturn)) printf(ARGS(sigreturn, "%p"), (void *)args->sfp); #endif /* * The trampoline code hands us the sigframe. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->sfp, &frame, sizeof(frame)) != 0) return (EFAULT); /* * Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = frame.sf_sc.sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) return (EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(frame.sf_sc.sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } lmask.__mask = frame.sf_sc.sc_mask; linux_to_bsd_sigset(&lmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* * Restore signal context. */ /* %gs was restored by the trampoline. */ regs->tf_fs = frame.sf_sc.sc_fs; regs->tf_es = frame.sf_sc.sc_es; regs->tf_ds = frame.sf_sc.sc_ds; regs->tf_edi = frame.sf_sc.sc_edi; regs->tf_esi = frame.sf_sc.sc_esi; regs->tf_ebp = frame.sf_sc.sc_ebp; regs->tf_ebx = frame.sf_sc.sc_ebx; regs->tf_edx = frame.sf_sc.sc_edx; regs->tf_ecx = frame.sf_sc.sc_ecx; regs->tf_eax = frame.sf_sc.sc_eax; regs->tf_eip = frame.sf_sc.sc_eip; regs->tf_cs = frame.sf_sc.sc_cs; regs->tf_eflags = eflags; regs->tf_esp = frame.sf_sc.sc_esp_at_signal; regs->tf_ss = frame.sf_sc.sc_ss; return (EJUSTRETURN); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by rt_sendsig (above). * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) { struct l_ucontext uc; struct l_sigcontext *context; sigset_t bmask; l_stack_t *lss; stack_t ss; struct trapframe *regs; int eflags; ksiginfo_t ksi; regs = td->td_frame; #ifdef DEBUG if (ldebug(rt_sigreturn)) printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); #endif /* * The trampoline code hands us the ucontext. * It is unsafe to keep track of it ourselves, in the event that a * program jumps out of a signal handler. */ if (copyin(args->ucp, &uc, sizeof(uc)) != 0) return (EFAULT); context = &uc.uc_mcontext; /* * Check for security violations. */ #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) eflags = context->sc_eflags; if (!EFLAGS_SECURE(eflags, regs->tf_eflags)) return (EINVAL); /* * Don't allow users to load a valid privileged %cs. Let the * hardware check for invalid selectors, excess privilege in * other selectors, invalid %eip's and invalid %esp's. */ #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) if (!CS_SECURE(context->sc_cs)) { ksiginfo_init_trap(&ksi); ksi.ksi_signo = SIGBUS; ksi.ksi_code = BUS_OBJERR; ksi.ksi_trapno = T_PROTFLT; ksi.ksi_addr = (void *)regs->tf_eip; trapsignal(td, &ksi); return (EINVAL); } linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); /* * Restore signal context */ /* %gs was restored by the trampoline. */ regs->tf_fs = context->sc_fs; regs->tf_es = context->sc_es; regs->tf_ds = context->sc_ds; regs->tf_edi = context->sc_edi; regs->tf_esi = context->sc_esi; regs->tf_ebp = context->sc_ebp; regs->tf_ebx = context->sc_ebx; regs->tf_edx = context->sc_edx; regs->tf_ecx = context->sc_ecx; regs->tf_eax = context->sc_eax; regs->tf_eip = context->sc_eip; regs->tf_cs = context->sc_cs; regs->tf_eflags = eflags; regs->tf_esp = context->sc_esp_at_signal; regs->tf_ss = context->sc_ss; /* * call sigaltstack & ignore results.. */ lss = &uc.uc_stack; ss.ss_sp = lss->ss_sp; ss.ss_size = lss->ss_size; ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); #ifdef DEBUG if (ldebug(rt_sigreturn)) printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); #endif (void)kern_sigaltstack(td, &ss, NULL); return (EJUSTRETURN); } static int linux_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct proc *p; struct trapframe *frame; p = td->td_proc; frame = td->td_frame; sa->code = frame->tf_eax; sa->args[0] = frame->tf_ebx; sa->args[1] = frame->tf_ecx; sa->args[2] = frame->tf_edx; sa->args[3] = frame->tf_esi; sa->args[4] = frame->tf_edi; sa->args[5] = frame->tf_ebp; /* Unconfirmed */ if (sa->code >= p->p_sysent->sv_size) /* nosys */ sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; td->td_retval[0] = 0; td->td_retval[1] = frame->tf_edx; return (0); } /* * If a linux binary is exec'ing something, try this image activator * first. We override standard shell script execution in order to * be able to modify the interpreter path. We only do this if a linux * binary is doing the exec, so we do not create an EXEC module for it. */ static int exec_linux_imgact_try(struct image_params *iparams); static int exec_linux_imgact_try(struct image_params *imgp) { const char *head = (const char *)imgp->image_header; char *rpath; int error = -1; /* * The interpreter for shell scripts run from a linux binary needs * to be located in /compat/linux if possible in order to recursively * maintain linux path emulation. */ if (((const short *)head)[0] == SHELLMAGIC) { /* * Run our normal shell image activator. If it succeeds attempt * to use the alternate path for the interpreter. If an alternate * path is found, use our stringspace to store it. */ if ((error = exec_shell_imgact(imgp)) == 0) { linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD); if (rpath != NULL) imgp->args->fname_buf = imgp->interpreter_name = rpath; } } return (error); } /* * exec_setregs may initialize some registers differently than Linux * does, thus potentially confusing Linux binaries. If necessary, we * override the exec_setregs default(s) here. */ static void exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct pcb *pcb = td->td_pcb; exec_setregs(td, imgp, stack); /* Linux sets %gs to 0, we default to _udatasel */ pcb->pcb_gs = 0; load_gs(0); pcb->pcb_initial_npxcw = __LINUX_NPXCW__; } static void linux_get_machine(const char **dst) { switch (cpu_class) { case CPUCLASS_686: *dst = "i686"; break; case CPUCLASS_586: *dst = "i586"; break; case CPUCLASS_486: *dst = "i486"; break; default: *dst = "i386"; } } struct sysentvec linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = bsd_to_linux_errno, .sv_transtrap = translate_traps, .sv_fixup = linux_fixup, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux a.out", .sv_coredump = NULL, .sv_imgact_try = exec_linux_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = LINUX_USRSTACK, .sv_psstrings = PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = exec_copyout_strings, .sv_setregs = exec_linux_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, }; INIT_SYSENTVEC(aout_sysvec, &linux_sysvec); struct sysentvec elf_linux_sysvec = { .sv_size = LINUX_SYS_MAXSYSCALL, .sv_table = linux_sysent, .sv_mask = 0, .sv_errsize = ELAST + 1, .sv_errtbl = bsd_to_linux_errno, .sv_transtrap = translate_traps, .sv_fixup = elf_linux_fixup, .sv_sendsig = linux_sendsig, .sv_sigcode = &_binary_linux_locore_o_start, .sv_szsigcode = &linux_szsigcode, .sv_name = "Linux ELF", .sv_coredump = elf32_coredump, .sv_imgact_try = exec_linux_imgact_try, .sv_minsigstksz = LINUX_MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = VM_MAXUSER_ADDRESS, .sv_usrstack = LINUX_USRSTACK, .sv_psstrings = LINUX_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = linux_copyout_strings, .sv_setregs = exec_linux_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = linux_fetch_syscall_args, .sv_syscallnames = NULL, .sv_shared_page_base = LINUX_SHAREDPAGE, .sv_shared_page_len = PAGE_SIZE, .sv_schedtail = linux_schedtail, .sv_thread_detach = linux_thread_detach, .sv_trap = NULL, }; static void linux_vdso_install(void *param) { linux_szsigcode = (&_binary_linux_locore_o_end - &_binary_linux_locore_o_start); if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len) panic("Linux invalid vdso size\n"); __elfN(linux_vdso_fixup)(&elf_linux_sysvec); linux_shared_page_obj = __elfN(linux_shared_page_init) (&linux_shared_page_mapping); __elfN(linux_vdso_reloc)(&elf_linux_sysvec, LINUX_SHAREDPAGE); bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping, linux_szsigcode); elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj; } SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY, (sysinit_cfunc_t)linux_vdso_install, NULL); static void linux_vdso_deinstall(void *param) { __elfN(linux_shared_page_fini)(linux_shared_page_obj); }; SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t)linux_vdso_deinstall, NULL); static char GNU_ABI_VENDOR[] = "GNU"; static int GNULINUX_ABI_DESC = 0; static boolean_t linux_trans_osrel(const Elf_Note *note, int32_t *osrel) { const Elf32_Word *desc; uintptr_t p; p = (uintptr_t)(note + 1); p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); desc = (const Elf32_Word *)p; if (desc[0] != GNULINUX_ABI_DESC) return (FALSE); /* * For linux we encode osrel as follows (see linux_mib.c): * VVVMMMIII (version, major, minor), see linux_mib.c. */ *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; return (TRUE); } static Elf_Brandnote linux_brandnote = { .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), .hdr.n_descsz = 16, /* XXX at least 16 */ .hdr.n_type = 1, .vendor = GNU_ABI_VENDOR, .flags = BN_TRANSLATE_OSREL, .trans_osrel = linux_trans_osrel }; static Elf32_Brandinfo linux_brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-linux.so.1", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; static Elf32_Brandinfo linux_glibc2brand = { .brand = ELFOSABI_LINUX, .machine = EM_386, .compat_3_brand = "Linux", .emul_path = "/compat/linux", .interp_path = "/lib/ld-linux.so.2", .sysvec = &elf_linux_sysvec, .interp_newpath = NULL, .brand_note = &linux_brandnote, .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE }; Elf32_Brandinfo *linux_brandlist[] = { &linux_brand, &linux_glibc2brand, NULL }; static int linux_elf_modevent(module_t mod, int type, void *data) { Elf32_Brandinfo **brandinfo; int error; struct linux_ioctl_handler **lihp; error = 0; switch(type) { case MOD_LOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_insert_brand_entry(*brandinfo) < 0) error = EINVAL; if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_register_handler(*lihp); LIST_INIT(&futex_list); mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, NULL, 1000); linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, NULL, 1000); linux_thread_dtor_tag = EVENTHANDLER_REGISTER(thread_dtor, linux_thread_dtor, NULL, EVENTHANDLER_PRI_ANY); linux_get_machine(&linux_kplatform); linux_szplatform = roundup(strlen(linux_kplatform) + 1, sizeof(char *)); linux_osd_jail_register(); stclohz = (stathz ? stathz : hz); if (bootverbose) printf("Linux ELF exec handler installed\n"); } else printf("cannot insert Linux ELF brand handler\n"); break; case MOD_UNLOAD: for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_brand_inuse(*brandinfo)) error = EBUSY; if (error == 0) { for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; ++brandinfo) if (elf32_remove_brand_entry(*brandinfo) < 0) error = EINVAL; } if (error == 0) { SET_FOREACH(lihp, linux_ioctl_handler_set) linux_ioctl_unregister_handler(*lihp); mtx_destroy(&futex_mtx); EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); EVENTHANDLER_DEREGISTER(thread_dtor, linux_thread_dtor_tag); linux_osd_jail_deregister(); if (bootverbose) printf("Linux ELF exec handler removed\n"); } else printf("Could not deinstall ELF interpreter entry\n"); break; default: return (EOPNOTSUPP); } return (error); } static moduledata_t linux_elf_mod = { "linuxelf", linux_elf_modevent, 0 }; DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); Index: head/sys/i386/svr4/svr4_machdep.c =================================================================== --- head/sys/i386/svr4/svr4_machdep.c (revision 294929) +++ head/sys/i386/svr4/svr4_machdep.c (revision 294930) @@ -1,610 +1,610 @@ /*- * Copyright (c) 1998 Mark Newton * Copyright (c) 1994 Christos Zoulas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #undef sigcode #undef szsigcode extern int svr4_szsigcode; extern char svr4_sigcode[]; static void svr4_getsiginfo(union svr4_siginfo *, int, u_long, caddr_t); #if !defined(__NetBSD__) /* taken from /sys/arch/i386/include/psl.h on NetBSD-1.3 */ # define PSL_MBZ 0xffc08028 # define PSL_USERSTATIC (PSL_USER | PSL_MBZ | PSL_IOPL | PSL_NT | PSL_VM | PSL_VIF | PSL_VIP) # define USERMODE(c, f) (ISPL(c) == SEL_UPL) #endif #if defined(__NetBSD__) void svr4_setregs(td, epp, stack) struct thread *td; struct exec_package *epp; u_long stack; { register struct pcb *pcb = td->td_pcb; pcb->pcb_savefpu.sv_env.en_cw = __SVR4_NPXCW__; setregs(td, epp, stack, 0UL); } #endif /* __NetBSD__ */ void svr4_getcontext(td, uc, mask, oonstack) struct thread *td; struct svr4_ucontext *uc; sigset_t *mask; int oonstack; { struct proc *p = td->td_proc; struct trapframe *tf = td->td_frame; svr4_greg_t *r = uc->uc_mcontext.greg; struct svr4_sigaltstack *s = &uc->uc_stack; #if defined(DONE_MORE_SIGALTSTACK_WORK) struct sigacts *psp; struct sigaltstack *sf; #endif PROC_LOCK(p); #if defined(DONE_MORE_SIGALTSTACK_WORK) psp = p->p_sigacts; sf = &p->p_sigstk; #endif memset(uc, 0, sizeof(struct svr4_ucontext)); uc->uc_link = p->p_emuldata; /* * Set the general purpose registers */ #ifdef VM86 if (tf->tf_eflags & PSL_VM) { r[SVR4_X86_GS] = tf->tf_vm86_gs; r[SVR4_X86_FS] = tf->tf_vm86_fs; r[SVR4_X86_ES] = tf->tf_vm86_es; r[SVR4_X86_DS] = tf->tf_vm86_ds; r[SVR4_X86_EFL] = get_vflags(td); } else #endif { #if defined(__NetBSD__) __asm("movl %%gs,%w0" : "=r" (r[SVR4_X86_GS])); __asm("movl %%fs,%w0" : "=r" (r[SVR4_X86_FS])); #else r[SVR4_X86_GS] = rgs(); r[SVR4_X86_FS] = tf->tf_fs; #endif r[SVR4_X86_ES] = tf->tf_es; r[SVR4_X86_DS] = tf->tf_ds; r[SVR4_X86_EFL] = tf->tf_eflags; } r[SVR4_X86_EDI] = tf->tf_edi; r[SVR4_X86_ESI] = tf->tf_esi; r[SVR4_X86_EBP] = tf->tf_ebp; r[SVR4_X86_ESP] = tf->tf_esp; r[SVR4_X86_EBX] = tf->tf_ebx; r[SVR4_X86_EDX] = tf->tf_edx; r[SVR4_X86_ECX] = tf->tf_ecx; r[SVR4_X86_EAX] = tf->tf_eax; r[SVR4_X86_TRAPNO] = tf->tf_trapno; r[SVR4_X86_ERR] = tf->tf_err; r[SVR4_X86_EIP] = tf->tf_eip; r[SVR4_X86_CS] = tf->tf_cs; r[SVR4_X86_UESP] = 0; r[SVR4_X86_SS] = tf->tf_ss; /* * Set the signal stack */ #if defined(DONE_MORE_SIGALTSTACK_WORK) bsd_to_svr4_sigaltstack(sf, s); #else s->ss_sp = (void *)(((u_long) tf->tf_esp) & ~(16384 - 1)); s->ss_size = 16384; s->ss_flags = 0; #endif PROC_UNLOCK(p); /* * Set the signal mask */ bsd_to_svr4_sigset(mask, &uc->uc_sigmask); /* * Set the flags */ uc->uc_flags = SVR4_UC_SIGMASK|SVR4_UC_CPU|SVR4_UC_STACK; } /* * Set to ucontext specified. Reset signal mask and * stack state from context. * Return to previous pc and psl as specified by * context left by sendsig. Check carefully to * make sure that the user has not modified the * psl to gain improper privileges or to cause * a machine fault. */ int svr4_setcontext(td, uc) struct thread *td; struct svr4_ucontext *uc; { #if defined(DONE_MORE_SIGALTSTACK_WORK) struct sigacts *psp; #endif struct proc *p = td->td_proc; register struct trapframe *tf; svr4_greg_t *r = uc->uc_mcontext.greg; struct svr4_sigaltstack *s = &uc->uc_stack; struct sigaltstack *sf; sigset_t mask; PROC_LOCK(p); #if defined(DONE_MORE_SIGALTSTACK_WORK) psp = p->p_sigacts; #endif sf = &td->td_sigstk; /* * XXX: * Should we check the value of flags to determine what to restore? * What to do with uc_link? * What to do with floating point stuff? * Should we bother with the rest of the registers that we * set to 0 right now? */ if ((uc->uc_flags & SVR4_UC_CPU) == 0) { PROC_UNLOCK(p); return 0; } DPRINTF(("svr4_setcontext(%d)\n", p->p_pid)); tf = td->td_frame; /* * Restore register context. */ #ifdef VM86 #warning "VM86 doesn't work yet, please don't try to use it." if (r[SVR4_X86_EFL] & PSL_VM) { tf->tf_vm86_gs = r[SVR4_X86_GS]; tf->tf_vm86_fs = r[SVR4_X86_FS]; tf->tf_vm86_es = r[SVR4_X86_ES]; tf->tf_vm86_ds = r[SVR4_X86_DS]; set_vflags(td, r[SVR4_X86_EFL]); } else #endif { /* * Check for security violations. If we're returning to * protected mode, the CPU will validate the segment registers * automatically and generate a trap on violations. We handle * the trap, rather than doing all of the checking here. */ if (((r[SVR4_X86_EFL] ^ tf->tf_eflags) & PSL_USERSTATIC) != 0 || !USERMODE(r[SVR4_X86_CS], r[SVR4_X86_EFL])) { PROC_UNLOCK(p); return (EINVAL); } #if defined(__NetBSD__) /* %fs and %gs were restored by the trampoline. */ #else /* %gs was restored by the trampoline. */ tf->tf_fs = r[SVR4_X86_FS]; #endif tf->tf_es = r[SVR4_X86_ES]; tf->tf_ds = r[SVR4_X86_DS]; tf->tf_eflags = r[SVR4_X86_EFL]; } tf->tf_edi = r[SVR4_X86_EDI]; tf->tf_esi = r[SVR4_X86_ESI]; tf->tf_ebp = r[SVR4_X86_EBP]; tf->tf_ebx = r[SVR4_X86_EBX]; tf->tf_edx = r[SVR4_X86_EDX]; tf->tf_ecx = r[SVR4_X86_ECX]; tf->tf_eax = r[SVR4_X86_EAX]; tf->tf_trapno = r[SVR4_X86_TRAPNO]; tf->tf_err = r[SVR4_X86_ERR]; tf->tf_eip = r[SVR4_X86_EIP]; tf->tf_cs = r[SVR4_X86_CS]; tf->tf_ss = r[SVR4_X86_SS]; tf->tf_esp = r[SVR4_X86_ESP]; p->p_emuldata = uc->uc_link; /* * restore signal stack */ if (uc->uc_flags & SVR4_UC_STACK) { svr4_to_bsd_sigaltstack(s, sf); } /* * restore signal mask */ if (uc->uc_flags & SVR4_UC_SIGMASK) { #if defined(DEBUG_SVR4) { int i; for (i = 0; i < 4; i++) DPRINTF(("\tuc_sigmask[%d] = %lx\n", i, uc->uc_sigmask.bits[i])); } #endif svr4_to_bsd_sigset(&uc->uc_sigmask, &mask); SIG_CANTMASK(mask); td->td_sigmask = mask; signotify(td); } PROC_UNLOCK(p); return 0; /*EJUSTRETURN;*/ } static void svr4_getsiginfo(si, sig, code, addr) union svr4_siginfo *si; int sig; u_long code; caddr_t addr; { si->svr4_si_signo = bsd_to_svr4_sig[sig]; si->svr4_si_errno = 0; si->svr4_si_addr = addr; switch (code) { case T_PRIVINFLT: si->svr4_si_code = SVR4_ILL_PRVOPC; si->svr4_si_trap = SVR4_T_PRIVINFLT; break; case T_BPTFLT: si->svr4_si_code = SVR4_TRAP_BRKPT; si->svr4_si_trap = SVR4_T_BPTFLT; break; case T_ARITHTRAP: si->svr4_si_code = SVR4_FPE_INTOVF; si->svr4_si_trap = SVR4_T_DIVIDE; break; case T_PROTFLT: si->svr4_si_code = SVR4_SEGV_ACCERR; si->svr4_si_trap = SVR4_T_PROTFLT; break; case T_TRCTRAP: si->svr4_si_code = SVR4_TRAP_TRACE; si->svr4_si_trap = SVR4_T_TRCTRAP; break; case T_PAGEFLT: si->svr4_si_code = SVR4_SEGV_ACCERR; si->svr4_si_trap = SVR4_T_PAGEFLT; break; case T_ALIGNFLT: si->svr4_si_code = SVR4_BUS_ADRALN; si->svr4_si_trap = SVR4_T_ALIGNFLT; break; case T_DIVIDE: si->svr4_si_code = SVR4_FPE_FLTDIV; si->svr4_si_trap = SVR4_T_DIVIDE; break; case T_OFLOW: si->svr4_si_code = SVR4_FPE_FLTOVF; si->svr4_si_trap = SVR4_T_DIVIDE; break; case T_BOUND: si->svr4_si_code = SVR4_FPE_FLTSUB; si->svr4_si_trap = SVR4_T_BOUND; break; case T_DNA: si->svr4_si_code = SVR4_FPE_FLTINV; si->svr4_si_trap = SVR4_T_DNA; break; case T_FPOPFLT: si->svr4_si_code = SVR4_FPE_FLTINV; si->svr4_si_trap = SVR4_T_FPOPFLT; break; case T_SEGNPFLT: si->svr4_si_code = SVR4_SEGV_MAPERR; si->svr4_si_trap = SVR4_T_SEGNPFLT; break; case T_STKFLT: si->svr4_si_code = SVR4_ILL_BADSTK; si->svr4_si_trap = SVR4_T_STKFLT; break; default: si->svr4_si_code = 0; si->svr4_si_trap = 0; #if defined(DEBUG_SVR4) printf("sig %d code %ld\n", sig, code); /* panic("svr4_getsiginfo");*/ #endif break; } } /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * in u. to call routine. After the handler is * done svr4 will call setcontext for us * with the user context we just set up, and we * will return to the user pc, psl. */ void svr4_sendsig(catcher, ksi, mask) sig_t catcher; ksiginfo_t *ksi; sigset_t *mask; { register struct thread *td = curthread; struct proc *p = td->td_proc; register struct trapframe *tf; struct svr4_sigframe *fp, frame; struct sigacts *psp; int oonstack; int sig; int code; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; #if defined(DEBUG_SVR4) printf("svr4_sendsig(%d)\n", sig); #endif code = ksi->ksi_trapno; /* use trap No. */ psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; oonstack = sigonstack(tf->tf_esp); /* * Allocate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - fp = (struct svr4_sigframe *)(td->td_sigstk.ss_sp + + fp = (struct svr4_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct svr4_sigframe)); td->td_sigstk.ss_flags |= SS_ONSTACK; } else { fp = (struct svr4_sigframe *)tf->tf_esp - 1; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Build the argument list for the signal handler. * Notes: * - we always build the whole argument list, even when we * don't need to [when SA_SIGINFO is not set, we don't need * to pass all sf_si and sf_uc] * - we don't pass the correct signal address [we need to * modify many kernel files to enable that] */ svr4_getcontext(td, &frame.sf_uc, mask, oonstack); #if defined(DEBUG_SVR4) printf("obtained ucontext\n"); #endif svr4_getsiginfo(&frame.sf_si, sig, code, (caddr_t) tf->tf_eip); #if defined(DEBUG_SVR4) printf("obtained siginfo\n"); #endif frame.sf_signum = frame.sf_si.svr4_si_signo; frame.sf_sip = &fp->sf_si; frame.sf_ucp = &fp->sf_uc; frame.sf_handler = catcher; #if defined(DEBUG_SVR4) printf("sig = %d, sip %p, ucp = %p, handler = %p\n", frame.sf_signum, frame.sf_sip, frame.sf_ucp, frame.sf_handler); #endif if (copyout(&frame, fp, sizeof(frame)) != 0) { /* * Process has trashed its stack; give it an illegal * instruction to halt it in its tracks. */ PROC_LOCK(p); sigexit(td, SIGILL); /* NOTREACHED */ } #if defined(__NetBSD__) /* * Build context to run handler in. */ tf->tf_es = GSEL(GUSERLDT_SEL, SEL_UPL); tf->tf_ds = GSEL(GUSERLDT_SEL, SEL_UPL); tf->tf_eip = (int)(((char *)PS_STRINGS) - svr4_szsigcode); tf->tf_cs = GSEL(GUSERLDT_SEL, SEL_UPL); tf->tf_eflags &= ~(PSL_T|PSL_VM|PSL_AC|PSL_D); tf->tf_esp = (int)fp; tf->tf_ss = GSEL(GUSERLDT_SEL, SEL_UPL); #else tf->tf_esp = (int)fp; tf->tf_eip = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode)); tf->tf_eflags &= ~(PSL_T | PSL_D); tf->tf_cs = _ucodesel; tf->tf_ds = _udatasel; tf->tf_es = _udatasel; tf->tf_fs = _udatasel; load_gs(_udatasel); tf->tf_ss = _udatasel; PROC_LOCK(p); mtx_lock(&psp->ps_mtx); #endif } int svr4_sys_sysarch(td, v) struct thread *td; struct svr4_sys_sysarch_args *v; { struct svr4_sys_sysarch_args *uap = v; #if 0 /* USER_LDT */ #if defined(__NetBSD__) caddr_t sg = stackgap_init(p->p_emul); #else caddr_t sg = stackgap_init(); #endif int error; #endif switch (uap->op) { case SVR4_SYSARCH_FPHW: return 0; case SVR4_SYSARCH_DSCR: #if 0 /* USER_LDT */ #warning "USER_LDT doesn't work - are you sure you want this?" { struct i386_set_ldt_args sa, *sap; struct sys_sysarch_args ua; struct svr4_ssd ssd; union descriptor bsd; if ((error = copyin(uap->a1, &ssd, sizeof(ssd))) != 0) { printf("Cannot copy arg1\n"); return error; } printf("s=%x, b=%x, l=%x, a1=%x a2=%x\n", ssd.selector, ssd.base, ssd.limit, ssd.access1, ssd.access2); /* We can only set ldt's for now. */ if (!ISLDT(ssd.selector)) { printf("Not an ldt\n"); return EPERM; } /* Oh, well we don't cleanup either */ if (ssd.access1 == 0) return 0; bsd.sd.sd_lobase = ssd.base & 0xffffff; bsd.sd.sd_hibase = (ssd.base >> 24) & 0xff; bsd.sd.sd_lolimit = ssd.limit & 0xffff; bsd.sd.sd_hilimit = (ssd.limit >> 16) & 0xf; bsd.sd.sd_type = ssd.access1 & 0x1f; bsd.sd.sd_dpl = (ssd.access1 >> 5) & 0x3; bsd.sd.sd_p = (ssd.access1 >> 7) & 0x1; bsd.sd.sd_xx = ssd.access2 & 0x3; bsd.sd.sd_def32 = (ssd.access2 >> 2) & 0x1; bsd.sd.sd_gran = (ssd.access2 >> 3)& 0x1; sa.start = IDXSEL(ssd.selector); sa.desc = stackgap_alloc(&sg, sizeof(union descriptor)); sa.num = 1; sap = stackgap_alloc(&sg, sizeof(struct i386_set_ldt_args)); if ((error = copyout(&sa, sap, sizeof(sa))) != 0) { printf("Cannot copyout args\n"); return error; } ua.op = I386_SET_LDT; ua.parms = (char *) sap; if ((error = copyout(&bsd, sa.desc, sizeof(bsd))) != 0) { printf("Cannot copyout desc\n"); return error; } return sys_sysarch(td, &ua, retval); } #endif default: printf("svr4_sysarch(%d), a1 %p\n", uap->op, uap->a1); return 0; } } Index: head/sys/mips/mips/freebsd32_machdep.c =================================================================== --- head/sys/mips/mips/freebsd32_machdep.c (revision 294929) +++ head/sys/mips/mips/freebsd32_machdep.c (revision 294930) @@ -1,489 +1,489 @@ /*- * Copyright (c) 2012 Juli Mallett * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Based on nwhitehorn's COMPAT_FREEBSD32 support code for PowerPC64. */ #include "opt_compat.h" #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void freebsd32_exec_setregs(struct thread *, struct image_params *, u_long); static int get_mcontext32(struct thread *, mcontext32_t *, int); static int set_mcontext32(struct thread *, mcontext32_t *); static void freebsd32_sendsig(sig_t, ksiginfo_t *, sigset_t *); extern const char *freebsd32_syscallnames[]; struct sysentvec elf32_freebsd_sysvec = { .sv_size = SYS_MAXSYSCALL, .sv_table = freebsd32_sysent, .sv_mask = 0, .sv_errsize = 0, .sv_errtbl = NULL, .sv_transtrap = NULL, .sv_fixup = __elfN(freebsd_fixup), .sv_sendsig = freebsd32_sendsig, .sv_sigcode = sigcode32, .sv_szsigcode = &szsigcode32, .sv_name = "FreeBSD ELF32", .sv_coredump = __elfN(coredump), .sv_imgact_try = NULL, .sv_minsigstksz = MINSIGSTKSZ, .sv_pagesize = PAGE_SIZE, .sv_minuser = VM_MIN_ADDRESS, .sv_maxuser = ((vm_offset_t)0x80000000), .sv_usrstack = FREEBSD32_USRSTACK, .sv_psstrings = FREEBSD32_PS_STRINGS, .sv_stackprot = VM_PROT_ALL, .sv_copyout_strings = freebsd32_copyout_strings, .sv_setregs = freebsd32_exec_setregs, .sv_fixlimit = NULL, .sv_maxssiz = NULL, .sv_flags = SV_ABI_FREEBSD | SV_ILP32, .sv_set_syscall_retval = cpu_set_syscall_retval, .sv_fetch_syscall_args = cpu_fetch_syscall_args, .sv_syscallnames = freebsd32_syscallnames, .sv_schedtail = NULL, .sv_thread_detach = NULL, .sv_trap = NULL, }; INIT_SYSENTVEC(elf32_sysvec, &elf32_freebsd_sysvec); static Elf32_Brandinfo freebsd_brand_info = { .brand = ELFOSABI_FREEBSD, .machine = EM_MIPS, .compat_3_brand = "FreeBSD", .emul_path = NULL, .interp_path = "/libexec/ld-elf.so.1", .sysvec = &elf32_freebsd_sysvec, .interp_newpath = "/libexec/ld-elf32.so.1", .flags = 0 }; SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_FIRST, (sysinit_cfunc_t) elf32_insert_brand_entry, &freebsd_brand_info); static void freebsd32_exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { exec_setregs(td, imgp, stack); /* * See comment in exec_setregs about running 32-bit binaries with 64-bit * registers. */ td->td_frame->sp -= 65536; /* * Clear extended address space bit for userland. */ td->td_frame->sr &= ~MIPS_SR_UX; } int set_regs32(struct thread *td, struct reg32 *regs) { struct reg r; unsigned i; for (i = 0; i < NUMSAVEREGS; i++) r.r_regs[i] = regs->r_regs[i]; return (set_regs(td, &r)); } int fill_regs32(struct thread *td, struct reg32 *regs) { struct reg r; unsigned i; int error; error = fill_regs(td, &r); if (error != 0) return (error); for (i = 0; i < NUMSAVEREGS; i++) regs->r_regs[i] = r.r_regs[i]; return (0); } int set_fpregs32(struct thread *td, struct fpreg32 *fpregs) { struct fpreg fp; unsigned i; for (i = 0; i < NUMFPREGS; i++) fp.r_regs[i] = fpregs->r_regs[i]; return (set_fpregs(td, &fp)); } int fill_fpregs32(struct thread *td, struct fpreg32 *fpregs) { struct fpreg fp; unsigned i; int error; error = fill_fpregs(td, &fp); if (error != 0) return (error); for (i = 0; i < NUMFPREGS; i++) fpregs->r_regs[i] = fp.r_regs[i]; return (0); } static int get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) { mcontext_t mcp64; unsigned i; int error; error = get_mcontext(td, &mcp64, flags); if (error != 0) return (error); mcp->mc_onstack = mcp64.mc_onstack; mcp->mc_pc = mcp64.mc_pc; for (i = 0; i < 32; i++) mcp->mc_regs[i] = mcp64.mc_regs[i]; mcp->sr = mcp64.sr; mcp->mullo = mcp64.mullo; mcp->mulhi = mcp64.mulhi; mcp->mc_fpused = mcp64.mc_fpused; for (i = 0; i < 33; i++) mcp->mc_fpregs[i] = mcp64.mc_fpregs[i]; mcp->mc_fpc_eir = mcp64.mc_fpc_eir; mcp->mc_tls = (int32_t)(intptr_t)mcp64.mc_tls; return (0); } static int set_mcontext32(struct thread *td, mcontext32_t *mcp) { mcontext_t mcp64; unsigned i; mcp64.mc_onstack = mcp->mc_onstack; mcp64.mc_pc = mcp->mc_pc; for (i = 0; i < 32; i++) mcp64.mc_regs[i] = mcp->mc_regs[i]; mcp64.sr = mcp->sr; mcp64.mullo = mcp->mullo; mcp64.mulhi = mcp->mulhi; mcp64.mc_fpused = mcp->mc_fpused; for (i = 0; i < 33; i++) mcp64.mc_fpregs[i] = mcp->mc_fpregs[i]; mcp64.mc_fpc_eir = mcp->mc_fpc_eir; mcp64.mc_tls = (void *)(intptr_t)mcp->mc_tls; return (set_mcontext(td, &mcp64)); } int freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap) { ucontext32_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext32(td, &uc.uc_mcontext); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); #if 0 CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); #endif return (EJUSTRETURN); } /* * The first two fields of a ucontext_t are the signal mask and the machine * context. The next field is uc_link; we want to avoid destroying the link * when copying out contexts. */ #define UC32_COPY_SIZE offsetof(ucontext32_t, uc_link) int freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->ucp, UC32_COPY_SIZE); } return (ret); } int freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } return (ret == 0 ? EJUSTRETURN : ret); } int freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap) { ucontext32_t uc; int ret; if (uap->oucp == NULL || uap->ucp == NULL) ret = EINVAL; else { get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE); if (ret == 0) { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } } return (ret == 0 ? EJUSTRETURN : ret); } #define UCONTEXT_MAGIC 0xACEDBADE /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ static void freebsd32_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct proc *p; struct thread *td; struct fpreg32 fpregs; struct reg32 regs; struct sigacts *psp; struct sigframe32 sf, *sfp; int sig; int oonstack; unsigned i; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); fill_regs32(td, ®s); oonstack = sigonstack(td->td_frame->sp); /* save user context */ bzero(&sf, sizeof sf); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack.ss_sp = (int32_t)(intptr_t)td->td_sigstk.ss_sp; sf.sf_uc.uc_stack.ss_size = td->td_sigstk.ss_size; sf.sf_uc.uc_stack.ss_flags = td->td_sigstk.ss_flags; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_pc = regs.r_regs[PC]; sf.sf_uc.uc_mcontext.mullo = regs.r_regs[MULLO]; sf.sf_uc.uc_mcontext.mulhi = regs.r_regs[MULHI]; sf.sf_uc.uc_mcontext.mc_tls = (int32_t)(intptr_t)td->td_md.md_tls; sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC; /* magic number */ for (i = 1; i < 32; i++) sf.sf_uc.uc_mcontext.mc_regs[i] = regs.r_regs[i]; sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED; if (sf.sf_uc.uc_mcontext.mc_fpused) { /* if FPU has current state, save it first */ if (td == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td); fill_fpregs32(td, &fpregs); for (i = 0; i < 33; i++) sf.sf_uc.uc_mcontext.mc_fpregs[i] = fpregs.r_regs[i]; } /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - sfp = (struct sigframe32 *)((vm_offset_t)(td->td_sigstk.ss_sp + + sfp = (struct sigframe32 *)(((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe32)) & ~(sizeof(__int64_t) - 1)); } else sfp = (struct sigframe32 *)((vm_offset_t)(td->td_frame->sp - sizeof(struct sigframe32)) & ~(sizeof(__int64_t) - 1)); /* Build the argument list for the signal handler. */ td->td_frame->a0 = sig; td->td_frame->a2 = (register_t)(intptr_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ td->td_frame->a1 = (register_t)(intptr_t)&sfp->sf_si; /* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */ /* fill siginfo structure */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = td->td_frame->badvaddr; } else { /* Old FreeBSD-style arguments. */ td->td_frame->a1 = ksi->ksi_code; td->td_frame->a3 = td->td_frame->badvaddr; /* sf.sf_ahu.sf_handler = catcher; */ } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(struct sigframe32)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ PROC_LOCK(p); sigexit(td, SIGILL); } td->td_frame->pc = (register_t)(intptr_t)catcher; td->td_frame->t9 = (register_t)(intptr_t)catcher; td->td_frame->sp = (register_t)(intptr_t)sfp; /* * Signal trampoline code is at base of user stack. */ td->td_frame->ra = (register_t)(intptr_t)FREEBSD32_PS_STRINGS - *(p->p_sysent->sv_szsigcode); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } int freebsd32_sysarch(struct thread *td, struct freebsd32_sysarch_args *uap) { int error; int32_t tlsbase; switch (uap->op) { case MIPS_SET_TLS: td->td_md.md_tls = (void *)(intptr_t)uap->parms; return (0); case MIPS_GET_TLS: tlsbase = (int32_t)(intptr_t)td->td_md.md_tls; error = copyout(&tlsbase, uap->parms, sizeof(tlsbase)); return (error); default: break; } return (EINVAL); } void elf32_dump_thread(struct thread *td __unused, void *dst __unused, size_t *off __unused) { } Index: head/sys/mips/mips/pm_machdep.c =================================================================== --- head/sys/mips/mips/pm_machdep.c (revision 294929) +++ head/sys/mips/mips/pm_machdep.c (revision 294930) @@ -1,496 +1,496 @@ /*- * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * from: src/sys/i386/i386/machdep.c,v 1.385.2.3 2000/05/10 02:04:46 obrien * JNPR: pm_machdep.c,v 1.9.2.1 2007/08/16 15:59:10 girish */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define UCONTEXT_MAGIC 0xACEDBADE /* * Send an interrupt to process. * * Stack is set up to allow sigcode stored * at top to call routine, followed by kcall * to sigreturn routine below. After sigreturn * resets the signal mask, the stack, and the * frame pointer, it returns to the user * specified pc, psl. */ void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct proc *p; struct thread *td; struct trapframe *regs; struct sigacts *psp; struct sigframe sf, *sfp; int sig; int oonstack; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); regs = td->td_frame; oonstack = sigonstack(regs->sp); /* save user context */ bzero(&sf, sizeof(struct sigframe)); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; sf.sf_uc.uc_mcontext.mc_pc = regs->pc; sf.sf_uc.uc_mcontext.mullo = regs->mullo; sf.sf_uc.uc_mcontext.mulhi = regs->mulhi; sf.sf_uc.uc_mcontext.mc_tls = td->td_md.md_tls; sf.sf_uc.uc_mcontext.mc_regs[0] = UCONTEXT_MAGIC; /* magic number */ bcopy((void *)®s->ast, (void *)&sf.sf_uc.uc_mcontext.mc_regs[1], sizeof(sf.sf_uc.uc_mcontext.mc_regs) - sizeof(register_t)); sf.sf_uc.uc_mcontext.mc_fpused = td->td_md.md_flags & MDTD_FPUSED; if (sf.sf_uc.uc_mcontext.mc_fpused) { /* if FPU has current state, save it first */ if (td == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td); bcopy((void *)&td->td_frame->f0, (void *)sf.sf_uc.uc_mcontext.mc_fpregs, sizeof(sf.sf_uc.uc_mcontext.mc_fpregs)); } /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - sfp = (struct sigframe *)((vm_offset_t)(td->td_sigstk.ss_sp + + sfp = (struct sigframe *)(((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe)) & ~(sizeof(__int64_t) - 1)); } else sfp = (struct sigframe *)((vm_offset_t)(regs->sp - sizeof(struct sigframe)) & ~(sizeof(__int64_t) - 1)); /* Build the argument list for the signal handler. */ regs->a0 = sig; regs->a2 = (register_t)(intptr_t)&sfp->sf_uc; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ regs->a1 = (register_t)(intptr_t)&sfp->sf_si; /* sf.sf_ahu.sf_action = (__siginfohandler_t *)catcher; */ /* fill siginfo structure */ sf.sf_si.si_signo = sig; sf.sf_si.si_code = ksi->ksi_code; sf.sf_si.si_addr = (void*)(intptr_t)regs->badvaddr; } else { /* Old FreeBSD-style arguments. */ regs->a1 = ksi->ksi_code; regs->a3 = regs->badvaddr; /* sf.sf_ahu.sf_handler = catcher; */ } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); /* * Copy the sigframe out to the user's stack. */ if (copyout(&sf, sfp, sizeof(struct sigframe)) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ PROC_LOCK(p); sigexit(td, SIGILL); } regs->pc = (register_t)(intptr_t)catcher; regs->t9 = (register_t)(intptr_t)catcher; regs->sp = (register_t)(intptr_t)sfp; /* * Signal trampoline code is at base of user stack. */ regs->ra = (register_t)(intptr_t)PS_STRINGS - *(p->p_sysent->sv_szsigcode); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } /* * System call to cleanup state after a signal * has been taken. Reset signal mask and * stack state from context left by sendsig (above). * Return to previous pc as specified by * context left by sendsig. */ int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; int error; error = copyin(uap->sigcntxp, &uc, sizeof(uc)); if (error != 0) return (error); error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->pc = (register_t) addr; return 0; } static int ptrace_read_int(struct thread *td, off_t addr, int *v) { if (proc_readmem(td, td->td_proc, addr, v, sizeof(*v)) != sizeof(*v)) return (ENOMEM); return (0); } static int ptrace_write_int(struct thread *td, off_t addr, int v) { if (proc_writemem(td, td->td_proc, addr, &v, sizeof(v)) != sizeof(v)) return (ENOMEM); return (0); } int ptrace_single_step(struct thread *td) { unsigned va; struct trapframe *locr0 = td->td_frame; int i; int bpinstr = MIPS_BREAK_SSTEP; int curinstr; struct proc *p; p = td->td_proc; PROC_UNLOCK(p); /* * Fetch what's at the current location. */ ptrace_read_int(td, (off_t)locr0->pc, &curinstr); /* compute next address after current location */ if(curinstr != 0) { va = MipsEmulateBranch(locr0, locr0->pc, locr0->fsr, (uintptr_t)&curinstr); } else { va = locr0->pc + 4; } if (td->td_md.md_ss_addr) { printf("SS %s (%d): breakpoint already set at %x (va %x)\n", p->p_comm, p->p_pid, td->td_md.md_ss_addr, va); /* XXX */ return (EFAULT); } td->td_md.md_ss_addr = va; /* * Fetch what's at the current location. */ ptrace_read_int(td, (off_t)va, &td->td_md.md_ss_instr); /* * Store breakpoint instruction at the "next" location now. */ i = ptrace_write_int (td, va, bpinstr); /* * The sync'ing of I & D caches is done by procfs_domem() * through procfs_rwmem(). */ PROC_LOCK(p); if (i < 0) return (EFAULT); #if 0 printf("SS %s (%d): breakpoint set at %x: %x (pc %x) br %x\n", p->p_comm, p->p_pid, p->p_md.md_ss_addr, p->p_md.md_ss_instr, locr0->pc, curinstr); /* XXX */ #endif return (0); } void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_regs.ra = tf->ra; pcb->pcb_regs.pc = tf->pc; pcb->pcb_regs.sp = tf->sp; } int fill_regs(struct thread *td, struct reg *regs) { memcpy(regs, td->td_frame, sizeof(struct reg)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *f; register_t sr; f = (struct trapframe *) td->td_frame; /* * Don't allow the user to change SR */ sr = f->sr; memcpy(td->td_frame, regs, sizeof(struct reg)); f->sr = sr; return (0); } int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct trapframe *tp; tp = td->td_frame; PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(tp->sp); PROC_UNLOCK(curthread->td_proc); bcopy((void *)&td->td_frame->zero, (void *)&mcp->mc_regs, sizeof(mcp->mc_regs)); mcp->mc_fpused = td->td_md.md_flags & MDTD_FPUSED; if (mcp->mc_fpused) { bcopy((void *)&td->td_frame->f0, (void *)&mcp->mc_fpregs, sizeof(mcp->mc_fpregs)); } if (flags & GET_MC_CLEAR_RET) { mcp->mc_regs[V0] = 0; mcp->mc_regs[V1] = 0; mcp->mc_regs[A3] = 0; } mcp->mc_pc = td->td_frame->pc; mcp->mullo = td->td_frame->mullo; mcp->mulhi = td->td_frame->mulhi; mcp->mc_tls = td->td_md.md_tls; return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tp; tp = td->td_frame; bcopy((void *)&mcp->mc_regs, (void *)&td->td_frame->zero, sizeof(mcp->mc_regs)); td->td_md.md_flags = mcp->mc_fpused & MDTD_FPUSED; if (mcp->mc_fpused) { bcopy((void *)&mcp->mc_fpregs, (void *)&td->td_frame->f0, sizeof(mcp->mc_fpregs)); } td->td_frame->pc = mcp->mc_pc; td->td_frame->mullo = mcp->mullo; td->td_frame->mulhi = mcp->mulhi; td->td_md.md_tls = mcp->mc_tls; /* Dont let user to set any bits in status and cause registers. */ return (0); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { if (td == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td); memcpy(fpregs, &td->td_frame->f0, sizeof(struct fpreg)); return 0; } int set_fpregs(struct thread *td, struct fpreg *fpregs) { if (PCPU_GET(fpcurthread) == td) PCPU_SET(fpcurthread, (struct thread *)0); memcpy(&td->td_frame->f0, fpregs, sizeof(struct fpreg)); return 0; } /* * Clear registers on exec * $sp is set to the stack pointer passed in. $pc is set to the entry * point given by the exec_package passed in, as is $t9 (used for PIC * code by the MIPS elf abi). */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { bzero((caddr_t)td->td_frame, sizeof(struct trapframe)); /* * The stack pointer has to be aligned to accommodate the largest * datatype at minimum. This probably means it should be 16-byte * aligned, but for now we're 8-byte aligning it. */ td->td_frame->sp = ((register_t) stack) & ~(sizeof(__int64_t) - 1); /* * If we're running o32 or n32 programs but have 64-bit registers, * GCC may use stack-relative addressing near the top of user * address space that, due to sign extension, will yield an * invalid address. For instance, if sp is 0x7fffff00 then GCC * might do something like this to load a word from 0x7ffffff0: * * addu sp, sp, 32768 * lw t0, -32528(sp) * * On systems with 64-bit registers, sp is sign-extended to * 0xffffffff80007f00 and the load is instead done from * 0xffffffff7ffffff0. * * To prevent this, we subtract 64K from the stack pointer here. * * For consistency, we should just always do this unless we're * running n64 programs. For now, since we don't support * COMPAT_FREEBSD32 on n64 kernels, we just do it unless we're * running n64 kernels. */ #if !defined(__mips_n64) td->td_frame->sp -= 65536; #endif td->td_frame->pc = imgp->entry_addr & ~3; td->td_frame->t9 = imgp->entry_addr & ~3; /* abicall req */ td->td_frame->sr = MIPS_SR_KSU_USER | MIPS_SR_EXL | MIPS_SR_INT_IE | (mips_rd_status() & MIPS_SR_INT_MASK); #if defined(__mips_n32) td->td_frame->sr |= MIPS_SR_PX; #elif defined(__mips_n64) td->td_frame->sr |= MIPS_SR_PX | MIPS_SR_UX | MIPS_SR_KX; #endif /* * FREEBSD_DEVELOPERS_FIXME: * Setup any other CPU-Specific registers (Not MIPS Standard) * and/or bits in other standard MIPS registers (if CPU-Specific) * that are needed. */ /* * Set up arguments for the rtld-capable crt0: * a0 stack pointer * a1 rtld cleanup (filled in by dynamic loader) * a2 rtld object (filled in by dynamic loader) * a3 ps_strings */ td->td_frame->a0 = (register_t) stack; td->td_frame->a1 = 0; td->td_frame->a2 = 0; td->td_frame->a3 = (register_t)imgp->ps_strings; td->td_md.md_flags &= ~MDTD_FPUSED; if (PCPU_GET(fpcurthread) == td) PCPU_SET(fpcurthread, (struct thread *)0); td->td_md.md_ss_addr = 0; } int ptrace_clear_single_step(struct thread *td) { int i; struct proc *p; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); if (!td->td_md.md_ss_addr) return EINVAL; /* * Restore original instruction and clear BP */ i = ptrace_write_int (td, td->td_md.md_ss_addr, td->td_md.md_ss_instr); /* The sync'ing of I & D caches is done by procfs_domem(). */ if (i < 0) { log(LOG_ERR, "SS %s %d: can't restore instruction at %x: %x\n", p->p_comm, p->p_pid, td->td_md.md_ss_addr, td->td_md.md_ss_instr); } td->td_md.md_ss_addr = 0; return 0; } Index: head/sys/mips/mips/vm_machdep.c =================================================================== --- head/sys/mips/mips/vm_machdep.c (revision 294929) +++ head/sys/mips/mips/vm_machdep.c (revision 294930) @@ -1,625 +1,625 @@ /*- * Copyright (c) 1982, 1986 The Regents of the University of California. * Copyright (c) 1989, 1990 William Jolitz * Copyright (c) 1994 John Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department, and William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_machdep.c 7.3 (Berkeley) 5/13/91 * Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$ * from: src/sys/i386/i386/vm_machdep.c,v 1.132.2.2 2000/08/26 04:19:26 yokota * JNPR: vm_machdep.c,v 1.8.2.2 2007/08/16 15:59:17 girish */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Duplicated from asm.h */ #if defined(__mips_o32) #define SZREG 4 #else #define SZREG 8 #endif #if defined(__mips_o32) || defined(__mips_o64) #define CALLFRAME_SIZ (SZREG * (4 + 2)) #elif defined(__mips_n32) || defined(__mips_n64) #define CALLFRAME_SIZ (SZREG * 4) #endif /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(register struct thread *td1,register struct proc *p2, struct thread *td2,int flags) { register struct proc *p1; struct pcb *pcb2; p1 = td1->td_proc; if ((flags & RFPROC) == 0) return; /* It is assumed that the vm_thread_alloc called * cpu_thread_alloc() before cpu_fork is called. */ /* Point the pcb to the top of the stack */ pcb2 = td2->td_pcb; /* Copy p1's pcb, note that in this case * our pcb also includes the td_frame being copied * too. The older mips2 code did an additional copy * of the td_frame, for us that's not needed any * longer (this copy does them both) */ bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); /* Point mdproc and then copy over td1's contents * md_proc is empty for MIPS */ td2->td_md.md_flags = td1->td_md.md_flags & MDTD_FPUSED; /* * Set up return-value registers as fork() libc stub expects. */ td2->td_frame->v0 = 0; td2->td_frame->v1 = 1; td2->td_frame->a3 = 0; if (td1 == PCPU_GET(fpcurthread)) MipsSaveCurFPState(td1); pcb2->pcb_context[PCB_REG_RA] = (register_t)(intptr_t)fork_trampoline; /* Make sp 64-bit aligned */ pcb2->pcb_context[PCB_REG_SP] = (register_t)(((vm_offset_t)td2->td_pcb & ~(sizeof(__int64_t) - 1)) - CALLFRAME_SIZ); pcb2->pcb_context[PCB_REG_S0] = (register_t)(intptr_t)fork_return; pcb2->pcb_context[PCB_REG_S1] = (register_t)(intptr_t)td2; pcb2->pcb_context[PCB_REG_S2] = (register_t)(intptr_t)td2->td_frame; pcb2->pcb_context[PCB_REG_SR] = mips_rd_status() & (MIPS_SR_KX | MIPS_SR_UX | MIPS_SR_INT_MASK); /* * FREEBSD_DEVELOPERS_FIXME: * Setup any other CPU-Specific registers (Not MIPS Standard) * and/or bits in other standard MIPS registers (if CPU-Specific) * that are needed. */ td2->td_md.md_tls = td1->td_md.md_tls; td2->td_md.md_saved_intr = MIPS_SR_INT_IE; td2->td_md.md_spinlock_count = 1; #ifdef CPU_CNMIPS if (td1->td_md.md_flags & MDTD_COP2USED) { if (td1->td_md.md_cop2owner == COP2_OWNER_USERLAND) { if (td1->td_md.md_ucop2) octeon_cop2_save(td1->td_md.md_ucop2); else panic("cpu_fork: ucop2 is NULL but COP2 is enabled"); } else { if (td1->td_md.md_cop2) octeon_cop2_save(td1->td_md.md_cop2); else panic("cpu_fork: cop2 is NULL but COP2 is enabled"); } } if (td1->td_md.md_cop2) { td2->td_md.md_cop2 = octeon_cop2_alloc_ctx(); memcpy(td2->td_md.md_cop2, td1->td_md.md_cop2, sizeof(*td1->td_md.md_cop2)); } if (td1->td_md.md_ucop2) { td2->td_md.md_ucop2 = octeon_cop2_alloc_ctx(); memcpy(td2->td_md.md_ucop2, td1->td_md.md_ucop2, sizeof(*td1->td_md.md_ucop2)); } td2->td_md.md_cop2owner = td1->td_md.md_cop2owner; pcb2->pcb_context[PCB_REG_SR] |= MIPS_SR_PX | MIPS_SR_UX | MIPS_SR_KX | MIPS_SR_SX; /* Clear COP2 bits for userland & kernel */ td2->td_frame->sr &= ~MIPS_SR_COP_2_BIT; pcb2->pcb_context[PCB_REG_SR] &= ~MIPS_SR_COP_2_BIT; #endif } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_set_fork_handler(struct thread *td, void (*func) __P((void *)), void *arg) { /* * Note that the trap frame follows the args, so the function * is really called like this: func(arg, frame); */ td->td_pcb->pcb_context[PCB_REG_S0] = (register_t)(intptr_t)func; td->td_pcb->pcb_context[PCB_REG_S1] = (register_t)(intptr_t)arg; } void cpu_exit(struct thread *td) { } void cpu_thread_exit(struct thread *td) { if (PCPU_GET(fpcurthread) == td) PCPU_GET(fpcurthread) = (struct thread *)0; #ifdef CPU_CNMIPS if (td->td_md.md_cop2) memset(td->td_md.md_cop2, 0, sizeof(*td->td_md.md_cop2)); if (td->td_md.md_ucop2) memset(td->td_md.md_ucop2, 0, sizeof(*td->td_md.md_ucop2)); #endif } void cpu_thread_free(struct thread *td) { #ifdef CPU_CNMIPS if (td->td_md.md_cop2) octeon_cop2_free_ctx(td->td_md.md_cop2); if (td->td_md.md_ucop2) octeon_cop2_free_ctx(td->td_md.md_ucop2); td->td_md.md_cop2 = NULL; td->td_md.md_ucop2 = NULL; #endif } void cpu_thread_clean(struct thread *td) { } void cpu_thread_swapin(struct thread *td) { pt_entry_t *pte; int i; /* * The kstack may be at a different physical address now. * Cache the PTEs for the Kernel stack in the machine dependent * part of the thread struct so cpu_switch() can quickly map in * the pcb struct and kernel stack. */ for (i = 0; i < KSTACK_PAGES; i++) { pte = pmap_pte(kernel_pmap, td->td_kstack + i * PAGE_SIZE); td->td_md.md_upte[i] = *pte & ~TLBLO_SWBITS_MASK; } } void cpu_thread_swapout(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { pt_entry_t *pte; int i; KASSERT((td->td_kstack & (1 << PAGE_SHIFT)) == 0, ("kernel stack must be aligned.")); td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = &td->td_pcb->pcb_regs; for (i = 0; i < KSTACK_PAGES; i++) { pte = pmap_pte(kernel_pmap, td->td_kstack + i * PAGE_SIZE); td->td_md.md_upte[i] = *pte & ~TLBLO_SWBITS_MASK; } } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *locr0 = td->td_frame; unsigned int code; int quad_syscall; code = locr0->v0; quad_syscall = 0; #if defined(__mips_n32) || defined(__mips_n64) #ifdef COMPAT_FREEBSD32 if (code == SYS___syscall && SV_PROC_FLAG(td->td_proc, SV_ILP32)) quad_syscall = 1; #endif #else if (code == SYS___syscall) quad_syscall = 1; #endif if (code == SYS_syscall) code = locr0->a0; else if (code == SYS___syscall) { if (quad_syscall) code = _QUAD_LOWWORD ? locr0->a1 : locr0->a0; else code = locr0->a0; } switch (error) { case 0: if (quad_syscall && code != SYS_lseek) { /* * System call invoked through the * SYS___syscall interface but the * return value is really just 32 * bits. */ locr0->v0 = td->td_retval[0]; if (_QUAD_LOWWORD) locr0->v1 = td->td_retval[0]; locr0->a3 = 0; } else { locr0->v0 = td->td_retval[0]; locr0->v1 = td->td_retval[1]; locr0->a3 = 0; } break; case ERESTART: locr0->pc = td->td_pcb->pcb_tpc; break; case EJUSTRETURN: break; /* nothing to do */ default: if (quad_syscall && code != SYS_lseek) { locr0->v0 = error; if (_QUAD_LOWWORD) locr0->v1 = error; locr0->a3 = 1; } else { locr0->v0 = error; locr0->a3 = 1; } } } /* * Initialize machine state (pcb and trap frame) for a new thread about to * upcall. Put enough state in the new thread's PCB to get it to go back * userret(), where we can intercept it again to set the return (upcall) * Address and stack, along with those from upcalls that are from other sources * such as those generated in thread_userret() itself. */ void cpu_set_upcall(struct thread *td, struct thread *td0) { struct pcb *pcb2; /* Point the pcb to the top of the stack. */ pcb2 = td->td_pcb; /* * Copy the upcall pcb. This loads kernel regs. * Those not loaded individually below get their default * values here. * * XXXKSE It might be a good idea to simply skip this as * the values of the other registers may be unimportant. * This would remove any requirement for knowing the KSE * at this time (see the matching comment below for * more analysis) (need a good safe default). * In MIPS, the trapframe is the first element of the PCB * and gets copied when we copy the PCB. No separate copy * is needed. */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); /* * Set registers for trampoline to user mode. */ pcb2->pcb_context[PCB_REG_RA] = (register_t)(intptr_t)fork_trampoline; /* Make sp 64-bit aligned */ pcb2->pcb_context[PCB_REG_SP] = (register_t)(((vm_offset_t)td->td_pcb & ~(sizeof(__int64_t) - 1)) - CALLFRAME_SIZ); pcb2->pcb_context[PCB_REG_S0] = (register_t)(intptr_t)fork_return; pcb2->pcb_context[PCB_REG_S1] = (register_t)(intptr_t)td; pcb2->pcb_context[PCB_REG_S2] = (register_t)(intptr_t)td->td_frame; /* Dont set IE bit in SR. sched lock release will take care of it */ pcb2->pcb_context[PCB_REG_SR] = mips_rd_status() & (MIPS_SR_PX | MIPS_SR_KX | MIPS_SR_UX | MIPS_SR_INT_MASK); /* * FREEBSD_DEVELOPERS_FIXME: * Setup any other CPU-Specific registers (Not MIPS Standard) * that are needed. */ /* Setup to release spin count in in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_intr = MIPS_SR_INT_IE; #if 0 /* Maybe we need to fix this? */ td->td_md.md_saved_sr = ( (MIPS_SR_COP_2_BIT | MIPS_SR_COP_0_BIT) | (MIPS_SR_PX | MIPS_SR_UX | MIPS_SR_KX | MIPS_SR_SX) | (MIPS_SR_INT_IE | MIPS_HARD_INT_MASK)); #endif } /* * Set that machine state for performing an upcall that has to * be done in thread_userret() so that those upcalls generated * in thread_userret() itself can be done as well. */ void cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf; register_t sp; /* * At the point where a function is called, sp must be 8 * byte aligned[for compatibility with 64-bit CPUs] * in ``See MIPS Run'' by D. Sweetman, p. 269 * align stack */ - sp = ((register_t)(intptr_t)(stack->ss_sp + stack->ss_size) & ~0x7) - + sp = (((intptr_t)stack->ss_sp + stack->ss_size) & ~0x7) - CALLFRAME_SIZ; /* * Set the trap frame to point at the beginning of the uts * function. */ tf = td->td_frame; bzero(tf, sizeof(struct trapframe)); tf->sp = sp; tf->pc = (register_t)(intptr_t)entry; /* * MIPS ABI requires T9 to be the same as PC * in subroutine entry point */ tf->t9 = (register_t)(intptr_t)entry; tf->a0 = (register_t)(intptr_t)arg; /* * Keep interrupt mask */ td->td_frame->sr = MIPS_SR_KSU_USER | MIPS_SR_EXL | MIPS_SR_INT_IE | (mips_rd_status() & MIPS_SR_INT_MASK); #if defined(__mips_n32) td->td_frame->sr |= MIPS_SR_PX; #elif defined(__mips_n64) td->td_frame->sr |= MIPS_SR_PX | MIPS_SR_UX | MIPS_SR_KX; #endif /* tf->sr |= (ALL_INT_MASK & idle_mask) | SR_INT_ENAB; */ /**XXX the above may now be wrong -- mips2 implements this as panic */ /* * FREEBSD_DEVELOPERS_FIXME: * Setup any other CPU-Specific registers (Not MIPS Standard) * that are needed. */ } /* * Implement the pre-zeroed page mechanism. * This routine is called from the idle loop. */ #define ZIDLE_LO(v) ((v) * 2 / 3) #define ZIDLE_HI(v) ((v) * 4 / 5) /* * Software interrupt handler for queued VM system processing. */ void swi_vm(void *dummy) { if (busdma_swi_pending) busdma_swi(); } int cpu_set_user_tls(struct thread *td, void *tls_base) { td->td_md.md_tls = (char*)tls_base; return (0); } #ifdef DDB #include #define DB_PRINT_REG(ptr, regname) \ db_printf(" %-12s %p\n", #regname, (void *)(intptr_t)((ptr)->regname)) #define DB_PRINT_REG_ARRAY(ptr, arrname, regname) \ db_printf(" %-12s %p\n", #regname, (void *)(intptr_t)((ptr)->arrname[regname])) static void dump_trapframe(struct trapframe *trapframe) { db_printf("Trapframe at %p\n", trapframe); DB_PRINT_REG(trapframe, zero); DB_PRINT_REG(trapframe, ast); DB_PRINT_REG(trapframe, v0); DB_PRINT_REG(trapframe, v1); DB_PRINT_REG(trapframe, a0); DB_PRINT_REG(trapframe, a1); DB_PRINT_REG(trapframe, a2); DB_PRINT_REG(trapframe, a3); #if defined(__mips_n32) || defined(__mips_n64) DB_PRINT_REG(trapframe, a4); DB_PRINT_REG(trapframe, a5); DB_PRINT_REG(trapframe, a6); DB_PRINT_REG(trapframe, a7); DB_PRINT_REG(trapframe, t0); DB_PRINT_REG(trapframe, t1); DB_PRINT_REG(trapframe, t2); DB_PRINT_REG(trapframe, t3); #else DB_PRINT_REG(trapframe, t0); DB_PRINT_REG(trapframe, t1); DB_PRINT_REG(trapframe, t2); DB_PRINT_REG(trapframe, t3); DB_PRINT_REG(trapframe, t4); DB_PRINT_REG(trapframe, t5); DB_PRINT_REG(trapframe, t6); DB_PRINT_REG(trapframe, t7); #endif DB_PRINT_REG(trapframe, s0); DB_PRINT_REG(trapframe, s1); DB_PRINT_REG(trapframe, s2); DB_PRINT_REG(trapframe, s3); DB_PRINT_REG(trapframe, s4); DB_PRINT_REG(trapframe, s5); DB_PRINT_REG(trapframe, s6); DB_PRINT_REG(trapframe, s7); DB_PRINT_REG(trapframe, t8); DB_PRINT_REG(trapframe, t9); DB_PRINT_REG(trapframe, k0); DB_PRINT_REG(trapframe, k1); DB_PRINT_REG(trapframe, gp); DB_PRINT_REG(trapframe, sp); DB_PRINT_REG(trapframe, s8); DB_PRINT_REG(trapframe, ra); DB_PRINT_REG(trapframe, sr); DB_PRINT_REG(trapframe, mullo); DB_PRINT_REG(trapframe, mulhi); DB_PRINT_REG(trapframe, badvaddr); DB_PRINT_REG(trapframe, cause); DB_PRINT_REG(trapframe, pc); } DB_SHOW_COMMAND(pcb, ddb_dump_pcb) { struct thread *td; struct pcb *pcb; struct trapframe *trapframe; /* Determine which thread to examine. */ if (have_addr) td = db_lookup_thread(addr, true); else td = curthread; pcb = td->td_pcb; db_printf("Thread %d at %p\n", td->td_tid, td); db_printf("PCB at %p\n", pcb); trapframe = &pcb->pcb_regs; dump_trapframe(trapframe); db_printf("PCB Context:\n"); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S0); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S1); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S2); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S3); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S4); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S5); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S6); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S7); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_SP); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_S8); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_RA); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_SR); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_GP); DB_PRINT_REG_ARRAY(pcb, pcb_context, PCB_REG_PC); db_printf("PCB onfault = %p\n", pcb->pcb_onfault); db_printf("md_saved_intr = 0x%0lx\n", (long)td->td_md.md_saved_intr); db_printf("md_spinlock_count = %d\n", td->td_md.md_spinlock_count); if (td->td_frame != trapframe) { db_printf("td->td_frame %p is not the same as pcb_regs %p\n", td->td_frame, trapframe); } } /* * Dump the trapframe beginning at address specified by first argument. */ DB_SHOW_COMMAND(trapframe, ddb_dump_trapframe) { if (!have_addr) return; dump_trapframe((struct trapframe *)addr); } #endif /* DDB */ Index: head/sys/powerpc/powerpc/exec_machdep.c =================================================================== --- head/sys/powerpc/powerpc/exec_machdep.c (revision 294929) +++ head/sys/powerpc/powerpc/exec_machdep.c (revision 294930) @@ -1,1068 +1,1068 @@ /*- * Copyright (C) 1995, 1996 Wolfgang Solfrank. * Copyright (C) 1995, 1996 TooLs GmbH. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (C) 2001 Benno Rice * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * $NetBSD: machdep.c,v 1.74.2.1 2000/11/01 16:13:48 tv Exp $ */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_fpu_emu.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FPU_EMU #include #endif #ifdef COMPAT_FREEBSD32 #include #include #include typedef struct __ucontext32 { sigset_t uc_sigmask; mcontext32_t uc_mcontext; uint32_t uc_link; struct sigaltstack32 uc_stack; uint32_t uc_flags; uint32_t __spare__[4]; } ucontext32_t; struct sigframe32 { ucontext32_t sf_uc; struct siginfo32 sf_si; }; static int grab_mcontext32(struct thread *td, mcontext32_t *, int flags); #endif static int grab_mcontext(struct thread *, mcontext_t *, int); void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct trapframe *tf; struct sigacts *psp; struct sigframe sf; struct thread *td; struct proc *p; #ifdef COMPAT_FREEBSD32 struct siginfo32 siginfo32; struct sigframe32 sf32; #endif size_t sfpsize; caddr_t sfp, usfp; int oonstack, rndfsize; int sig; int code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; oonstack = sigonstack(tf->fixreg[1]); /* * Fill siginfo structure. */ ksi->ksi_info.si_signo = ksi->ksi_signo; ksi->ksi_info.si_addr = (void *)((tf->exc == EXC_DSI) ? tf->dar : tf->srr0); #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32)) { siginfo_to_siginfo32(&ksi->ksi_info, &siginfo32); sig = siginfo32.si_signo; code = siginfo32.si_code; sfp = (caddr_t)&sf32; sfpsize = sizeof(sf32); rndfsize = ((sizeof(sf32) + 15) / 16) * 16; /* * Save user context */ memset(&sf32, 0, sizeof(sf32)); grab_mcontext32(td, &sf32.sf_uc.uc_mcontext, 0); sf32.sf_uc.uc_sigmask = *mask; sf32.sf_uc.uc_stack.ss_sp = (uintptr_t)td->td_sigstk.ss_sp; sf32.sf_uc.uc_stack.ss_size = (uint32_t)td->td_sigstk.ss_size; sf32.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf32.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; } else { #endif sig = ksi->ksi_signo; code = ksi->ksi_code; sfp = (caddr_t)&sf; sfpsize = sizeof(sf); #ifdef __powerpc64__ /* * 64-bit PPC defines a 288 byte scratch region * below the stack. */ rndfsize = 288 + ((sizeof(sf) + 47) / 48) * 48; #else rndfsize = ((sizeof(sf) + 15) / 16) * 16; #endif /* * Save user context */ memset(&sf, 0, sizeof(sf)); grab_mcontext(td, &sf.sf_uc.uc_mcontext, 0); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; sf.sf_uc.uc_mcontext.mc_onstack = (oonstack) ? 1 : 0; #ifdef COMPAT_FREEBSD32 } #endif CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* * Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - usfp = (void *)(td->td_sigstk.ss_sp + + usfp = (void *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - rndfsize); } else { usfp = (void *)(tf->fixreg[1] - rndfsize); } /* * Save the floating-point state, if necessary, then copy it. */ /* XXX */ /* * Set up the registers to return to sigcode. * * r1/sp - sigframe ptr * lr - sig function, dispatched to by blrl in trampoline * r3 - sig number * r4 - SIGINFO ? &siginfo : exception code * r5 - user context * srr0 - trampoline function addr */ tf->lr = (register_t)catcher; tf->fixreg[1] = (register_t)usfp; tf->fixreg[FIRSTARG] = sig; #ifdef COMPAT_FREEBSD32 tf->fixreg[FIRSTARG+2] = (register_t)usfp + ((SV_PROC_FLAG(p, SV_ILP32)) ? offsetof(struct sigframe32, sf_uc) : offsetof(struct sigframe, sf_uc)); #else tf->fixreg[FIRSTARG+2] = (register_t)usfp + offsetof(struct sigframe, sf_uc); #endif if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* * Signal handler installed with SA_SIGINFO. */ #ifdef COMPAT_FREEBSD32 if (SV_PROC_FLAG(p, SV_ILP32)) { sf32.sf_si = siginfo32; tf->fixreg[FIRSTARG+1] = (register_t)usfp + offsetof(struct sigframe32, sf_si); sf32.sf_si = siginfo32; } else { #endif tf->fixreg[FIRSTARG+1] = (register_t)usfp + offsetof(struct sigframe, sf_si); sf.sf_si = ksi->ksi_info; #ifdef COMPAT_FREEBSD32 } #endif } else { /* Old FreeBSD-style arguments. */ tf->fixreg[FIRSTARG+1] = code; tf->fixreg[FIRSTARG+3] = (tf->exc == EXC_DSI) ? tf->dar : tf->srr0; } mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); tf->srr0 = (register_t)p->p_sysent->sv_sigcode_base; /* * copy the frame out to userland. */ if (copyout(sfp, usfp, sfpsize) != 0) { /* * Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); PROC_LOCK(p); sigexit(td, SIGILL); } CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->srr0, tf->fixreg[1]); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); return (EJUSTRETURN); } #ifdef COMPAT_FREEBSD4 int freebsd4_sigreturn(struct thread *td, struct freebsd4_sigreturn_args *uap) { return sys_sigreturn(td, (struct sigreturn_args *)uap); } #endif /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_lr = tf->srr0; pcb->pcb_sp = tf->fixreg[1]; } /* * get_mcontext/sendsig helper routine that doesn't touch the * proc lock */ static int grab_mcontext(struct thread *td, mcontext_t *mcp, int flags) { struct pcb *pcb; int i; pcb = td->td_pcb; memset(mcp, 0, sizeof(mcontext_t)); mcp->mc_vers = _MC_VERSION; mcp->mc_flags = 0; memcpy(&mcp->mc_frame, td->td_frame, sizeof(struct trapframe)); if (flags & GET_MC_CLEAR_RET) { mcp->mc_gpr[3] = 0; mcp->mc_gpr[4] = 0; } /* * This assumes that floating-point context is *not* lazy, * so if the thread has used FP there would have been a * FP-unavailable exception that would have set things up * correctly. */ if (pcb->pcb_flags & PCB_FPREGS) { if (pcb->pcb_flags & PCB_FPU) { KASSERT(td == curthread, ("get_mcontext: fp save not curthread")); critical_enter(); save_fpu(td); critical_exit(); } mcp->mc_flags |= _MC_FP_VALID; memcpy(&mcp->mc_fpscr, &pcb->pcb_fpu.fpscr, sizeof(double)); for (i = 0; i < 32; i++) memcpy(&mcp->mc_fpreg[i], &pcb->pcb_fpu.fpr[i].fpr, sizeof(double)); } if (pcb->pcb_flags & PCB_VSX) { for (i = 0; i < 32; i++) memcpy(&mcp->mc_vsxfpreg[i], &pcb->pcb_fpu.fpr[i].vsr[2], sizeof(double)); } /* * Repeat for Altivec context */ if (pcb->pcb_flags & PCB_VEC) { KASSERT(td == curthread, ("get_mcontext: fp save not curthread")); critical_enter(); save_vec(td); critical_exit(); mcp->mc_flags |= _MC_AV_VALID; mcp->mc_vscr = pcb->pcb_vec.vscr; mcp->mc_vrsave = pcb->pcb_vec.vrsave; memcpy(mcp->mc_avec, pcb->pcb_vec.vr, sizeof(mcp->mc_avec)); } mcp->mc_len = sizeof(*mcp); return (0); } int get_mcontext(struct thread *td, mcontext_t *mcp, int flags) { int error; error = grab_mcontext(td, mcp, flags); if (error == 0) { PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(td->td_frame->fixreg[1]); PROC_UNLOCK(curthread->td_proc); } return (error); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct pcb *pcb; struct trapframe *tf; register_t tls; int i; pcb = td->td_pcb; tf = td->td_frame; if (mcp->mc_vers != _MC_VERSION || mcp->mc_len != sizeof(*mcp)) return (EINVAL); /* * Don't let the user set privileged MSR bits */ if ((mcp->mc_srr1 & PSL_USERSTATIC) != (tf->srr1 & PSL_USERSTATIC)) { return (EINVAL); } /* Copy trapframe, preserving TLS pointer across context change */ if (SV_PROC_FLAG(td->td_proc, SV_LP64)) tls = tf->fixreg[13]; else tls = tf->fixreg[2]; memcpy(tf, mcp->mc_frame, sizeof(mcp->mc_frame)); if (SV_PROC_FLAG(td->td_proc, SV_LP64)) tf->fixreg[13] = tls; else tf->fixreg[2] = tls; if (mcp->mc_flags & _MC_FP_VALID) { /* enable_fpu() will happen lazily on a fault */ pcb->pcb_flags |= PCB_FPREGS; memcpy(&pcb->pcb_fpu.fpscr, &mcp->mc_fpscr, sizeof(double)); bzero(pcb->pcb_fpu.fpr, sizeof(pcb->pcb_fpu.fpr)); for (i = 0; i < 32; i++) { memcpy(&pcb->pcb_fpu.fpr[i].fpr, &mcp->mc_fpreg[i], sizeof(double)); memcpy(&pcb->pcb_fpu.fpr[i].vsr[2], &mcp->mc_vsxfpreg[i], sizeof(double)); } } if (mcp->mc_flags & _MC_AV_VALID) { if ((pcb->pcb_flags & PCB_VEC) != PCB_VEC) { critical_enter(); enable_vec(td); critical_exit(); } pcb->pcb_vec.vscr = mcp->mc_vscr; pcb->pcb_vec.vrsave = mcp->mc_vrsave; memcpy(pcb->pcb_vec.vr, mcp->mc_avec, sizeof(mcp->mc_avec)); } return (0); } /* * Set set up registers on exec. */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf; register_t argc; tf = trapframe(td); bzero(tf, sizeof *tf); #ifdef __powerpc64__ tf->fixreg[1] = -roundup(-stack + 48, 16); #else tf->fixreg[1] = -roundup(-stack + 8, 16); #endif /* * Set up arguments for _start(): * _start(argc, argv, envp, obj, cleanup, ps_strings); * * Notes: * - obj and cleanup are the auxilliary and termination * vectors. They are fixed up by ld.elf_so. * - ps_strings is a NetBSD extention, and will be * ignored by executables which are strictly * compliant with the SVR4 ABI. * * XXX We have to set both regs and retval here due to different * XXX calling convention in trap.c and init_main.c. */ /* Collect argc from the user stack */ argc = fuword((void *)stack); /* * XXX PG: these get overwritten in the syscall return code. * execve() should return EJUSTRETURN, like it does on NetBSD. * Emulate by setting the syscall return value cells. The * registers still have to be set for init's fork trampoline. */ td->td_retval[0] = argc; td->td_retval[1] = stack + sizeof(register_t); tf->fixreg[3] = argc; tf->fixreg[4] = stack + sizeof(register_t); tf->fixreg[5] = stack + (2 + argc)*sizeof(register_t); tf->fixreg[6] = 0; /* auxillary vector */ tf->fixreg[7] = 0; /* termination vector */ tf->fixreg[8] = (register_t)imgp->ps_strings; /* NetBSD extension */ tf->srr0 = imgp->entry_addr; #ifdef __powerpc64__ tf->fixreg[12] = imgp->entry_addr; tf->srr1 = PSL_SF | PSL_USERSET | PSL_FE_DFLT; if (mfmsr() & PSL_HV) tf->srr1 |= PSL_HV; #else tf->srr1 = PSL_USERSET | PSL_FE_DFLT; #endif td->td_pcb->pcb_flags = 0; } #ifdef COMPAT_FREEBSD32 void ppc32_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf; uint32_t argc; tf = trapframe(td); bzero(tf, sizeof *tf); tf->fixreg[1] = -roundup(-stack + 8, 16); argc = fuword32((void *)stack); td->td_retval[0] = argc; td->td_retval[1] = stack + sizeof(uint32_t); tf->fixreg[3] = argc; tf->fixreg[4] = stack + sizeof(uint32_t); tf->fixreg[5] = stack + (2 + argc)*sizeof(uint32_t); tf->fixreg[6] = 0; /* auxillary vector */ tf->fixreg[7] = 0; /* termination vector */ tf->fixreg[8] = (register_t)imgp->ps_strings; /* NetBSD extension */ tf->srr0 = imgp->entry_addr; tf->srr1 = PSL_USERSET | PSL_FE_DFLT; tf->srr1 &= ~PSL_SF; if (mfmsr() & PSL_HV) tf->srr1 |= PSL_HV; td->td_pcb->pcb_flags = 0; } #endif int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; memcpy(regs, tf, sizeof(struct reg)); return (0); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on PowerPC */ return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_flags & PCB_FPREGS) == 0) memset(fpregs, 0, sizeof(struct fpreg)); else memcpy(fpregs, &pcb->pcb_fpu, sizeof(struct fpreg)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; tf = td->td_frame; memcpy(tf, regs, sizeof(struct reg)); return (0); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { /* No debug registers on PowerPC */ return (ENOSYS); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct pcb *pcb; pcb = td->td_pcb; pcb->pcb_flags |= PCB_FPREGS; memcpy(&pcb->pcb_fpu, fpregs, sizeof(struct fpreg)); return (0); } #ifdef COMPAT_FREEBSD32 int set_regs32(struct thread *td, struct reg32 *regs) { struct trapframe *tf; int i; tf = td->td_frame; for (i = 0; i < 32; i++) tf->fixreg[i] = regs->fixreg[i]; tf->lr = regs->lr; tf->cr = regs->cr; tf->xer = regs->xer; tf->ctr = regs->ctr; tf->srr0 = regs->pc; return (0); } int fill_regs32(struct thread *td, struct reg32 *regs) { struct trapframe *tf; int i; tf = td->td_frame; for (i = 0; i < 32; i++) regs->fixreg[i] = tf->fixreg[i]; regs->lr = tf->lr; regs->cr = tf->cr; regs->xer = tf->xer; regs->ctr = tf->ctr; regs->pc = tf->srr0; return (0); } static int grab_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) { mcontext_t mcp64; int i, error; error = grab_mcontext(td, &mcp64, flags); if (error != 0) return (error); mcp->mc_vers = mcp64.mc_vers; mcp->mc_flags = mcp64.mc_flags; mcp->mc_onstack = mcp64.mc_onstack; mcp->mc_len = mcp64.mc_len; memcpy(mcp->mc_avec,mcp64.mc_avec,sizeof(mcp64.mc_avec)); memcpy(mcp->mc_av,mcp64.mc_av,sizeof(mcp64.mc_av)); for (i = 0; i < 42; i++) mcp->mc_frame[i] = mcp64.mc_frame[i]; memcpy(mcp->mc_fpreg,mcp64.mc_fpreg,sizeof(mcp64.mc_fpreg)); memcpy(mcp->mc_vsxfpreg,mcp64.mc_vsxfpreg,sizeof(mcp64.mc_vsxfpreg)); return (0); } static int get_mcontext32(struct thread *td, mcontext32_t *mcp, int flags) { int error; error = grab_mcontext32(td, mcp, flags); if (error == 0) { PROC_LOCK(curthread->td_proc); mcp->mc_onstack = sigonstack(td->td_frame->fixreg[1]); PROC_UNLOCK(curthread->td_proc); } return (error); } static int set_mcontext32(struct thread *td, mcontext32_t *mcp) { mcontext_t mcp64; int i, error; mcp64.mc_vers = mcp->mc_vers; mcp64.mc_flags = mcp->mc_flags; mcp64.mc_onstack = mcp->mc_onstack; mcp64.mc_len = mcp->mc_len; memcpy(mcp64.mc_avec,mcp->mc_avec,sizeof(mcp64.mc_avec)); memcpy(mcp64.mc_av,mcp->mc_av,sizeof(mcp64.mc_av)); for (i = 0; i < 42; i++) mcp64.mc_frame[i] = mcp->mc_frame[i]; mcp64.mc_srr1 |= (td->td_frame->srr1 & 0xFFFFFFFF00000000ULL); memcpy(mcp64.mc_fpreg,mcp->mc_fpreg,sizeof(mcp64.mc_fpreg)); memcpy(mcp64.mc_vsxfpreg,mcp->mc_vsxfpreg,sizeof(mcp64.mc_vsxfpreg)); error = set_mcontext(td, &mcp64); return (error); } #endif #ifdef COMPAT_FREEBSD32 int freebsd32_sigreturn(struct thread *td, struct freebsd32_sigreturn_args *uap) { ucontext32_t uc; int error; CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } error = set_mcontext32(td, &uc.uc_mcontext); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); CTR3(KTR_SIG, "sigreturn: return td=%p pc=%#x sp=%#x", td, uc.uc_mcontext.mc_srr0, uc.uc_mcontext.mc_gpr[1]); return (EJUSTRETURN); } /* * The first two fields of a ucontext_t are the signal mask and the machine * context. The next field is uc_link; we want to avoid destroying the link * when copying out contexts. */ #define UC32_COPY_SIZE offsetof(ucontext32_t, uc_link) int freebsd32_getcontext(struct thread *td, struct freebsd32_getcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->ucp, UC32_COPY_SIZE); } return (ret); } int freebsd32_setcontext(struct thread *td, struct freebsd32_setcontext_args *uap) { ucontext32_t uc; int ret; if (uap->ucp == NULL) ret = EINVAL; else { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } return (ret == 0 ? EJUSTRETURN : ret); } int freebsd32_swapcontext(struct thread *td, struct freebsd32_swapcontext_args *uap) { ucontext32_t uc; int ret; if (uap->oucp == NULL || uap->ucp == NULL) ret = EINVAL; else { get_mcontext32(td, &uc.uc_mcontext, GET_MC_CLEAR_RET); PROC_LOCK(td->td_proc); uc.uc_sigmask = td->td_sigmask; PROC_UNLOCK(td->td_proc); ret = copyout(&uc, uap->oucp, UC32_COPY_SIZE); if (ret == 0) { ret = copyin(uap->ucp, &uc, UC32_COPY_SIZE); if (ret == 0) { ret = set_mcontext32(td, &uc.uc_mcontext); if (ret == 0) { kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); } } } } return (ret == 0 ? EJUSTRETURN : ret); } #endif void cpu_set_syscall_retval(struct thread *td, int error) { struct proc *p; struct trapframe *tf; int fixup; if (error == EJUSTRETURN) return; p = td->td_proc; tf = td->td_frame; if (tf->fixreg[0] == SYS___syscall && (SV_PROC_FLAG(p, SV_ILP32))) { int code = tf->fixreg[FIRSTARG + 1]; if (p->p_sysent->sv_mask) code &= p->p_sysent->sv_mask; fixup = (code != SYS_freebsd6_lseek && code != SYS_lseek) ? 1 : 0; } else fixup = 0; switch (error) { case 0: if (fixup) { /* * 64-bit return, 32-bit syscall. Fixup byte order */ tf->fixreg[FIRSTARG] = 0; tf->fixreg[FIRSTARG + 1] = td->td_retval[0]; } else { tf->fixreg[FIRSTARG] = td->td_retval[0]; tf->fixreg[FIRSTARG + 1] = td->td_retval[1]; } tf->cr &= ~0x10000000; /* Unset summary overflow */ break; case ERESTART: /* * Set user's pc back to redo the system call. */ tf->srr0 -= 4; break; default: if (p->p_sysent->sv_errsize) { error = (error < p->p_sysent->sv_errsize) ? p->p_sysent->sv_errtbl[error] : -1; } tf->fixreg[FIRSTARG] = error; tf->cr |= 0x10000000; /* Set summary overflow */ break; } } /* * Threading functions */ void cpu_thread_exit(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { struct pcb *pcb; pcb = (struct pcb *)((td->td_kstack + td->td_kstack_pages * PAGE_SIZE - sizeof(struct pcb)) & ~0x2fUL); td->td_pcb = pcb; td->td_frame = (struct trapframe *)pcb - 1; } void cpu_thread_free(struct thread *td) { } int cpu_set_user_tls(struct thread *td, void *tls_base) { if (SV_PROC_FLAG(td->td_proc, SV_LP64)) td->td_frame->fixreg[13] = (register_t)tls_base + 0x7010; else td->td_frame->fixreg[2] = (register_t)tls_base + 0x7008; return (0); } void cpu_set_upcall(struct thread *td, struct thread *td0) { struct pcb *pcb2; struct trapframe *tf; struct callframe *cf; pcb2 = td->td_pcb; /* Copy the upcall pcb */ bcopy(td0->td_pcb, pcb2, sizeof(*pcb2)); /* Create a stack for the new thread */ tf = td->td_frame; bcopy(td0->td_frame, tf, sizeof(struct trapframe)); tf->fixreg[FIRSTARG] = 0; tf->fixreg[FIRSTARG + 1] = 0; tf->cr &= ~0x10000000; /* Set registers for trampoline to user mode. */ cf = (struct callframe *)tf - 1; memset(cf, 0, sizeof(struct callframe)); cf->cf_func = (register_t)fork_return; cf->cf_arg0 = (register_t)td; cf->cf_arg1 = (register_t)tf; pcb2->pcb_sp = (register_t)cf; #if defined(__powerpc64__) && (!defined(_CALL_ELF) || _CALL_ELF == 1) pcb2->pcb_lr = ((register_t *)fork_trampoline)[0]; pcb2->pcb_toc = ((register_t *)fork_trampoline)[1]; #else pcb2->pcb_lr = (register_t)fork_trampoline; pcb2->pcb_context[0] = pcb2->pcb_lr; #endif pcb2->pcb_cpu.aim.usr_vsid = 0; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; td->td_md.md_saved_msr = PSL_KERNSET; } void cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf; uintptr_t sp; tf = td->td_frame; /* align stack and alloc space for frame ptr and saved LR */ #ifdef __powerpc64__ sp = ((uintptr_t)stack->ss_sp + stack->ss_size - 48) & ~0x1f; #else sp = ((uintptr_t)stack->ss_sp + stack->ss_size - 8) & ~0x1f; #endif bzero(tf, sizeof(struct trapframe)); tf->fixreg[1] = (register_t)sp; tf->fixreg[3] = (register_t)arg; if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { tf->srr0 = (register_t)entry; tf->srr1 = PSL_USERSET | PSL_FE_DFLT; #ifdef __powerpc64__ tf->srr1 &= ~PSL_SF; #endif } else { #ifdef __powerpc64__ register_t entry_desc[3]; (void)copyin((void *)entry, entry_desc, sizeof(entry_desc)); tf->srr0 = entry_desc[0]; tf->fixreg[2] = entry_desc[1]; tf->fixreg[11] = entry_desc[2]; tf->srr1 = PSL_SF | PSL_USERSET | PSL_FE_DFLT; #endif } #ifdef __powerpc64__ if (mfmsr() & PSL_HV) tf->srr1 |= PSL_HV; #endif td->td_pcb->pcb_flags = 0; td->td_retval[0] = (register_t)entry; td->td_retval[1] = 0; } int ppc_instr_emulate(struct trapframe *frame, struct pcb *pcb) { uint32_t instr; int reg, sig; instr = fuword32((void *)frame->srr0); sig = SIGILL; if ((instr & 0xfc1fffff) == 0x7c1f42a6) { /* mfpvr */ reg = (instr & ~0xfc1fffff) >> 21; frame->fixreg[reg] = mfpvr(); frame->srr0 += 4; return (0); } if ((instr & 0xfc000ffe) == 0x7c0004ac) { /* various sync */ powerpc_sync(); /* Do a heavy-weight sync */ frame->srr0 += 4; return (0); } #ifdef FPU_EMU if (!(pcb->pcb_flags & PCB_FPREGS)) { bzero(&pcb->pcb_fpu, sizeof(pcb->pcb_fpu)); pcb->pcb_flags |= PCB_FPREGS; } sig = fpu_emulate(frame, (struct fpreg *)&pcb->pcb_fpu); #endif return (sig); } Index: head/sys/sparc64/sparc64/machdep.c =================================================================== --- head/sys/sparc64/sparc64/machdep.c (revision 294929) +++ head/sys/sparc64/sparc64/machdep.c (revision 294930) @@ -1,1112 +1,1112 @@ /*- * Copyright (c) 2001 Jake Burkholder. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 * from: FreeBSD: src/sys/i386/i386/machdep.c,v 1.477 2001/08/27 */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include typedef int ofw_vec_t(void *); int dtlb_slots; int itlb_slots; struct tlb_entry *kernel_tlbs; int kernel_tlb_slots; int cold = 1; long Maxmem; long realmem; void *dpcpu0; char pcpu0[PCPU_PAGES * PAGE_SIZE]; struct trapframe frame0; vm_offset_t kstack0; vm_paddr_t kstack0_phys; struct kva_md_info kmi; u_long ofw_vec; u_long ofw_tba; u_int tba_taken_over; char sparc64_model[32]; static int cpu_use_vis = 1; cpu_block_copy_t *cpu_block_copy; cpu_block_zero_t *cpu_block_zero; static phandle_t find_bsp(phandle_t node, uint32_t bspid, u_int cpu_impl); void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec); static void sparc64_shutdown_final(void *dummy, int howto); static void cpu_startup(void *arg); SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); CTASSERT((1 << INT_SHIFT) == sizeof(int)); CTASSERT((1 << PTR_SHIFT) == sizeof(char *)); CTASSERT(sizeof(struct reg) == 256); CTASSERT(sizeof(struct fpreg) == 272); CTASSERT(sizeof(struct __mcontext) == 512); CTASSERT((sizeof(struct pcb) & (64 - 1)) == 0); CTASSERT((offsetof(struct pcb, pcb_kfp) & (64 - 1)) == 0); CTASSERT((offsetof(struct pcb, pcb_ufp) & (64 - 1)) == 0); CTASSERT(sizeof(struct pcb) <= ((KSTACK_PAGES * PAGE_SIZE) / 8)); CTASSERT(sizeof(struct pcpu) <= ((PCPU_PAGES * PAGE_SIZE) / 2)); static void cpu_startup(void *arg) { vm_paddr_t physsz; int i; physsz = 0; for (i = 0; i < sparc64_nmemreg; i++) physsz += sparc64_memreg[i].mr_size; printf("real memory = %lu (%lu MB)\n", physsz, physsz / (1024 * 1024)); realmem = (long)physsz / PAGE_SIZE; vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); EVENTHANDLER_REGISTER(shutdown_final, sparc64_shutdown_final, NULL, SHUTDOWN_PRI_LAST); printf("avail memory = %lu (%lu MB)\n", vm_cnt.v_free_count * PAGE_SIZE, vm_cnt.v_free_count / ((1024 * 1024) / PAGE_SIZE)); if (bootverbose) printf("machine: %s\n", sparc64_model); cpu_identify(rdpr(ver), PCPU_GET(clock), curcpu); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { struct intr_request *ir; int i; pcpu->pc_irtail = &pcpu->pc_irhead; for (i = 0; i < IR_FREE; i++) { ir = &pcpu->pc_irpool[i]; ir->ir_next = pcpu->pc_irfree; pcpu->pc_irfree = ir; } } void spinlock_enter(void) { struct thread *td; register_t pil; td = curthread; if (td->td_md.md_spinlock_count == 0) { pil = rdpr(pil); wrpr(pil, 0, PIL_TICK); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_pil = pil; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t pil; td = curthread; critical_exit(); pil = td->td_md.md_saved_pil; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) wrpr(pil, pil, 0); } static phandle_t find_bsp(phandle_t node, uint32_t bspid, u_int cpu_impl) { char type[sizeof("cpu")]; phandle_t child; uint32_t portid; for (; node != 0; node = OF_peer(node)) { child = OF_child(node); if (child > 0) { child = find_bsp(child, bspid, cpu_impl); if (child > 0) return (child); } else { if (OF_getprop(node, "device_type", type, sizeof(type)) <= 0) continue; if (strcmp(type, "cpu") != 0) continue; if (OF_getprop(node, cpu_portid_prop(cpu_impl), &portid, sizeof(portid)) <= 0) continue; if (portid == bspid) return (node); } } return (0); } const char * cpu_portid_prop(u_int cpu_impl) { switch (cpu_impl) { case CPU_IMPL_SPARC64: case CPU_IMPL_SPARC64V: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: case CPU_IMPL_ULTRASPARCIIe: return ("upa-portid"); case CPU_IMPL_ULTRASPARCIII: case CPU_IMPL_ULTRASPARCIIIp: case CPU_IMPL_ULTRASPARCIIIi: case CPU_IMPL_ULTRASPARCIIIip: return ("portid"); case CPU_IMPL_ULTRASPARCIV: case CPU_IMPL_ULTRASPARCIVp: return ("cpuid"); default: return (""); } } uint32_t cpu_get_mid(u_int cpu_impl) { switch (cpu_impl) { case CPU_IMPL_SPARC64: case CPU_IMPL_SPARC64V: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: case CPU_IMPL_ULTRASPARCIIe: return (UPA_CR_GET_MID(ldxa(0, ASI_UPA_CONFIG_REG))); case CPU_IMPL_ULTRASPARCIII: case CPU_IMPL_ULTRASPARCIIIp: return (FIREPLANE_CR_GET_AID(ldxa(AA_FIREPLANE_CONFIG, ASI_FIREPLANE_CONFIG_REG))); case CPU_IMPL_ULTRASPARCIIIi: case CPU_IMPL_ULTRASPARCIIIip: return (JBUS_CR_GET_JID(ldxa(0, ASI_JBUS_CONFIG_REG))); case CPU_IMPL_ULTRASPARCIV: case CPU_IMPL_ULTRASPARCIVp: return (INTR_ID_GET_ID(ldxa(AA_INTR_ID, ASI_INTR_ID))); default: return (0); } } void sparc64_init(caddr_t mdp, u_long o1, u_long o2, u_long o3, ofw_vec_t *vec) { char *env; struct pcpu *pc; vm_offset_t end; vm_offset_t va; caddr_t kmdp; phandle_t root; u_int cpu_impl; end = 0; kmdp = NULL; /* * Find out what kind of CPU we have first, for anything that changes * behaviour. */ cpu_impl = VER_IMPL(rdpr(ver)); /* * Do CPU-specific initialization. */ if (cpu_impl >= CPU_IMPL_ULTRASPARCIII) cheetah_init(cpu_impl); else if (cpu_impl == CPU_IMPL_SPARC64V) zeus_init(cpu_impl); /* * Clear (S)TICK timer (including NPT). */ tick_clear(cpu_impl); /* * UltraSparc II[e,i] based systems come up with the tick interrupt * enabled and a handler that resets the tick counter, causing DELAY() * to not work properly when used early in boot. * UltraSPARC III based systems come up with the system tick interrupt * enabled, causing an interrupt storm on startup since they are not * handled. */ tick_stop(cpu_impl); /* * Set up Open Firmware entry points. */ ofw_tba = rdpr(tba); ofw_vec = (u_long)vec; /* * Parse metadata if present and fetch parameters. Must be before the * console is inited so cninit() gets the right value of boothowto. */ if (mdp != NULL) { preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) { boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *), 0); end = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); kernel_tlb_slots = MD_FETCH(kmdp, MODINFOMD_DTLB_SLOTS, int); kernel_tlbs = (void *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_DTLB); } } init_param1(); /* * Initialize Open Firmware (needed for console). */ OF_install(OFW_STD_DIRECT, 0); OF_init(ofw_entry); /* * Prime our per-CPU data page for use. Note, we are using it for * our stack, so don't pass the real size (PAGE_SIZE) to pcpu_init * or it'll zero it out from under us. */ pc = (struct pcpu *)(pcpu0 + (PCPU_PAGES * PAGE_SIZE)) - 1; pcpu_init(pc, 0, sizeof(struct pcpu)); pc->pc_addr = (vm_offset_t)pcpu0; pc->pc_impl = cpu_impl; pc->pc_mid = cpu_get_mid(cpu_impl); pc->pc_tlb_ctx = TLB_CTX_USER_MIN; pc->pc_tlb_ctx_min = TLB_CTX_USER_MIN; pc->pc_tlb_ctx_max = TLB_CTX_USER_MAX; /* * Determine the OFW node and frequency of the BSP (and ensure the * BSP is in the device tree in the first place). */ root = OF_peer(0); pc->pc_node = find_bsp(root, pc->pc_mid, cpu_impl); if (pc->pc_node == 0) OF_panic("%s: cannot find boot CPU node", __func__); if (OF_getprop(pc->pc_node, "clock-frequency", &pc->pc_clock, sizeof(pc->pc_clock)) <= 0) OF_panic("%s: cannot determine boot CPU clock", __func__); /* * Panic if there is no metadata. Most likely the kernel was booted * directly, instead of through loader(8). */ if (mdp == NULL || kmdp == NULL || end == 0 || kernel_tlb_slots == 0 || kernel_tlbs == NULL) OF_panic("%s: missing loader metadata.\nThis probably means " "you are not using loader(8).", __func__); /* * Work around the broken loader behavior of not demapping no * longer used kernel TLB slots when unloading the kernel or * modules. */ for (va = KERNBASE + (kernel_tlb_slots - 1) * PAGE_SIZE_4M; va >= roundup2(end, PAGE_SIZE_4M); va -= PAGE_SIZE_4M) { if (bootverbose) OF_printf("demapping unused kernel TLB slot " "(va %#lx - %#lx)\n", va, va + PAGE_SIZE_4M - 1); stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, ASI_DMMU_DEMAP, 0); stxa(TLB_DEMAP_VA(va) | TLB_DEMAP_PRIMARY | TLB_DEMAP_PAGE, ASI_IMMU_DEMAP, 0); flush(KERNBASE); kernel_tlb_slots--; } /* * Determine the TLB slot maxima, which are expected to be * equal across all CPUs. * NB: for cheetah-class CPUs, these properties only refer * to the t16s. */ if (OF_getprop(pc->pc_node, "#dtlb-entries", &dtlb_slots, sizeof(dtlb_slots)) == -1) OF_panic("%s: cannot determine number of dTLB slots", __func__); if (OF_getprop(pc->pc_node, "#itlb-entries", &itlb_slots, sizeof(itlb_slots)) == -1) OF_panic("%s: cannot determine number of iTLB slots", __func__); /* * Initialize and enable the caches. Note that this may include * applying workarounds. */ cache_init(pc); cache_enable(cpu_impl); uma_set_align(pc->pc_cache.dc_linesize - 1); cpu_block_copy = bcopy; cpu_block_zero = bzero; getenv_int("machdep.use_vis", &cpu_use_vis); if (cpu_use_vis) { switch (cpu_impl) { case CPU_IMPL_SPARC64: case CPU_IMPL_ULTRASPARCI: case CPU_IMPL_ULTRASPARCII: case CPU_IMPL_ULTRASPARCIIi: case CPU_IMPL_ULTRASPARCIIe: case CPU_IMPL_ULTRASPARCIII: /* NB: we've disabled P$. */ case CPU_IMPL_ULTRASPARCIIIp: case CPU_IMPL_ULTRASPARCIIIi: case CPU_IMPL_ULTRASPARCIV: case CPU_IMPL_ULTRASPARCIVp: case CPU_IMPL_ULTRASPARCIIIip: cpu_block_copy = spitfire_block_copy; cpu_block_zero = spitfire_block_zero; break; case CPU_IMPL_SPARC64V: cpu_block_copy = zeus_block_copy; cpu_block_zero = zeus_block_zero; break; } } #ifdef SMP mp_init(); #endif /* * Initialize virtual memory and calculate physmem. */ pmap_bootstrap(cpu_impl); /* * Initialize tunables. */ init_param2(physmem); env = kern_getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } /* * Initialize the interrupt tables. */ intr_init1(); /* * Initialize proc0, set kstack0, frame0, curthread and curpcb. */ proc_linkup0(&proc0, &thread0); proc0.p_md.md_sigtramp = NULL; proc0.p_md.md_utrap = NULL; thread0.td_kstack = kstack0; thread0.td_kstack_pages = KSTACK_PAGES; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1; frame0.tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_PRIV; thread0.td_frame = &frame0; pc->pc_curthread = &thread0; pc->pc_curpcb = thread0.td_pcb; /* * Initialize global registers. */ cpu_setregs(pc); /* * Take over the trap table via the PROM. Using the PROM for this * is necessary in order to set obp-control-relinquished to true * within the PROM so obtaining /virtual-memory/translations doesn't * trigger a fatal reset error or worse things further down the road. * XXX it should be possible to use this solely instead of writing * %tba in cpu_setregs(). Doing so causes a hang however. * * NB: the low-level console drivers require a working DELAY() and * some compiler optimizations may cause the curthread accesses of * mutex(9) to be factored out even if the latter aren't actually * called. Both of these require PCPU_REG to be set. However, we * can't set PCPU_REG without also taking over the trap table or the * firmware will overwrite it. */ sun4u_set_traptable(tl0_base); /* * Initialize the dynamic per-CPU area for the BSP and the message * buffer (after setting the trap table). */ dpcpu_init(dpcpu0, 0); msgbufinit(msgbufp, msgbufsize); /* * Initialize mutexes. */ mutex_init(); /* * Initialize console now that we have a reasonable set of system * services. */ cninit(); /* * Finish the interrupt initialization now that mutexes work and * enable them. */ intr_init2(); wrpr(pil, 0, 0); wrpr(pstate, 0, PSTATE_KERNEL); OF_getprop(root, "name", sparc64_model, sizeof(sparc64_model) - 1); kdb_init(); #ifdef KDB if (boothowto & RB_KDB) kdb_enter(KDB_WHY_BOOTFLAGS, "Boot flags requested debugger"); #endif } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct trapframe *tf; struct sigframe *sfp; struct sigacts *psp; struct sigframe sf; struct thread *td; struct frame *fp; struct proc *p; u_long sp; int oonstack; int sig; oonstack = 0; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; sp = tf->tf_sp + SPOFF; oonstack = sigonstack(sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Make sure we have a signal trampoline to return to. */ if (p->p_md.md_sigtramp == NULL) { /* * No signal trampoline... kill the process. */ CTR0(KTR_SIG, "sendsig: no sigtramp"); printf("sendsig: %s is too old, rebuild it\n", p->p_comm); sigexit(td, sig); /* NOTREACHED */ } /* Save user context. */ bzero(&sf, sizeof(sf)); get_mcontext(td, &sf.sf_uc.uc_mcontext, 0); sf.sf_uc.uc_sigmask = *mask; sf.sf_uc.uc_stack = td->td_sigstk; sf.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((oonstack) ? SS_ONSTACK : 0) : SS_DISABLE; /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !oonstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { - sfp = (struct sigframe *)(td->td_sigstk.ss_sp + + sfp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size - sizeof(struct sigframe)); } else sfp = (struct sigframe *)sp - 1; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(p); fp = (struct frame *)sfp - 1; /* Build the argument list for the signal handler. */ tf->tf_out[0] = sig; tf->tf_out[2] = (register_t)&sfp->sf_uc; tf->tf_out[4] = (register_t)catcher; if (SIGISMEMBER(psp->ps_siginfo, sig)) { /* Signal handler installed with SA_SIGINFO. */ tf->tf_out[1] = (register_t)&sfp->sf_si; /* Fill in POSIX parts. */ sf.sf_si = ksi->ksi_info; sf.sf_si.si_signo = sig; /* maybe a translated signal */ } else { /* Old FreeBSD-style arguments. */ tf->tf_out[1] = ksi->ksi_code; tf->tf_out[3] = (register_t)ksi->ksi_addr; } /* Copy the sigframe out to the user's stack. */ if (rwindow_save(td) != 0 || copyout(&sf, sfp, sizeof(*sfp)) != 0 || suword(&fp->fr_in[6], tf->tf_out[6]) != 0) { /* * Something is wrong with the stack pointer. * ...Kill the process. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p sfp=%p", td, sfp); PROC_LOCK(p); sigexit(td, SIGILL); /* NOTREACHED */ } tf->tf_tpc = (u_long)p->p_md.md_sigtramp; tf->tf_tnpc = tf->tf_tpc + 4; tf->tf_sp = (u_long)fp - SPOFF; CTR3(KTR_SIG, "sendsig: return td=%p pc=%#lx sp=%#lx", td, tf->tf_tpc, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif /* * MPSAFE */ int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { struct proc *p; mcontext_t *mc; ucontext_t uc; int error; p = td->td_proc; if (rwindow_save(td)) { PROC_LOCK(p); sigexit(td, SIGILL); } CTR2(KTR_SIG, "sigreturn: td=%p ucp=%p", td, uap->sigcntxp); if (copyin(uap->sigcntxp, &uc, sizeof(uc)) != 0) { CTR1(KTR_SIG, "sigreturn: efault td=%p", td); return (EFAULT); } mc = &uc.uc_mcontext; error = set_mcontext(td, mc); if (error != 0) return (error); kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); CTR4(KTR_SIG, "sigreturn: return td=%p pc=%#lx sp=%#lx tstate=%#lx", td, mc->_mc_tpc, mc->_mc_sp, mc->_mc_tstate); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_pc = tf->tf_tpc; pcb->pcb_sp = tf->tf_sp; } int get_mcontext(struct thread *td, mcontext_t *mc, int flags) { struct trapframe *tf; struct pcb *pcb; tf = td->td_frame; pcb = td->td_pcb; /* * Copy the registers which will be restored by tl0_ret() from the * trapframe. * Note that we skip %g7 which is used as the userland TLS register * and %wstate. */ mc->_mc_flags = _MC_VERSION; mc->mc_global[1] = tf->tf_global[1]; mc->mc_global[2] = tf->tf_global[2]; mc->mc_global[3] = tf->tf_global[3]; mc->mc_global[4] = tf->tf_global[4]; mc->mc_global[5] = tf->tf_global[5]; mc->mc_global[6] = tf->tf_global[6]; if (flags & GET_MC_CLEAR_RET) { mc->mc_out[0] = 0; mc->mc_out[1] = 0; } else { mc->mc_out[0] = tf->tf_out[0]; mc->mc_out[1] = tf->tf_out[1]; } mc->mc_out[2] = tf->tf_out[2]; mc->mc_out[3] = tf->tf_out[3]; mc->mc_out[4] = tf->tf_out[4]; mc->mc_out[5] = tf->tf_out[5]; mc->mc_out[6] = tf->tf_out[6]; mc->mc_out[7] = tf->tf_out[7]; mc->_mc_fprs = tf->tf_fprs; mc->_mc_fsr = tf->tf_fsr; mc->_mc_gsr = tf->tf_gsr; mc->_mc_tnpc = tf->tf_tnpc; mc->_mc_tpc = tf->tf_tpc; mc->_mc_tstate = tf->tf_tstate; mc->_mc_y = tf->tf_y; critical_enter(); if ((tf->tf_fprs & FPRS_FEF) != 0) { savefpctx(pcb->pcb_ufp); tf->tf_fprs &= ~FPRS_FEF; pcb->pcb_flags |= PCB_FEF; } if ((pcb->pcb_flags & PCB_FEF) != 0) { bcopy(pcb->pcb_ufp, mc->mc_fp, sizeof(mc->mc_fp)); mc->_mc_fprs |= FPRS_FEF; } critical_exit(); return (0); } int set_mcontext(struct thread *td, mcontext_t *mc) { struct trapframe *tf; struct pcb *pcb; if (!TSTATE_SECURE(mc->_mc_tstate) || (mc->_mc_flags & ((1L << _MC_VERSION_BITS) - 1)) != _MC_VERSION) return (EINVAL); tf = td->td_frame; pcb = td->td_pcb; /* Make sure the windows are spilled first. */ flushw(); /* * Copy the registers which will be restored by tl0_ret() to the * trapframe. * Note that we skip %g7 which is used as the userland TLS register * and %wstate. */ tf->tf_global[1] = mc->mc_global[1]; tf->tf_global[2] = mc->mc_global[2]; tf->tf_global[3] = mc->mc_global[3]; tf->tf_global[4] = mc->mc_global[4]; tf->tf_global[5] = mc->mc_global[5]; tf->tf_global[6] = mc->mc_global[6]; tf->tf_out[0] = mc->mc_out[0]; tf->tf_out[1] = mc->mc_out[1]; tf->tf_out[2] = mc->mc_out[2]; tf->tf_out[3] = mc->mc_out[3]; tf->tf_out[4] = mc->mc_out[4]; tf->tf_out[5] = mc->mc_out[5]; tf->tf_out[6] = mc->mc_out[6]; tf->tf_out[7] = mc->mc_out[7]; tf->tf_fprs = mc->_mc_fprs; tf->tf_fsr = mc->_mc_fsr; tf->tf_gsr = mc->_mc_gsr; tf->tf_tnpc = mc->_mc_tnpc; tf->tf_tpc = mc->_mc_tpc; tf->tf_tstate = mc->_mc_tstate; tf->tf_y = mc->_mc_y; if ((mc->_mc_fprs & FPRS_FEF) != 0) { tf->tf_fprs = 0; bcopy(mc->mc_fp, pcb->pcb_ufp, sizeof(pcb->pcb_ufp)); pcb->pcb_flags |= PCB_FEF; } return (0); } /* * Exit the kernel and execute a firmware call that will not return, as * specified by the arguments. */ void cpu_shutdown(void *args) { #ifdef SMP cpu_mp_shutdown(); #endif ofw_exit(args); } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* TBD */ } /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { struct pcpu *pc; pc = pcpu_find(cpu_id); if (pc == NULL || rate == NULL) return (EINVAL); *rate = pc->pc_clock; return (0); } /* * Duplicate OF_exit() with a different firmware call function that restores * the trap table, otherwise a RED state exception is triggered in at least * some firmware versions. */ void cpu_halt(void) { static struct { cell_t name; cell_t nargs; cell_t nreturns; } args = { (cell_t)"exit", 0, 0 }; cpu_shutdown(&args); } static void sparc64_shutdown_final(void *dummy, int howto) { static struct { cell_t name; cell_t nargs; cell_t nreturns; } args = { (cell_t)"SUNW,power-off", 0, 0 }; /* Turn the power off? */ if ((howto & RB_POWEROFF) != 0) cpu_shutdown(&args); /* In case of halt, return to the firmware. */ if ((howto & RB_HALT) != 0) cpu_halt(); } void cpu_idle(int busy) { /* Insert code to halt (until next interrupt) for the idle loop. */ } int cpu_idle_wakeup(int cpu) { return (1); } int ptrace_set_pc(struct thread *td, u_long addr) { td->td_frame->tf_tpc = addr; td->td_frame->tf_tnpc = addr + 4; return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (0); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (0); } void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf; struct pcb *pcb; struct proc *p; u_long sp; /* XXX no cpu_exec */ p = td->td_proc; p->p_md.md_sigtramp = NULL; if (p->p_md.md_utrap != NULL) { utrap_free(p->p_md.md_utrap); p->p_md.md_utrap = NULL; } pcb = td->td_pcb; tf = td->td_frame; sp = rounddown(stack, 16); bzero(pcb, sizeof(*pcb)); bzero(tf, sizeof(*tf)); tf->tf_out[0] = stack; tf->tf_out[3] = p->p_sysent->sv_psstrings; tf->tf_out[6] = sp - SPOFF - sizeof(struct frame); tf->tf_tnpc = imgp->entry_addr + 4; tf->tf_tpc = imgp->entry_addr; /* * While we could adhere to the memory model indicated in the ELF * header, it turns out that just always using TSO performs best. */ tf->tf_tstate = TSTATE_IE | TSTATE_PEF | TSTATE_MM_TSO; td->td_retval[0] = tf->tf_out[0]; td->td_retval[1] = tf->tf_out[1]; } int fill_regs(struct thread *td, struct reg *regs) { bcopy(td->td_frame, regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf; if (!TSTATE_SECURE(regs->r_tstate)) return (EINVAL); tf = td->td_frame; regs->r_wstate = tf->tf_wstate; bcopy(regs, tf, sizeof(*regs)); return (0); } int fill_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int set_dbregs(struct thread *td, struct dbreg *dbregs) { return (ENOSYS); } int fill_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *tf; struct pcb *pcb; pcb = td->td_pcb; tf = td->td_frame; bcopy(pcb->pcb_ufp, fpregs->fr_regs, sizeof(fpregs->fr_regs)); fpregs->fr_fsr = tf->tf_fsr; fpregs->fr_gsr = tf->tf_gsr; return (0); } int set_fpregs(struct thread *td, struct fpreg *fpregs) { struct trapframe *tf; struct pcb *pcb; pcb = td->td_pcb; tf = td->td_frame; tf->tf_fprs &= ~FPRS_FEF; bcopy(fpregs->fr_regs, pcb->pcb_ufp, sizeof(pcb->pcb_ufp)); tf->tf_fsr = fpregs->fr_fsr; tf->tf_gsr = fpregs->fr_gsr; return (0); } struct md_utrap * utrap_alloc(void) { struct md_utrap *ut; ut = malloc(sizeof(struct md_utrap), M_SUBPROC, M_WAITOK | M_ZERO); ut->ut_refcnt = 1; return (ut); } void utrap_free(struct md_utrap *ut) { int refcnt; if (ut == NULL) return; mtx_pool_lock(mtxpool_sleep, ut); ut->ut_refcnt--; refcnt = ut->ut_refcnt; mtx_pool_unlock(mtxpool_sleep, ut); if (refcnt == 0) free(ut, M_SUBPROC); } struct md_utrap * utrap_hold(struct md_utrap *ut) { if (ut == NULL) return (NULL); mtx_pool_lock(mtxpool_sleep, ut); ut->ut_refcnt++; mtx_pool_unlock(mtxpool_sleep, ut); return (ut); } Index: head/sys/sys/signal.h =================================================================== --- head/sys/sys/signal.h (revision 294929) +++ head/sys/sys/signal.h (revision 294930) @@ -1,444 +1,443 @@ /*- * Copyright (c) 1982, 1986, 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)signal.h 8.4 (Berkeley) 5/4/95 * $FreeBSD$ */ #ifndef _SYS_SIGNAL_H_ #define _SYS_SIGNAL_H_ #include #include #include #include /* __MINSIGSTKSZ */ #include /* sig_atomic_t; trap codes; sigcontext */ /* * System defined signals. */ #if __POSIX_VISIBLE || __XSI_VISIBLE #define SIGHUP 1 /* hangup */ #endif #define SIGINT 2 /* interrupt */ #if __POSIX_VISIBLE || __XSI_VISIBLE #define SIGQUIT 3 /* quit */ #endif #define SIGILL 4 /* illegal instr. (not reset when caught) */ #if __XSI_VISIBLE #define SIGTRAP 5 /* trace trap (not reset when caught) */ #endif #define SIGABRT 6 /* abort() */ #if __BSD_VISIBLE #define SIGIOT SIGABRT /* compatibility */ #define SIGEMT 7 /* EMT instruction */ #endif #define SIGFPE 8 /* floating point exception */ #if __POSIX_VISIBLE || __XSI_VISIBLE #define SIGKILL 9 /* kill (cannot be caught or ignored) */ #endif #if __POSIX_VISIBLE >= 200112 || __XSI_VISIBLE #define SIGBUS 10 /* bus error */ #endif #define SIGSEGV 11 /* segmentation violation */ #if __POSIX_VISIBLE >= 200112 || __XSI_VISIBLE #define SIGSYS 12 /* non-existent system call invoked */ #endif #if __POSIX_VISIBLE || __XSI_VISIBLE #define SIGPIPE 13 /* write on a pipe with no one to read it */ #define SIGALRM 14 /* alarm clock */ #endif #define SIGTERM 15 /* software termination signal from kill */ #if __POSIX_VISIBLE >= 200112 || __XSI_VISIBLE #define SIGURG 16 /* urgent condition on IO channel */ #endif #if __POSIX_VISIBLE || __XSI_VISIBLE #define SIGSTOP 17 /* sendable stop signal not from tty */ #define SIGTSTP 18 /* stop signal from tty */ #define SIGCONT 19 /* continue a stopped process */ #define SIGCHLD 20 /* to parent on child stop or exit */ #define SIGTTIN 21 /* to readers pgrp upon background tty read */ #define SIGTTOU 22 /* like TTIN if (tp->t_local<OSTOP) */ #endif #if __BSD_VISIBLE #define SIGIO 23 /* input/output possible signal */ #endif #if __XSI_VISIBLE #define SIGXCPU 24 /* exceeded CPU time limit */ #define SIGXFSZ 25 /* exceeded file size limit */ #define SIGVTALRM 26 /* virtual time alarm */ #define SIGPROF 27 /* profiling time alarm */ #endif #if __BSD_VISIBLE #define SIGWINCH 28 /* window size changes */ #define SIGINFO 29 /* information request */ #endif #if __POSIX_VISIBLE || __XSI_VISIBLE #define SIGUSR1 30 /* user defined signal 1 */ #define SIGUSR2 31 /* user defined signal 2 */ #endif #if __BSD_VISIBLE #define SIGTHR 32 /* reserved by thread library. */ #define SIGLWP SIGTHR #define SIGLIBRT 33 /* reserved by real-time library. */ #endif #define SIGRTMIN 65 #define SIGRTMAX 126 #define SIG_DFL ((__sighandler_t *)0) #define SIG_IGN ((__sighandler_t *)1) #define SIG_ERR ((__sighandler_t *)-1) /* #define SIG_CATCH ((__sighandler_t *)2) See signalvar.h */ #define SIG_HOLD ((__sighandler_t *)3) /* * Type of a signal handling function. * * Language spec sez signal handlers take exactly one arg, even though we * actually supply three. Ugh! * * We don't try to hide the difference by leaving out the args because * that would cause warnings about conformant programs. Nonconformant * programs can avoid the warnings by casting to (__sighandler_t *) or * sig_t before calling signal() or assigning to sa_handler or sv_handler. * * The kernel should reverse the cast before calling the function. It * has no way to do this, but on most machines 1-arg and 3-arg functions * have the same calling protocol so there is no problem in practice. * A bit in sa_flags could be used to specify the number of args. */ typedef void __sighandler_t(int); #if __POSIX_VISIBLE || __XSI_VISIBLE #ifndef _SIGSET_T_DECLARED #define _SIGSET_T_DECLARED typedef __sigset_t sigset_t; #endif #endif #if __POSIX_VISIBLE >= 199309 || __XSI_VISIBLE >= 500 union sigval { /* Members as suggested by Annex C of POSIX 1003.1b. */ int sival_int; void *sival_ptr; /* 6.0 compatibility */ int sigval_int; void *sigval_ptr; }; #endif #if __POSIX_VISIBLE >= 199309 struct sigevent { int sigev_notify; /* Notification type */ int sigev_signo; /* Signal number */ union sigval sigev_value; /* Signal value */ union { __lwpid_t _threadid; struct { void (*_function)(union sigval); void *_attribute; /* pthread_attr_t * */ } _sigev_thread; unsigned short _kevent_flags; long __spare__[8]; } _sigev_un; }; #if __BSD_VISIBLE #define sigev_notify_kqueue sigev_signo #define sigev_notify_kevent_flags _sigev_un._kevent_flags #define sigev_notify_thread_id _sigev_un._threadid #endif #define sigev_notify_function _sigev_un._sigev_thread._function #define sigev_notify_attributes _sigev_un._sigev_thread._attribute #define SIGEV_NONE 0 /* No async notification. */ #define SIGEV_SIGNAL 1 /* Generate a queued signal. */ #define SIGEV_THREAD 2 /* Call back from another pthread. */ #if __BSD_VISIBLE #define SIGEV_KEVENT 3 /* Generate a kevent. */ #define SIGEV_THREAD_ID 4 /* Send signal to a kernel thread. */ #endif #endif /* __POSIX_VISIBLE >= 199309 */ #if __POSIX_VISIBLE >= 199309 || __XSI_VISIBLE typedef struct __siginfo { int si_signo; /* signal number */ int si_errno; /* errno association */ /* * Cause of signal, one of the SI_ macros or signal-specific * values, i.e. one of the FPE_... values for SIGFPE. This * value is equivalent to the second argument to an old-style * FreeBSD signal handler. */ int si_code; /* signal code */ __pid_t si_pid; /* sending process */ __uid_t si_uid; /* sender's ruid */ int si_status; /* exit value */ void *si_addr; /* faulting instruction */ union sigval si_value; /* signal value */ union { struct { int _trapno;/* machine specific trap code */ } _fault; struct { int _timerid; int _overrun; } _timer; struct { int _mqd; } _mesgq; struct { long _band; /* band event for SIGPOLL */ } _poll; /* was this ever used ? */ struct { long __spare1__; int __spare2__[7]; } __spare__; } _reason; } siginfo_t; #define si_trapno _reason._fault._trapno #define si_timerid _reason._timer._timerid #define si_overrun _reason._timer._overrun #define si_mqd _reason._mesgq._mqd #define si_band _reason._poll._band /** si_code **/ /* codes for SIGILL */ #define ILL_ILLOPC 1 /* Illegal opcode. */ #define ILL_ILLOPN 2 /* Illegal operand. */ #define ILL_ILLADR 3 /* Illegal addressing mode. */ #define ILL_ILLTRP 4 /* Illegal trap. */ #define ILL_PRVOPC 5 /* Privileged opcode. */ #define ILL_PRVREG 6 /* Privileged register. */ #define ILL_COPROC 7 /* Coprocessor error. */ #define ILL_BADSTK 8 /* Internal stack error. */ /* codes for SIGBUS */ #define BUS_ADRALN 1 /* Invalid address alignment. */ #define BUS_ADRERR 2 /* Nonexistent physical address. */ #define BUS_OBJERR 3 /* Object-specific hardware error. */ /* codes for SIGSEGV */ #define SEGV_MAPERR 1 /* Address not mapped to object. */ #define SEGV_ACCERR 2 /* Invalid permissions for mapped */ /* object. */ /* codes for SIGFPE */ #define FPE_INTOVF 1 /* Integer overflow. */ #define FPE_INTDIV 2 /* Integer divide by zero. */ #define FPE_FLTDIV 3 /* Floating point divide by zero. */ #define FPE_FLTOVF 4 /* Floating point overflow. */ #define FPE_FLTUND 5 /* Floating point underflow. */ #define FPE_FLTRES 6 /* Floating point inexact result. */ #define FPE_FLTINV 7 /* Invalid floating point operation. */ #define FPE_FLTSUB 8 /* Subscript out of range. */ /* codes for SIGTRAP */ #define TRAP_BRKPT 1 /* Process breakpoint. */ #define TRAP_TRACE 2 /* Process trace trap. */ #define TRAP_DTRACE 3 /* DTrace induced trap. */ /* codes for SIGCHLD */ #define CLD_EXITED 1 /* Child has exited */ #define CLD_KILLED 2 /* Child has terminated abnormally but */ /* did not create a core file */ #define CLD_DUMPED 3 /* Child has terminated abnormally and */ /* created a core file */ #define CLD_TRAPPED 4 /* Traced child has trapped */ #define CLD_STOPPED 5 /* Child has stopped */ #define CLD_CONTINUED 6 /* Stopped child has continued */ /* codes for SIGPOLL */ #define POLL_IN 1 /* Data input available */ #define POLL_OUT 2 /* Output buffers available */ #define POLL_MSG 3 /* Input message available */ #define POLL_ERR 4 /* I/O Error */ #define POLL_PRI 5 /* High priority input available */ #define POLL_HUP 6 /* Device disconnected */ #endif #if __POSIX_VISIBLE || __XSI_VISIBLE struct __siginfo; /* * Signal vector "template" used in sigaction call. */ struct sigaction { union { void (*__sa_handler)(int); void (*__sa_sigaction)(int, struct __siginfo *, void *); } __sigaction_u; /* signal handler */ int sa_flags; /* see signal options below */ sigset_t sa_mask; /* signal mask to apply */ }; #define sa_handler __sigaction_u.__sa_handler #endif #if __XSI_VISIBLE /* If SA_SIGINFO is set, sa_sigaction must be used instead of sa_handler. */ #define sa_sigaction __sigaction_u.__sa_sigaction #endif #if __POSIX_VISIBLE || __XSI_VISIBLE #define SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop */ #endif /* __POSIX_VISIBLE || __XSI_VISIBLE */ #if __XSI_VISIBLE #define SA_ONSTACK 0x0001 /* take signal on signal stack */ #define SA_RESTART 0x0002 /* restart system call on signal return */ #define SA_RESETHAND 0x0004 /* reset to SIG_DFL when taking signal */ #define SA_NODEFER 0x0010 /* don't mask the signal we're delivering */ #define SA_NOCLDWAIT 0x0020 /* don't keep zombies around */ #define SA_SIGINFO 0x0040 /* signal handler with SA_SIGINFO args */ #endif #if __BSD_VISIBLE #define NSIG 32 /* number of old signals (counting 0) */ #endif #if __POSIX_VISIBLE || __XSI_VISIBLE #define SI_NOINFO 0 /* No signal info besides si_signo. */ #define SI_USER 0x10001 /* Signal sent by kill(). */ #define SI_QUEUE 0x10002 /* Signal sent by the sigqueue(). */ #define SI_TIMER 0x10003 /* Signal generated by expiration of */ /* a timer set by timer_settime(). */ #define SI_ASYNCIO 0x10004 /* Signal generated by completion of */ /* an asynchronous I/O request.*/ #define SI_MESGQ 0x10005 /* Signal generated by arrival of a */ /* message on an empty message queue. */ #define SI_KERNEL 0x10006 #define SI_LWP 0x10007 /* Signal sent by thr_kill */ #endif #if __BSD_VISIBLE #define SI_UNDEFINED 0 #endif #if __BSD_VISIBLE typedef __sighandler_t *sig_t; /* type of pointer to a signal function */ typedef void __siginfohandler_t(int, struct __siginfo *, void *); #endif #if __XSI_VISIBLE /* * Structure used in sigaltstack call. */ #if __BSD_VISIBLE typedef struct sigaltstack { #else typedef struct { #endif - char *ss_sp; /* signal stack base */ + void *ss_sp; /* signal stack base */ __size_t ss_size; /* signal stack length */ int ss_flags; /* SS_DISABLE and/or SS_ONSTACK */ } stack_t; #define SS_ONSTACK 0x0001 /* take signal on alternate stack */ #define SS_DISABLE 0x0004 /* disable taking signals on alternate stack */ #define MINSIGSTKSZ __MINSIGSTKSZ /* minimum stack size */ #define SIGSTKSZ (MINSIGSTKSZ + 32768) /* recommended stack size */ #endif #if __BSD_VISIBLE /* * 4.3 compatibility: * Signal vector "template" used in sigvec call. */ struct sigvec { __sighandler_t *sv_handler; /* signal handler */ int sv_mask; /* signal mask to apply */ int sv_flags; /* see signal options below */ }; #define SV_ONSTACK SA_ONSTACK #define SV_INTERRUPT SA_RESTART /* same bit, opposite sense */ #define SV_RESETHAND SA_RESETHAND #define SV_NODEFER SA_NODEFER #define SV_NOCLDSTOP SA_NOCLDSTOP #define SV_SIGINFO SA_SIGINFO #define sv_onstack sv_flags /* isn't compatibility wonderful! */ #endif /* Keep this in one place only */ #if defined(_KERNEL) && defined(COMPAT_43) && \ !defined(__i386__) struct osigcontext { int _not_used; }; #endif #if __XSI_VISIBLE /* * Structure used in sigstack call. */ struct sigstack { - /* XXX ss_sp's type should be `void *'. */ - char *ss_sp; /* signal stack pointer */ + void *ss_sp; /* signal stack pointer */ int ss_onstack; /* current status */ }; #endif #if __BSD_VISIBLE || __POSIX_VISIBLE > 0 && __POSIX_VISIBLE <= 200112 /* * Macro for converting signal number to a mask suitable for * sigblock(). */ #define sigmask(m) (1 << ((m)-1)) #endif #if __BSD_VISIBLE #define BADSIG SIG_ERR #endif #if __POSIX_VISIBLE || __XSI_VISIBLE /* * Flags for sigprocmask: */ #define SIG_BLOCK 1 /* block specified signal set */ #define SIG_UNBLOCK 2 /* unblock specified signal set */ #define SIG_SETMASK 3 /* set specified signal set */ #endif /* * For historical reasons; programs expect signal's return value to be * defined by . */ __BEGIN_DECLS __sighandler_t *signal(int, __sighandler_t *); __END_DECLS #endif /* !_SYS_SIGNAL_H_ */