Index: sys/amd64/amd64/trap.c =================================================================== --- sys/amd64/amd64/trap.c +++ sys/amd64/amd64/trap.c @@ -111,7 +111,7 @@ void trap_check(struct trapframe *frame); void dblfault_handler(struct trapframe *frame); -static int trap_pfault(struct trapframe *, int); +static int trap_pfault(struct trapframe *, bool, int *, int *); static void trap_fatal(struct trapframe *, vm_offset_t); #ifdef KDTRACE_HOOKS static bool trap_user_dtrace(struct trapframe *, @@ -155,10 +155,6 @@ [T_DTRACE_RET] = "DTrace pid return trap", }; -static int prot_fault_translation; -SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN, - &prot_fault_translation, 0, - "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN, &uprintf_signal, 0, @@ -192,14 +188,11 @@ struct thread *td; struct proc *p; register_t addr, dr6; - int signo, ucode; + int pf, signo, ucode; u_int type; td = curthread; p = td->td_proc; - signo = 0; - ucode = 0; - addr = 0; dr6 = 0; VM_CNT_INC(v_trap); @@ -352,40 +345,11 @@ return; addr = frame->tf_addr; - signo = trap_pfault(frame, TRUE); - if (signo == -1) + pf = trap_pfault(frame, true, &signo, &ucode); + if (pf == -1) return; - if (signo == 0) + if (pf == 0) goto userret; - if (signo == SIGSEGV) { - ucode = SEGV_MAPERR; - } else if (prot_fault_translation == 0) { - /* - * Autodetect. This check also covers - * the images without the ABI-tag ELF - * note. - */ - if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && - p->p_osrel >= P_OSREL_SIGSEGV) { - signo = SIGSEGV; - ucode = SEGV_ACCERR; - } else { - signo = SIGBUS; - ucode = T_PAGEFLT; - } - } else if (prot_fault_translation == 1) { - /* - * Always compat mode. - */ - signo = SIGBUS; - ucode = T_PAGEFLT; - } else { - /* - * Always SIGSEGV mode. 
- */ - signo = SIGSEGV; - ucode = SEGV_ACCERR; - } break; case T_DIVIDE: /* integer divide fault */ @@ -440,7 +404,7 @@ ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ - (void) trap_pfault(frame, FALSE); + (void)trap_pfault(frame, false, NULL, NULL); return; case T_DNA: @@ -712,16 +676,28 @@ (PCPU_GET(curpmap)->pm_cr3 & ~CR3_PCID_MASK)); } +/* + * Handle all details of a page fault. + * Returns: + * -1 if this was a fatal fault, typically from the kernel mode + * (cannot happen, but we need to return something). + * 0 fault was handled by updating page table, either user or kernel + * mode, execution can continue. + * 1 fault is from usermode and was not handled, synchronous signal + * should be delivered to the thread. *signo returns the signal + * number, *ucode gives si_code. + */ static int -trap_pfault(struct trapframe *frame, int usermode) +trap_pfault(struct trapframe *frame, bool usermode, int *signo, int *ucode) { struct thread *td; struct proc *p; vm_map_t map; - vm_offset_t va; + vm_offset_t eva; int rv; vm_prot_t ftype; - vm_offset_t eva; + + MPASS(!usermode || (signo != NULL && ucode != NULL)); td = curthread; p = td->td_proc; @@ -771,13 +747,15 @@ return (-1); } } - va = trunc_page(eva); - if (va >= VM_MIN_KERNEL_ADDRESS) { + if (eva >= VM_MIN_KERNEL_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. */ - if (usermode) - return (SIGSEGV); + if (usermode) { + *signo = SIGSEGV; + *ucode = SEGV_MAPERR; + return (1); + } map = kernel_map; } else { @@ -819,7 +797,11 @@ trap_fatal(frame, eva); return (-1); } - rv = KERN_PROTECTION_FAILURE; + if (usermode) { + *signo = SIGSEGV; + *ucode = SEGV_PKUERR; + return (1); + } goto after_vmfault; } @@ -843,7 +825,7 @@ ftype = VM_PROT_READ; /* Fault in the page. 
*/ - rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); + rv = vm_fault_trap(map, eva, ftype, VM_FAULT_NORMAL, signo, ucode); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { @@ -858,17 +840,17 @@ #endif return (0); } + + if (usermode) + return (1); after_vmfault: - if (!usermode) { - if (td->td_intr_nesting_level == 0 && - curpcb->pcb_onfault != NULL) { - frame->tf_rip = (long)curpcb->pcb_onfault; - return (0); - } - trap_fatal(frame, eva); - return (-1); + if (td->td_intr_nesting_level == 0 && + curpcb->pcb_onfault != NULL) { + frame->tf_rip = (long)curpcb->pcb_onfault; + return (0); } - return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); + trap_fatal(frame, eva); + return (-1); } static void Index: sys/amd64/vmm/vmm.c =================================================================== --- sys/amd64/vmm/vmm.c +++ sys/amd64/vmm/vmm.c @@ -1411,7 +1411,7 @@ } map = &vm->vmspace->vm_map; - rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL); + rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL); VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, " "ftype = %d", rv, vme->u.paging.gpa, ftype); Index: sys/arm/arm/trap-v4.c =================================================================== --- sys/arm/arm/trap-v4.c +++ sys/arm/arm/trap-v4.c @@ -181,7 +181,7 @@ vm_prot_t ftype; void *onfault; vm_offset_t va; - int error = 0; + int error = 0, signo, ucode; struct ksig ksig; struct proc *p; @@ -230,6 +230,8 @@ if (__predict_false(data_aborts[fsr & FAULT_TYPE_MASK].func != NULL)) { if ((data_aborts[fsr & FAULT_TYPE_MASK].func)(tf, fsr, far, td, &ksig)) { + signo = ksig.signb; + ucode = ksig.code; goto do_trapsignal; } goto out; @@ -262,8 +264,8 @@ * Give the user an illegal instruction signal. 
*/ /* Deliver a SIGILL to the process */ - ksig.signb = SIGILL; - ksig.code = 0; + signo = SIGILL; + ucode = 0; goto do_trapsignal; } @@ -299,8 +301,8 @@ * but uses USR mode permissions for its accesses. */ user = 1; - ksig.signb = SIGSEGV; - ksig.code = 0; + signo = SIGSEGV; + ucode = 0; goto do_trapsignal; } } else { @@ -350,9 +352,9 @@ onfault = pcb->pcb_onfault; pcb->pcb_onfault = NULL; - error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); + error = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &signo, &ucode); pcb->pcb_onfault = onfault; - if (__predict_true(error == 0)) + if (__predict_true(error == KERN_SUCCESS)) goto out; fatal_pagefault: if (user == 0) { @@ -368,18 +370,8 @@ } - if (error == ENOMEM) { - printf("VM: pid %d (%s), uid %d killed: " - "out of swap\n", td->td_proc->p_pid, td->td_name, - (td->td_proc->p_ucred) ? - td->td_proc->p_ucred->cr_uid : -1); - ksig.signb = SIGKILL; - } else { - ksig.signb = SIGSEGV; - } - ksig.code = 0; do_trapsignal: - call_trapsignal(td, ksig.signb, ksig.code); + call_trapsignal(td, signo, ucode); out: /* If returning to user mode, make sure to invoke userret() */ if (user) @@ -613,10 +605,9 @@ struct proc * p; struct vm_map *map; vm_offset_t fault_pc, va; - int error = 0; + int error = 0, signo, ucode; struct ksig ksig; - #if 0 /* Update vmmeter statistics */ uvmexp.traps++; @@ -652,8 +643,8 @@ /* Ok validate the address, can only execute in USER space */ if (__predict_false(fault_pc >= VM_MAXUSER_ADDRESS || (fault_pc < VM_MIN_ADDRESS && vector_page == ARM_VECTORS_LOW))) { - ksig.signb = SIGSEGV; - ksig.code = 0; + signo = SIGSEGV; + ucode = 0; goto do_trapsignal; } @@ -669,24 +660,13 @@ if (pmap_fault_fixup(map->pmap, va, VM_PROT_READ, 1)) goto out; - error = vm_fault(map, va, VM_PROT_READ | VM_PROT_EXECUTE, - VM_FAULT_NORMAL); - if (__predict_true(error == 0)) + error = vm_fault_trap(map, va, VM_PROT_READ | VM_PROT_EXECUTE, + VM_FAULT_NORMAL, &signo, &ucode); + if (__predict_true(error == KERN_SUCCESS)) goto out; - 
if (error == ENOMEM) { - printf("VM: pid %d (%s), uid %d killed: " - "out of swap\n", td->td_proc->p_pid, td->td_name, - (td->td_proc->p_ucred) ? - td->td_proc->p_ucred->cr_uid : -1); - ksig.signb = SIGKILL; - } else { - ksig.signb = SIGSEGV; - } - ksig.code = 0; - do_trapsignal: - call_trapsignal(td, ksig.signb, ksig.code); + call_trapsignal(td, signo, ucode); out: userret(td, tf); Index: sys/arm/arm/trap-v6.c =================================================================== --- sys/arm/arm/trap-v6.c +++ sys/arm/arm/trap-v6.c @@ -287,7 +287,7 @@ struct vmspace *vm; vm_prot_t ftype; bool usermode; - int bp_harden; + int bp_harden, signo, ucode; #ifdef INVARIANTS void *onfault; #endif @@ -497,7 +497,7 @@ #endif /* Fault in the page. */ - rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); + rv = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &signo, &ucode); #ifdef INVARIANTS pcb->pcb_onfault = onfault; @@ -518,12 +518,12 @@ return; } - ksig.sig = SIGSEGV; - ksig.code = (rv == KERN_PROTECTION_FAILURE) ? SEGV_ACCERR : SEGV_MAPERR; + ksig.sig = signo; + ksig.code = ucode; ksig.addr = far; do_trapsignal: call_trapsignal(td, ksig.sig, ksig.code, ksig.addr); out: if (usermode) userret(td, tf); Index: sys/arm64/arm64/trap.c =================================================================== --- sys/arm64/arm64/trap.c +++ sys/arm64/arm64/trap.c @@ -155,7 +155,6 @@ struct proc *p; struct pcb *pcb; vm_prot_t ftype; - vm_offset_t va; int error, sig, ucode; #ifdef KDB bool handled; @@ -211,7 +210,6 @@ panic("data abort in critical section or under mutex"); } - va = trunc_page(far); if (exec) ftype = VM_PROT_EXECUTE; else @@ -219,14 +217,9 @@ VM_PROT_READ | VM_PROT_WRITE; /* Fault in the page. 
*/ - error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); + error = vm_fault_trap(map, far, ftype, VM_FAULT_NORMAL, &sig, &ucode); if (error != KERN_SUCCESS) { if (lower) { - sig = SIGSEGV; - if (error == KERN_PROTECTION_FAILURE) - ucode = SEGV_ACCERR; - else - ucode = SEGV_MAPERR; call_trapsignal(td, sig, ucode, (void *)far); } else { if (td->td_intr_nesting_level == 0 && Index: sys/i386/i386/trap.c =================================================================== --- sys/i386/i386/trap.c +++ sys/i386/i386/trap.c @@ -114,7 +114,7 @@ void trap(struct trapframe *frame); void syscall(struct trapframe *frame); -static int trap_pfault(struct trapframe *, int, vm_offset_t); +static int trap_pfault(struct trapframe *, bool, vm_offset_t, int *, int *); static void trap_fatal(struct trapframe *, vm_offset_t); #ifdef KDTRACE_HOOKS static bool trap_user_dtrace(struct trapframe *, @@ -181,9 +181,6 @@ int has_f00f_bug = 0; /* Initialized so that it can be patched. */ #endif -static int prot_fault_translation = 0; -SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW, - &prot_fault_translation, 0, "Select signal to deliver on protection fault"); static int uprintf_signal; SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW, &uprintf_signal, 0, @@ -202,7 +199,7 @@ ksiginfo_t ksi; struct thread *td; struct proc *p; - int signo, ucode; + int pf, signo, ucode; u_int type; register_t addr, dr6; vm_offset_t eva; @@ -212,9 +209,6 @@ td = curthread; p = td->td_proc; - signo = 0; - ucode = 0; - addr = 0; dr6 = 0; VM_CNT_INC(v_trap); @@ -395,57 +389,23 @@ break; case T_PAGEFLT: /* page fault */ - signo = trap_pfault(frame, TRUE, eva); + addr = eva; + pf = trap_pfault(frame, true, eva, &signo, &ucode); #if defined(I586_CPU) && !defined(NO_F00F_HACK) - if (signo == -2) { + if (pf == -2) { /* * The f00f hack workaround has triggered, so * treat the fault as an illegal instruction * (T_PRIVINFLT) instead of a page fault. 
*/ type = frame->tf_trapno = T_PRIVINFLT; - - /* Proceed as in that case. */ - ucode = ILL_PRVOPC; - signo = SIGILL; break; } #endif - if (signo == -1) + if (pf == -1) return; - if (signo == 0) + if (pf == 0) goto user; - - if (signo == SIGSEGV) - ucode = SEGV_MAPERR; - else if (prot_fault_translation == 0) { - /* - * Autodetect. This check also covers - * the images without the ABI-tag ELF - * note. - */ - if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && - p->p_osrel >= P_OSREL_SIGSEGV) { - signo = SIGSEGV; - ucode = SEGV_ACCERR; - } else { - signo = SIGBUS; - ucode = T_PAGEFLT; - } - } else if (prot_fault_translation == 1) { - /* - * Always compat mode. - */ - signo = SIGBUS; - ucode = T_PAGEFLT; - } else { - /* - * Always SIGSEGV mode. - */ - signo = SIGSEGV; - ucode = SEGV_ACCERR; - } - addr = eva; break; case T_DIVIDE: /* integer divide fault */ @@ -517,7 +477,7 @@ ("kernel trap doesn't have ucred")); switch (type) { case T_PAGEFLT: /* page fault */ - (void) trap_pfault(frame, FALSE, eva); + (void)trap_pfault(frame, false, eva, NULL, NULL); return; case T_DNA: @@ -770,15 +730,17 @@ } static int -trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva) +trap_pfault(struct trapframe *frame, bool usermode, vm_offset_t eva, + int *signo, int *ucode) { struct thread *td; struct proc *p; - vm_offset_t va; vm_map_t map; int rv; vm_prot_t ftype; + MPASS(!usermode || (signo != NULL && ucode != NULL)); + td = curthread; p = td->td_proc; @@ -826,8 +788,7 @@ return (-1); } } - va = trunc_page(eva); - if (va >= PMAP_TRM_MIN_ADDRESS) { + if (eva >= PMAP_TRM_MIN_ADDRESS) { /* * Don't allow user-mode faults in kernel address space. * An exception: if the faulting address is the invalid @@ -837,11 +798,17 @@ * fault. 
*/ #if defined(I586_CPU) && !defined(NO_F00F_HACK) - if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) + if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) { + *ucode = ILL_PRVOPC; + *signo = SIGILL; return (-2); + } #endif - if (usermode) - return (SIGSEGV); + if (usermode) { + *signo = SIGSEGV; + *ucode = SEGV_MAPERR; + return (1); + } trap_fatal(frame, eva); return (-1); } else { @@ -878,7 +845,7 @@ ftype = VM_PROT_READ; /* Fault in the page. */ - rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); + rv = vm_fault_trap(map, eva, ftype, VM_FAULT_NORMAL, signo, ucode); if (rv == KERN_SUCCESS) { #ifdef HWPMC_HOOKS if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) { @@ -893,16 +860,15 @@ #endif return (0); } - if (!usermode) { - if (td->td_intr_nesting_level == 0 && - curpcb->pcb_onfault != NULL) { - frame->tf_eip = (int)curpcb->pcb_onfault; - return (0); - } - trap_fatal(frame, eva); - return (-1); + if (usermode) + return (1); + if (td->td_intr_nesting_level == 0 && + curpcb->pcb_onfault != NULL) { + frame->tf_eip = (int)curpcb->pcb_onfault; + return (0); } - return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); + trap_fatal(frame, eva); + return (-1); } static void Index: sys/kern/sys_process.c =================================================================== --- sys/kern/sys_process.c +++ sys/kern/sys_process.c @@ -286,7 +286,7 @@ /* * Fault and hold the page on behalf of the process. 
*/ - error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m); + error = vm_fault(map, pageno, reqprot, fault_flags, &m); if (error != KERN_SUCCESS) { if (error == KERN_RESOURCE_SHORTAGE) error = ENOMEM; Index: sys/mips/mips/trap.c =================================================================== --- sys/mips/mips/trap.c +++ sys/mips/mips/trap.c @@ -670,7 +670,8 @@ kernel_fault: va = trunc_page((vm_offset_t)trapframe->badvaddr); - rv = vm_fault(kernel_map, va, ftype, VM_FAULT_NORMAL); + rv = vm_fault_trap(kernel_map, va, ftype, + VM_FAULT_NORMAL, NULL, NULL); if (rv == KERN_SUCCESS) return (trapframe->pc); if (td->td_pcb->pcb_onfault != NULL) { @@ -714,7 +715,8 @@ goto nogo; } - rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); + rv = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, + &i, &ucode); /* * XXXDTRACE: add dtrace_doubletrap_func here? */ @@ -739,11 +741,6 @@ } goto err; } - i = SIGSEGV; - if (rv == KERN_PROTECTION_FAILURE) - ucode = SEGV_ACCERR; - else - ucode = SEGV_MAPERR; addr = trapframe->pc; msg = "BAD_PAGE_FAULT"; Index: sys/powerpc/powerpc/trap.c =================================================================== --- sys/powerpc/powerpc/trap.c +++ sys/powerpc/powerpc/trap.c @@ -87,7 +87,8 @@ static void trap_fatal(struct trapframe *frame); static void printtrap(u_int vector, struct trapframe *frame, int isfatal, int user); -static int trap_pfault(struct trapframe *frame, int user); +static bool trap_pfault(struct trapframe *frame, int user, int *signo, + int *ucode); static int fix_unaligned(struct thread *td, struct trapframe *frame); static int handle_onfault(struct trapframe *frame); static void syscall(struct trapframe *frame); @@ -269,9 +270,8 @@ #endif case EXC_DSI: case EXC_ISI: - sig = trap_pfault(frame, 1); - if (sig == SIGSEGV) - ucode = SEGV_MAPERR; + if (trap_pfault(frame, 1, &sig, &ucode)) + sig = 0; break; case EXC_SC: @@ -460,7 +460,7 @@ break; #endif case EXC_DSI: - if (trap_pfault(frame, 0) == 0) + if (trap_pfault(frame, 0, 
&sig, &ucode)) return; break; case EXC_MCHK: @@ -719,7 +719,7 @@ } -static int -trap_pfault(struct trapframe *frame, int user) +static bool +trap_pfault(struct trapframe *frame, int user, int *signo, int *ucode) { vm_offset_t eva, va; struct thread *td; @@ -752,8 +752,11 @@ map = &p->p_vmspace->vm_map; } else { rv = pmap_decode_kernel_ptr(eva, &is_user, &eva); - if (rv != 0) - return (SIGSEGV); + if (rv != 0) { + *signo = SIGSEGV; + *ucode = SEGV_ACCERR; + return (false); + } if (is_user) map = &p->p_vmspace->vm_map; @@ -763,18 +766,18 @@ va = trunc_page(eva); /* Fault in the page. */ - rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL); + rv = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, signo, ucode); /* * XXXDTRACE: add dtrace_doubletrap_func here? */ if (rv == KERN_SUCCESS) - return (0); + return (true); if (!user && handle_onfault(frame)) - return (0); + return (true); - return (SIGSEGV); + return (false); } /* Index: sys/riscv/riscv/trap.c =================================================================== --- sys/riscv/riscv/trap.c +++ sys/riscv/riscv/trap.c @@ -217,36 +217,10 @@ if (pmap_fault_fixup(map->pmap, va, ftype)) goto done; - if (map != kernel_map) { - /* - * Keep swapout from messing with us during this - * critical time. - */ - PROC_LOCK(p); - ++p->p_lock; - PROC_UNLOCK(p); - - /* Fault in the user page: */ - error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); - - PROC_LOCK(p); - --p->p_lock; - PROC_UNLOCK(p); - } else { - /* - * Don't have to worry about process locking or stacks in the - * kernel. 
- */ - error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); - } + error = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &sig, &ucode); if (error != KERN_SUCCESS) { if (usermode) { - sig = SIGSEGV; - if (error == KERN_PROTECTION_FAILURE) - ucode = SEGV_ACCERR; - else - ucode = SEGV_MAPERR; call_trapsignal(td, sig, ucode, (void *)stval); } else { if (pcb->pcb_onfault != 0) { Index: sys/sparc64/sparc64/trap.c =================================================================== --- sys/sparc64/sparc64/trap.c +++ sys/sparc64/sparc64/trap.c @@ -91,7 +91,8 @@ void syscall(struct trapframe *tf); static int trap_cecc(void); -static int trap_pfault(struct thread *td, struct trapframe *tf); +static bool trap_pfault(struct thread *td, struct trapframe *tf, int *signo, + int *ucode); extern char copy_fault[]; extern char copy_nofault_begin[]; @@ -287,7 +288,8 @@ addr = tf->tf_sfar; /* FALLTHROUGH */ case T_INSTRUCTION_MISS: - sig = trap_pfault(td, tf); + if (trap_pfault(td, tf, &sig, &ucode)) + sig = 0; break; case T_FILL: sig = rwindow_load(td, tf, 2); @@ -358,7 +360,7 @@ case T_DATA_MISS: case T_DATA_PROTECTION: case T_INSTRUCTION_MISS: - error = trap_pfault(td, tf); + error = !trap_pfault(td, tf, &sig, &ucode); break; case T_DATA_EXCEPTION: case T_MEM_ADDRESS_NOT_ALIGNED: @@ -443,8 +445,8 @@ return (0); } -static int -trap_pfault(struct thread *td, struct trapframe *tf) +static bool +trap_pfault(struct thread *td, struct trapframe *tf, int *signo, int *ucode) { vm_map_t map; struct proc *p; @@ -508,27 +510,27 @@ } /* Fault in the page. 
*/ - rv = vm_fault(map, va, prot, VM_FAULT_NORMAL); + rv = vm_fault_trap(map, va, prot, VM_FAULT_NORMAL, signo, ucode); CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d", td, va, rv); if (rv == KERN_SUCCESS) - return (0); + return (true); if (ctx != TLB_CTX_KERNEL && (tf->tf_tstate & TSTATE_PRIV) != 0) { if (tf->tf_tpc >= (u_long)fs_nofault_begin && tf->tf_tpc <= (u_long)fs_nofault_end) { tf->tf_tpc = (u_long)fs_fault; tf->tf_tnpc = tf->tf_tpc + 4; - return (0); + return (true); } if (tf->tf_tpc >= (u_long)copy_nofault_begin && tf->tf_tpc <= (u_long)copy_nofault_end) { tf->tf_tpc = (u_long)copy_fault; tf->tf_tnpc = tf->tf_tpc + 4; - return (0); + return (true); } } - return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV); + return (false); } /* Maximum number of arguments that can be passed via the out registers. */ Index: sys/sys/signal.h =================================================================== --- sys/sys/signal.h +++ sys/sys/signal.h @@ -315,11 +315,13 @@ #define BUS_ADRALN 1 /* Invalid address alignment. */ #define BUS_ADRERR 2 /* Nonexistent physical address. */ #define BUS_OBJERR 3 /* Object-specific hardware error. */ +#define BUS_OOMERR 100 /* Non-standard: No memory. */ /* codes for SIGSEGV */ #define SEGV_MAPERR 1 /* Address not mapped to object. */ #define SEGV_ACCERR 2 /* Invalid permissions for mapped */ /* object. */ +#define SEGV_PKUERR 100 /* x86: PKU violation */ /* codes for SIGFPE */ #define FPE_INTOVF 1 /* Integer overflow. 
*/ Index: sys/vm/vm_extern.h =================================================================== --- sys/vm/vm_extern.h +++ sys/vm/vm_extern.h @@ -85,13 +85,14 @@ int kernacc(void *, int, int); int useracc(void *, int, int); -int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int); +int vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, + int fault_flags, vm_page_t *m_hold); +int vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, + int fault_flags, int *signo, int *ucode); void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t, vm_ooffset_t *); int vm_fault_disable_pagefaults(void); void vm_fault_enable_pagefaults(int save); -int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, - int fault_flags, vm_page_t *m_hold); int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, vm_prot_t prot, vm_page_t *ma, int max_count); int vm_forkproc(struct thread *, struct proc *, struct thread *, Index: sys/vm/vm_fault.c =================================================================== --- sys/vm/vm_fault.c +++ sys/vm/vm_fault.c @@ -89,7 +89,9 @@ #include #include #include +#include #include +#include #include #include #ifdef KTRACE @@ -519,8 +521,19 @@ return (KERN_SUCCESS); } +static int prot_fault_translation; +SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN, + &prot_fault_translation, 0, + "Select signal to deliver on protection fault"); + +/* compat definition to keep common code for signal translation */ +#define UCODE_PAGEFLT 12 +#ifdef T_PAGEFLT +_Static_assert(UCODE_PAGEFLT == T_PAGEFLT, "T_PAGEFLT"); +#endif + /* - * vm_fault: + * vm_fault_trap: * * Handle a page fault occurring at the given address, * requiring the given permissions, in the map specified. @@ -537,8 +550,8 @@ * Caller may hold no locks. 
*/ int -vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, - int fault_flags) +vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, + int fault_flags, int *signo, int *ucode) { struct thread *td; int result; @@ -550,17 +563,56 @@ if (map != kernel_map && KTRPOINT(td, KTR_FAULT)) ktrfault(vaddr, fault_type); #endif - result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags, + result = vm_fault(map, trunc_page(vaddr), fault_type, fault_flags, NULL); + KASSERT(result == KERN_SUCCESS || result == KERN_FAILURE || + result == KERN_INVALID_ADDRESS || + result == KERN_RESOURCE_SHORTAGE || + result == KERN_PROTECTION_FAILURE, + ("Unexpected Mach error %d from vm_fault()", result)); #ifdef KTRACE if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND)) ktrfaultend(result); #endif + if (result != KERN_SUCCESS && signo != NULL) { + if (result == KERN_FAILURE) { + *signo = SIGSEGV; + *ucode = SEGV_MAPERR; + } else if (result == KERN_RESOURCE_SHORTAGE) { + *signo = SIGBUS; + *ucode = BUS_OOMERR; + } else if (result == KERN_INVALID_ADDRESS) { + *signo = SIGBUS; + *ucode = BUS_OBJERR; + } else if (prot_fault_translation == 0) { + /* + * Autodetect. This check also covers + * the images without the ABI-tag ELF + * note. + */ + if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && + curproc->p_osrel >= P_OSREL_SIGSEGV) { + *signo = SIGSEGV; + *ucode = SEGV_ACCERR; + } else { + *signo = SIGBUS; + *ucode = UCODE_PAGEFLT; + } + } else if (prot_fault_translation == 1) { + /* Always compat mode. */ + *signo = SIGBUS; + *ucode = UCODE_PAGEFLT; + } else { + /* Always SIGSEGV mode. 
*/ + *signo = SIGSEGV; + *ucode = SEGV_ACCERR; + } + } return (result); } int -vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, +vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold) { struct faultstate fs; @@ -774,7 +826,7 @@ fs.object == fs.first_object) { if (fs.pindex >= fs.object->size) { unlock_and_deallocate(&fs); - return (KERN_PROTECTION_FAILURE); + return (KERN_INVALID_ADDRESS); } if (fs.object == fs.first_object && @@ -1023,8 +1075,7 @@ vm_page_xunbusy(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); - return (rv == VM_PAGER_ERROR ? KERN_FAILURE : - KERN_PROTECTION_FAILURE); + return (KERN_INVALID_ADDRESS); } /* @@ -1584,7 +1635,7 @@ * If vm_fault_disable_pagefaults() was called, * i.e., TDP_NOFAULTING is set, we must not sleep nor * acquire MD VM locks, which means we must not call - * vm_fault_hold(). Some (out of tree) callers mark + * vm_fault(). Some (out of tree) callers mark * too wide a code area with vm_fault_disable_pagefaults() * already, use the VM_PROT_QUICK_NOFAULT flag to request * the proper behaviour explicitly. @@ -1593,7 +1644,7 @@ (curthread->td_pflags & TDP_NOFAULTING) != 0) goto error; for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) - if (*mp == NULL && vm_fault_hold(map, va, prot, + if (*mp == NULL && vm_fault(map, va, prot, VM_FAULT_NORMAL, mp) != KERN_SUCCESS) goto error; } Index: sys/vm/vm_map.c =================================================================== --- sys/vm/vm_map.c +++ sys/vm/vm_map.c @@ -3201,8 +3201,9 @@ * Simulate a fault to get the page and enter * it into the physical map. */ - if ((rv = vm_fault(map, faddr, VM_PROT_NONE, - VM_FAULT_WIRE)) != KERN_SUCCESS) + if ((rv = vm_fault(map, faddr, + VM_PROT_NONE, VM_FAULT_WIRE, NULL)) != + KERN_SUCCESS) break; } while ((faddr += PAGE_SIZE) < saved_end); vm_map_lock(map);