Changeset View
Standalone View
sys/vm/vm_fault.c
Show First 20 Lines • Show All 83 Lines • ▼ Show 20 Lines | |||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/lock.h> | #include <sys/lock.h> | ||||
#include <sys/mman.h> | #include <sys/mman.h> | ||||
#include <sys/mutex.h> | #include <sys/mutex.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/racct.h> | #include <sys/racct.h> | ||||
#include <sys/resourcevar.h> | #include <sys/resourcevar.h> | ||||
#include <sys/rwlock.h> | #include <sys/rwlock.h> | ||||
#include <sys/signalvar.h> | |||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/sysent.h> | |||||
#include <sys/vmmeter.h> | #include <sys/vmmeter.h> | ||||
#include <sys/vnode.h> | #include <sys/vnode.h> | ||||
#ifdef KTRACE | #ifdef KTRACE | ||||
#include <sys/ktrace.h> | #include <sys/ktrace.h> | ||||
#endif | #endif | ||||
#include <vm/vm.h> | #include <vm/vm.h> | ||||
#include <vm/vm_param.h> | #include <vm/vm_param.h> | ||||
▲ Show 20 Lines • Show All 413 Lines • ▼ Show 20 Lines | #endif | ||||
} | } | ||||
if (m_mtx != NULL) | if (m_mtx != NULL) | ||||
mtx_unlock(m_mtx); | mtx_unlock(m_mtx); | ||||
} | } | ||||
curthread->td_ru.ru_majflt++; | curthread->td_ru.ru_majflt++; | ||||
return (KERN_SUCCESS); | return (KERN_SUCCESS); | ||||
} | } | ||||
static int prot_fault_translation; | |||||
SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN, | |||||
&prot_fault_translation, 0, | |||||
"Select signal to deliver on protection fault"); | |||||
alc: Feel free to ignore or defer action on the following comment: To me, the wording of this description suggests that I should set this sysctl to the signal number that I want delivered. | | | | |
Done Inline Actions — kib: s/select/control/ | | | | |
/* compat definition to keep common code for signal translation */ | |||||
#define UCODE_PAGEFLT 12 | |||||
#ifdef T_PAGEFLT | |||||
_Static_assert(UCODE_PAGEFLT == T_PAGEFLT, "T_PAGEFLT"); | |||||
#endif | |||||
/* | /* | ||||
* vm_fault: | * vm_fault_trap: | ||||
* | * | ||||
* Handle a page fault occurring at the given address, | * Handle a page fault occurring at the given address, | ||||
* requiring the given permissions, in the map specified. | * requiring the given permissions, in the map specified. | ||||
* If successful, the page is inserted into the | * If successful, the page is inserted into the | ||||
* associated physical map. | * associated physical map. | ||||
* | * | ||||
* NOTE: the given address should be truncated to the | * NOTE: the given address should be truncated to the | ||||
* proper page address. | * proper page address. | ||||
* | * | ||||
* KERN_SUCCESS is returned if the page fault is handled; otherwise, | * KERN_SUCCESS is returned if the page fault is handled; otherwise, | ||||
* a standard error specifying why the fault is fatal is returned. | * a standard error specifying why the fault is fatal is returned. | ||||
* | * | ||||
* The map in question must be referenced, and remains so. | * The map in question must be referenced, and remains so. | ||||
* Caller may hold no locks. | * Caller may hold no locks. | ||||
*/ | */ | ||||
int | int | ||||
vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, | vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, | ||||
int fault_flags) | int fault_flags, int *signo, int *ucode) | ||||
{ | { | ||||
struct thread *td; | struct thread *td; | ||||
int result; | int result; | ||||
td = curthread; | td = curthread; | ||||
if ((td->td_pflags & TDP_NOFAULTING) != 0) | if ((td->td_pflags & TDP_NOFAULTING) != 0) | ||||
return (KERN_PROTECTION_FAILURE); | return (KERN_PROTECTION_FAILURE); | ||||
#ifdef KTRACE | #ifdef KTRACE | ||||
if (map != kernel_map && KTRPOINT(td, KTR_FAULT)) | if (map != kernel_map && KTRPOINT(td, KTR_FAULT)) | ||||
ktrfault(vaddr, fault_type); | ktrfault(vaddr, fault_type); | ||||
#endif | #endif | ||||
result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags, | result = vm_fault(map, trunc_page(vaddr), fault_type, fault_flags, | ||||
NULL); | NULL); | ||||
KASSERT(result == KERN_SUCCESS || result == KERN_FAILURE || | |||||
result == KERN_INVALID_ADDRESS || | |||||
result == KERN_RESOURCE_SHORTAGE || | |||||
result == KERN_PROTECTION_FAILURE, | |||||
("Unexpected Mach error %d from vm_fault()", result)); | |||||
Done Inline Actions — markj: "Unexpected" would be more accurate than "Unknown". | | | | |
#ifdef KTRACE | #ifdef KTRACE | ||||
if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND)) | if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND)) | ||||
ktrfaultend(result); | ktrfaultend(result); | ||||
#endif | #endif | ||||
if (result != KERN_SUCCESS && map != kernel_map) { | |||||
if (result == KERN_FAILURE) { | |||||
*signo = SIGSEGV; | |||||
*ucode = SEGV_MAPERR; | |||||
} else if (result == KERN_RESOURCE_SHORTAGE) { | |||||
*signo = SIGBUS; | |||||
*ucode = BUS_OOMERR; | |||||
Done Inline Actions — jilles: `BUS_OBJERR` might be more correct, but it may also be helpful to have a different value to help in diagnosis. | | | | |
Done Inline Actions — kib: I added BUS_OOMERR. | | | | |
} else if (result == KERN_INVALID_ADDRESS) { | |||||
*signo = SIGBUS; | |||||
*ucode = BUS_OBJERR; | |||||
} else if (prot_fault_translation == 0) { | |||||
Done Inline Actions — markj: Extra space after `else`. | | | | |
/* | |||||
* Autodetect. This check also covers | |||||
* the images without the ABI-tag ELF | |||||
* note. | |||||
*/ | |||||
if (SV_CURPROC_ABI() == SV_ABI_FREEBSD && | |||||
curproc->p_osrel >= P_OSREL_SIGSEGV) { | |||||
*signo = SIGSEGV; | |||||
*ucode = SEGV_ACCERR; | |||||
} else { | |||||
*signo = SIGBUS; | |||||
*ucode = UCODE_PAGEFLT; | |||||
Done Inline Actions — markj: I guess it is fine to change the ucode for old binaries on non-tier 1 arches since we do not attempt to maintain compatibility for them? It might be worth explaining in a comment that this is only really intended on i386/amd64. | | | | |
Done Inline Actions — kib: I already looked at the non-x86 arches, and they typically have huge non-compliance there. E.g., arm always delivers SIGSEGV. Enabling compat mode requires manually frobbing the sysctl from userland, so I think this change is fine. | | | | |
} | |||||
} else if (prot_fault_translation == 1) { | |||||
/* Always compat mode. */ | |||||
*signo = SIGBUS; | |||||
*ucode = UCODE_PAGEFLT; | |||||
} else { | |||||
/* Always SIGSEGV mode. */ | |||||
*signo = SIGSEGV; | |||||
*ucode = SEGV_ACCERR; | |||||
} | |||||
} | |||||
return (result); | return (result); | ||||
} | } | ||||
int | int | ||||
vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, | vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, | ||||
int fault_flags, vm_page_t *m_hold) | int fault_flags, vm_page_t *m_hold) | ||||
{ | { | ||||
struct faultstate fs; | struct faultstate fs; | ||||
Done Inline Actions — alc: 1->0 | | | | |
struct vnode *vp; | struct vnode *vp; | ||||
struct domainset *dset; | struct domainset *dset; | ||||
vm_object_t next_object, retry_object; | vm_object_t next_object, retry_object; | ||||
vm_offset_t e_end, e_start; | vm_offset_t e_end, e_start; | ||||
vm_pindex_t retry_pindex; | vm_pindex_t retry_pindex; | ||||
vm_prot_t prot, retry_prot; | vm_prot_t prot, retry_prot; | ||||
int ahead, alloc_req, behind, cluster_offset, error, era, faultcount; | int ahead, alloc_req, behind, cluster_offset, error, era, faultcount; | ||||
int locked, nera, oom, result, rv; | int locked, nera, oom, result, rv; | ||||
▲ Show 20 Lines • Show All 194 Lines • ▼ Show 20 Lines | while (TRUE) { | ||||
* or this is the beginning of the search, allocate a new | * or this is the beginning of the search, allocate a new | ||||
* page. (Default objects are zero-fill, so there is no real | * page. (Default objects are zero-fill, so there is no real | ||||
* pager for them.) | * pager for them.) | ||||
*/ | */ | ||||
if (fs.object->type != OBJT_DEFAULT || | if (fs.object->type != OBJT_DEFAULT || | ||||
fs.object == fs.first_object) { | fs.object == fs.first_object) { | ||||
if (fs.pindex >= fs.object->size) { | if (fs.pindex >= fs.object->size) { | ||||
unlock_and_deallocate(&fs); | unlock_and_deallocate(&fs); | ||||
return (KERN_PROTECTION_FAILURE); | return (KERN_INVALID_ADDRESS); | ||||
} | } | ||||
if (fs.object == fs.first_object && | if (fs.object == fs.first_object && | ||||
(fs.first_object->flags & OBJ_POPULATE) != 0 && | (fs.first_object->flags & OBJ_POPULATE) != 0 && | ||||
fs.first_object->shadow_count == 0) { | fs.first_object->shadow_count == 0) { | ||||
rv = vm_fault_populate(&fs, prot, fault_type, | rv = vm_fault_populate(&fs, prot, fault_type, | ||||
fault_flags, wired, m_hold); | fault_flags, wired, m_hold); | ||||
switch (rv) { | switch (rv) { | ||||
▲ Show 20 Lines • Show All 231 Lines • ▼ Show 20 Lines | if (fs.object->type != OBJT_DEFAULT) { | ||||
* an error. | * an error. | ||||
*/ | */ | ||||
if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) { | if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) { | ||||
if (!vm_page_wired(fs.m)) | if (!vm_page_wired(fs.m)) | ||||
vm_page_free(fs.m); | vm_page_free(fs.m); | ||||
else | else | ||||
vm_page_xunbusy(fs.m); | vm_page_xunbusy(fs.m); | ||||
fs.m = NULL; | fs.m = NULL; | ||||
unlock_and_deallocate(&fs); | unlock_and_deallocate(&fs); | ||||
return (rv == VM_PAGER_ERROR ? KERN_FAILURE : | return (KERN_INVALID_ADDRESS); | ||||
Done Inline Actions — jilles: I expect signal `SIGBUS` with code `BUS_OBJERR` in both of these cases. | | | | |
Done Inline Actions — alc: Can you double-check this case? Does jilles@'s comment still apply here? I ask because KERN_INVALID_ADDRESS is going to generate a SIGSEGV, not a SIGBUS. | | | | |
Done Inline Actions — kib: I believe that this line change was made before the introduction of KERN_OUT_OF_BOUNDS; the signal generation code was then rewritten to accommodate the new errors, but this line was left behind. | | | | |
KERN_PROTECTION_FAILURE); | |||||
} | } | ||||
/* | /* | ||||
* The requested page does not exist at this object/ | * The requested page does not exist at this object/ | ||||
* offset. Remove the invalid page from the object, | * offset. Remove the invalid page from the object, | ||||
* waking up anyone waiting for it, and continue on to | * waking up anyone waiting for it, and continue on to | ||||
* the next object. However, if this is the top-level | * the next object. However, if this is the top-level | ||||
* object, we must leave the busy page in place to | * object, we must leave the busy page in place to | ||||
▲ Show 20 Lines • Show All 543 Lines • ▼ Show 20 Lines | if (pmap_failed) { | ||||
* One or more pages could not be held by the pmap. Either no | * One or more pages could not be held by the pmap. Either no | ||||
* page was mapped at the specified virtual address or that | * page was mapped at the specified virtual address or that | ||||
* mapping had insufficient permissions. Attempt to fault in | * mapping had insufficient permissions. Attempt to fault in | ||||
* and hold these pages. | * and hold these pages. | ||||
* | * | ||||
* If vm_fault_disable_pagefaults() was called, | * If vm_fault_disable_pagefaults() was called, | ||||
* i.e., TDP_NOFAULTING is set, we must not sleep nor | * i.e., TDP_NOFAULTING is set, we must not sleep nor | ||||
* acquire MD VM locks, which means we must not call | * acquire MD VM locks, which means we must not call | ||||
* vm_fault_hold(). Some (out of tree) callers mark | * vm_fault(). Some (out of tree) callers mark | ||||
* too wide a code area with vm_fault_disable_pagefaults() | * too wide a code area with vm_fault_disable_pagefaults() | ||||
* already, use the VM_PROT_QUICK_NOFAULT flag to request | * already, use the VM_PROT_QUICK_NOFAULT flag to request | ||||
* the proper behaviour explicitly. | * the proper behaviour explicitly. | ||||
*/ | */ | ||||
if ((prot & VM_PROT_QUICK_NOFAULT) != 0 && | if ((prot & VM_PROT_QUICK_NOFAULT) != 0 && | ||||
(curthread->td_pflags & TDP_NOFAULTING) != 0) | (curthread->td_pflags & TDP_NOFAULTING) != 0) | ||||
goto error; | goto error; | ||||
for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) | for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) | ||||
if (*mp == NULL && vm_fault_hold(map, va, prot, | if (*mp == NULL && vm_fault(map, va, prot, | ||||
VM_FAULT_NORMAL, mp) != KERN_SUCCESS) | VM_FAULT_NORMAL, mp) != KERN_SUCCESS) | ||||
goto error; | goto error; | ||||
} | } | ||||
return (count); | return (count); | ||||
error: | error: | ||||
for (mp = ma; mp < ma + count; mp++) | for (mp = ma; mp < ma + count; mp++) | ||||
if (*mp != NULL) | if (*mp != NULL) | ||||
vm_page_unwire(*mp, PQ_INACTIVE); | vm_page_unwire(*mp, PQ_INACTIVE); | ||||
▲ Show 20 Lines • Show All 234 Lines • Show Last 20 Lines |
Feel free to ignore or defer action on the following comment: To me, the wording of this description suggests that I should set this sysctl to the signal number that I want delivered.