Changeset View
Standalone View
sys/vm/vm_mmap.c
Show First 20 Lines • Show All 97 Lines • ▼ Show 20 Lines | |||||
#endif | #endif | ||||
int old_mlock = 0; | int old_mlock = 0; | ||||
SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0, | SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0, | ||||
"Do not apply RLIMIT_MEMLOCK on mlockall"); | "Do not apply RLIMIT_MEMLOCK on mlockall"); | ||||
static int mincore_mapped = 1; | static int mincore_mapped = 1; | ||||
SYSCTL_INT(_vm, OID_AUTO, mincore_mapped, CTLFLAG_RWTUN, &mincore_mapped, 0, | SYSCTL_INT(_vm, OID_AUTO, mincore_mapped, CTLFLAG_RWTUN, &mincore_mapped, 0, | ||||
"mincore reports mappings, not residency"); | "mincore reports mappings, not residency"); | ||||
static int imply_prot_max = 0; | |||||
SYSCTL_INT(_vm, OID_AUTO, imply_prot_max, CTLFLAG_RWTUN, &imply_prot_max, 0, | |||||
emaste: `s/mincore_mapped/imply_prot_max/` | |||||
"Imply maximum page permissions in mmap() when none are specified"); | |||||
Done Inline Actionssysctl description strings usually don't contain a period. markj: sysctl description strings usually don't contain a period. | |||||
#ifdef MAP_32BIT | #ifdef MAP_32BIT | ||||
#define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) | #define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) | ||||
#endif | #endif | ||||
#ifndef _SYS_SYSPROTO_H_ | #ifndef _SYS_SYSPROTO_H_ | ||||
struct sbrk_args { | struct sbrk_args { | ||||
int incr; | int incr; | ||||
▲ Show 20 Lines • Show All 68 Lines • ▼ Show 20 Lines | |||||
kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags, | kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags, | ||||
int fd, off_t pos) | int fd, off_t pos) | ||||
{ | { | ||||
struct vmspace *vms; | struct vmspace *vms; | ||||
struct file *fp; | struct file *fp; | ||||
vm_offset_t addr; | vm_offset_t addr; | ||||
vm_size_t pageoff; | vm_size_t pageoff; | ||||
vm_prot_t cap_maxprot; | vm_prot_t cap_maxprot; | ||||
int align, error; | int align, error, max_prot; | ||||
cap_rights_t rights; | cap_rights_t rights; | ||||
if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0) | |||||
return (EINVAL); | |||||
Done Inline ActionsThis has the effect of masking invalid flags (e.g. PROT_READ | PROT_WRITE | 0x100000 or 0xffff in tests/sys/vm/mmap_test.c). emaste: This has the effect of masking invalid flags (e.g. `PROT_READ | PROT_WRITE | 0x100000` or… | |||||
max_prot = PROT_MAX_EXTRACT(prot); | |||||
prot = PROT_EXTRACT(prot); | |||||
Done Inline ActionsDon't we also need to verify that prot is a subset of max_prot? markj: Don't we also need to verify that prot is a subset of max_prot? | |||||
if (max_prot != 0 && (max_prot & prot) != prot) | |||||
return (EINVAL); | |||||
/* | |||||
* Always honor PROT_MAX if set. If not, default to all | |||||
* permissions unless we're implying maximum permissions. | |||||
* | |||||
* XXX: should be tunable per process and ABI. | |||||
*/ | |||||
if (max_prot == 0) | |||||
Done Inline ActionsI would write this as kib: I would write this as
`
max_prot = imply_prot_max ? prot : _PROT_ALL
` | |||||
max_prot = (imply_prot_max && prot != PROT_NONE) ? | |||||
prot : _PROT_ALL; | |||||
vms = td->td_proc->p_vmspace; | vms = td->td_proc->p_vmspace; | ||||
fp = NULL; | fp = NULL; | ||||
AUDIT_ARG_FD(fd); | AUDIT_ARG_FD(fd); | ||||
addr = addr0; | addr = addr0; | ||||
/* | /* | ||||
* Ignore old flags that used to be defined but did not do anything. | * Ignore old flags that used to be defined but did not do anything. | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 126 Lines • ▼ Show 20 Lines | error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE, | ||||
VM_PROT_NONE, flags, NULL, pos, FALSE, td); | VM_PROT_NONE, flags, NULL, pos, FALSE, td); | ||||
} else if ((flags & MAP_ANON) != 0) { | } else if ((flags & MAP_ANON) != 0) { | ||||
/* | /* | ||||
* Mapping blank space is trivial. | * Mapping blank space is trivial. | ||||
* | * | ||||
* This relies on VM_PROT_* matching PROT_*. | * This relies on VM_PROT_* matching PROT_*. | ||||
*/ | */ | ||||
error = vm_mmap_object(&vms->vm_map, &addr, size, prot, | error = vm_mmap_object(&vms->vm_map, &addr, size, prot, | ||||
VM_PROT_ALL, flags, NULL, pos, FALSE, td); | max_prot, flags, NULL, pos, FALSE, td); | ||||
} else { | } else { | ||||
/* | /* | ||||
* Mapping file, get fp for validation and don't let the | * Mapping file, get fp for validation and don't let the | ||||
* descriptor disappear on us if we block. Check capability | * descriptor disappear on us if we block. Check capability | ||||
* rights, but also return the maximum rights to be combined | * rights, but also return the maximum rights to be combined | ||||
* with maxprot later. | * with maxprot later. | ||||
*/ | */ | ||||
cap_rights_init(&rights, CAP_MMAP); | cap_rights_init(&rights, CAP_MMAP); | ||||
if (prot & PROT_READ) | if (prot & PROT_READ) | ||||
cap_rights_set(&rights, CAP_MMAP_R); | cap_rights_set(&rights, CAP_MMAP_R); | ||||
if ((flags & MAP_SHARED) != 0) { | if ((flags & MAP_SHARED) != 0) { | ||||
if (prot & PROT_WRITE) | if (prot & PROT_WRITE) | ||||
cap_rights_set(&rights, CAP_MMAP_W); | cap_rights_set(&rights, CAP_MMAP_W); | ||||
} | } | ||||
if (prot & PROT_EXEC) | if (prot & PROT_EXEC) | ||||
cap_rights_set(&rights, CAP_MMAP_X); | cap_rights_set(&rights, CAP_MMAP_X); | ||||
error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp); | error = fget_mmap(td, fd, &rights, &cap_maxprot, &fp); | ||||
if (error != 0) | if (error != 0) | ||||
goto done; | goto done; | ||||
KASSERT((max_prot & cap_maxprot) == cap_maxprot, | |||||
markjUnsubmitted Not Done Inline ActionsShouldn't it be (max_prot & cap_maxprot) == max_prot? Otherwise userland can trigger this assertion by requesting a PROT_MAX(PROT_READ) | PROT_READ mapping using a descriptor with read and write rights. markj: Shouldn't it be `(max_prot & cap_maxprot) == max_prot`? Otherwise userland can trigger this… | |||||
brooksAuthorUnsubmitted Done Inline ActionsMy belief was that this can't happen because of the way fget_mmap is implemented, but rereading, it looks like that's only true when CAPABILITIES is defined (in which case cap_maxprot == prot so this is safe because prot must be a subset of max_prot per the check above). In the non-CAPABILITIES case cap_maxprot is VM_PROT_ALL so this will fail. Dropping the assertion may be the best fix. brooks: My belief was that this can't happen because of the way fget_mmap is implemented, but rereading… | |||||
markjUnsubmitted Done Inline ActionsSorry, I don't see why that assertion worked even with CAPABILITIES defined. fget_mmap() returns the maximum protections permitted by capabilities on the descriptor. markj: Sorry, I don't see why that assertion worked even with CAPABILITIES defined. fget_mmap()… | |||||
brooksAuthorUnsubmitted Done Inline ActionsOn further reading, you're correct. brooks: On further reading, you're correct. | |||||
("cap_maxprot (%x) contains permissions not in " | |||||
"max_prot (%x)", cap_maxprot, max_prot)); | |||||
if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 && | if ((flags & (MAP_SHARED | MAP_PRIVATE)) == 0 && | ||||
td->td_proc->p_osrel >= P_OSREL_MAP_FSTRICT) { | td->td_proc->p_osrel >= P_OSREL_MAP_FSTRICT) { | ||||
error = EINVAL; | error = EINVAL; | ||||
goto done; | goto done; | ||||
} | } | ||||
/* This relies on VM_PROT_* matching PROT_*. */ | /* This relies on VM_PROT_* matching PROT_*. */ | ||||
error = fo_mmap(fp, &vms->vm_map, &addr, size, prot, | error = fo_mmap(fp, &vms->vm_map, &addr, size, prot, | ||||
cap_maxprot, flags, pos, td); | max_prot & cap_maxprot, flags, pos, td); | ||||
Not Done Inline ActionsShould we deny the request if (max_prot & cap_maxprot) != max_prot, so that user knows that the call was not really satisfied ? kib: Should we deny the request if `(max_prot & cap_maxprot) != max_prot`, so that user knows that… | |||||
Done Inline ActionsI'm leaning toward not. I think we'd only want to error when we'd explicitly passed a PROT_MAX(FOO) expression so we'd have to add state for that since we don't want to fail when max_prot defaults to _PROT_ALL or when it's implied. We'd also just add yet another inexplicable EINVAL for a rare case. brooks: I'm leaning toward not. I think we'd only want to error when we'd explicitly passed a PROT_MAX… | |||||
Done Inline ActionsI've added a KASSERT of this because I'm convinced that with the (prot & max_prot) == prot check above that this will always be safe (cap_maxprot ends up set to something derived directly from prot). brooks: I've added a KASSERT of this because I'm convinced that with the `(prot & max_prot) == prot`… | |||||
} | } | ||||
if (error == 0) | if (error == 0) | ||||
td->td_retval[0] = (register_t) (addr + pageoff); | td->td_retval[0] = (register_t) (addr + pageoff); | ||||
done: | done: | ||||
if (fp) | if (fp) | ||||
fdrop(fp, td); | fdrop(fp, td); | ||||
▲ Show 20 Lines • Show All 214 Lines • ▼ Show 20 Lines | sys_mprotect(struct thread *td, struct mprotect_args *uap) | ||||
return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot)); | return (kern_mprotect(td, (uintptr_t)uap->addr, uap->len, uap->prot)); | ||||
} | } | ||||
int | int | ||||
kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot) | kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot) | ||||
{ | { | ||||
vm_offset_t addr; | vm_offset_t addr; | ||||
vm_size_t pageoff; | vm_size_t pageoff; | ||||
int vm_error, max_prot; | |||||
addr = addr0; | addr = addr0; | ||||
if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0) | |||||
return (EINVAL); | |||||
Done Inline ActionsThis check is interesting. Linux documents this behavior (EINVAL for invalid protections), but POSIX seems to endorse our silent stripping. I'm inclined to think Linux is more correct here. brooks: This check is interesting. Linux documents this behavior (EINVAL for invalid protections), but… | |||||
Done Inline ActionsWe should probably document this error in the man page too. markj: We should probably document this error in the man page too. | |||||
max_prot = PROT_MAX_EXTRACT(prot); | |||||
prot = (prot & VM_PROT_ALL); | prot = (prot & VM_PROT_ALL); | ||||
alcUnsubmitted Done Inline ActionsWhile not strictly necessary for correctness, I would advocate changing this to prot = PROT_EXTRACT(prot); alc: While not strictly necessary for correctness, I would advocate changing this to
```
prot =… | |||||
pageoff = (addr & PAGE_MASK); | pageoff = (addr & PAGE_MASK); | ||||
addr -= pageoff; | addr -= pageoff; | ||||
size += pageoff; | size += pageoff; | ||||
size = (vm_size_t) round_page(size); | size = (vm_size_t) round_page(size); | ||||
#ifdef COMPAT_FREEBSD32 | #ifdef COMPAT_FREEBSD32 | ||||
if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { | if (SV_PROC_FLAG(td->td_proc, SV_ILP32)) { | ||||
if (((addr + size) & 0xffffffff) < addr) | if (((addr + size) & 0xffffffff) < addr) | ||||
return (EINVAL); | return (EINVAL); | ||||
} else | } else | ||||
#endif | #endif | ||||
if (addr + size < addr) | if (addr + size < addr) | ||||
return (EINVAL); | return (EINVAL); | ||||
switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr, | vm_error = KERN_SUCCESS; | ||||
addr + size, prot, FALSE)) { | if (max_prot != 0) { | ||||
if ((max_prot & prot) != prot) | |||||
return (EINVAL); | |||||
Not Done Inline ActionsI do not understand this. Suppose you have a readonly shared mapping of the file, which was created using O_RDONLY file descriptor. Isn't this addition allows to add VM_PROT_WRITE to the corresponding vm_map_entry max_prot ? Then on the next step user can mprotect(2) the mapping to modify the file. kib: I do not understand this.
Suppose you have a readonly shared mapping of the file, which was… | |||||
Done Inline ActionsI think this does the right thing, but I had to read the implementation a couple times to convince myself. In vm_map_protect, when set_max is true, the new max_prot is checked against existing entries max_protection in the first loop over the region: if ((new_prot & current->max_protection) != new_prot) { vm_map_unlock(map); return (KERN_PROTECTION_FAILURE); } This rejects attempts to upgrade max_prot. brooks: I think this does the right thing, but I had to read the implementation a couple times to… | |||||
vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map, | |||||
addr, addr + size, max_prot, TRUE); | |||||
} | |||||
Done Inline ActionsYou can avoid a goto by initializing vm_error to KERN_SUCCESS and skipping the final vm_map_protect() if vm_error != KERN_SUCCESS. markj: You can avoid a goto by initializing `vm_error` to `KERN_SUCCESS` and skipping the final… | |||||
if (vm_error == KERN_SUCCESS) | |||||
vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map, | |||||
addr, addr + size, prot, FALSE); | |||||
switch (vm_error) { | |||||
case KERN_SUCCESS: | case KERN_SUCCESS: | ||||
return (0); | return (0); | ||||
case KERN_PROTECTION_FAILURE: | case KERN_PROTECTION_FAILURE: | ||||
return (EACCES); | return (EACCES); | ||||
case KERN_RESOURCE_SHORTAGE: | case KERN_RESOURCE_SHORTAGE: | ||||
return (ENOMEM); | return (ENOMEM); | ||||
} | } | ||||
return (EINVAL); | return (EINVAL); | ||||
▲ Show 20 Lines • Show All 977 Lines • Show Last 20 Lines |
s/mincore_mapped/imply_prot_max/