Changeset View
Standalone View
sys/compat/linux/linux_mmap.c
Show All 32 Lines | |||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include <sys/capsicum.h> | #include <sys/capsicum.h> | ||||
#include <sys/file.h> | #include <sys/file.h> | ||||
#include <sys/imgact.h> | #include <sys/imgact.h> | ||||
#include <sys/ktr.h> | #include <sys/ktr.h> | ||||
#include <sys/lock.h> | |||||
#include <sys/mman.h> | #include <sys/mman.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/resourcevar.h> | #include <sys/resourcevar.h> | ||||
#include <sys/rwlock.h> | |||||
#include <sys/syscallsubr.h> | #include <sys/syscallsubr.h> | ||||
#include <sys/sysent.h> | #include <sys/sysent.h> | ||||
#include <sys/sysproto.h> | #include <sys/sysproto.h> | ||||
#include <vm/pmap.h> | #include <vm/pmap.h> | ||||
#include <vm/vm_extern.h> | #include <vm/vm_extern.h> | ||||
#include <vm/vm_map.h> | #include <vm/vm_map.h> | ||||
#include <vm/vm_object.h> | |||||
#include <compat/linux/linux_emul.h> | #include <compat/linux/linux_emul.h> | ||||
#include <compat/linux/linux_mib.h> | #include <compat/linux/linux_mib.h> | ||||
#include <compat/linux/linux_mmap.h> | #include <compat/linux/linux_mmap.h> | ||||
#include <compat/linux/linux_persona.h> | #include <compat/linux/linux_persona.h> | ||||
#include <compat/linux/linux_util.h> | #include <compat/linux/linux_util.h> | ||||
#define STACK_SIZE (2 * 1024 * 1024) | #define STACK_SIZE (2 * 1024 * 1024) | ||||
▲ Show 20 Lines • Show All 178 Lines • ▼ Show 20 Lines | if ((prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0) | ||||
return (EINVAL); | return (EINVAL); | ||||
#if defined(__amd64__) | #if defined(__amd64__) | ||||
linux_fixup_prot(td, &prot); | linux_fixup_prot(td, &prot); | ||||
#endif | #endif | ||||
return (kern_mprotect(td, addr, len, prot)); | return (kern_mprotect(td, addr, len, prot)); | ||||
} | } | ||||
/* | |||||
* Implement Linux madvise(MADV_DONTNEED), which has unusual semantics: for | |||||
* anonymous memory, pages in the range are immediately discarded. | |||||
*/ | |||||
static int | |||||
linux_madvise_dontneed(struct thread *td, vm_offset_t start, vm_offset_t end) | |||||
{ | |||||
vm_map_t map; | |||||
vm_map_entry_t entry; | |||||
vm_object_t object; | |||||
vm_offset_t estart, eend; | |||||
vm_pindex_t pstart, pend; | |||||
int error; | |||||
map = &td->td_proc->p_vmspace->vm_map; | |||||
if (!vm_map_check_range(map, start, end)) | |||||
return (EINVAL); | |||||
start = trunc_page(start); | |||||
end = round_page(end); | |||||
error = 0; | |||||
vm_map_lock_read(map); | |||||
if (!vm_map_lookup_entry(map, start, &entry)) | |||||
entry = vm_map_entry_succ(entry); | |||||
for (; entry->start < end; entry = vm_map_entry_succ(entry)) { | |||||
if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) | |||||
continue; | |||||
if (entry->wired_count != 0) { | |||||
error = EINVAL; | |||||
break; | |||||
} | |||||
object = entry->object.vm_object; | |||||
if (object == NULL) | |||||
continue; | |||||
pstart = OFF_TO_IDX(entry->offset); | |||||
if (start > entry->start) { | |||||
pstart += atop(start - entry->start); | |||||
estart = start; | |||||
} else { | |||||
estart = entry->start; | |||||
} | |||||
pend = OFF_TO_IDX(entry->offset) + | |||||
atop(entry->end - entry->start); | |||||
if (entry->end > end) { | |||||
pend -= atop(entry->end - end); | |||||
eend = end; | |||||
} else { | |||||
eend = entry->end; | |||||
} | |||||
if ((object->flags & (OBJ_ANON | OBJ_ONEMAPPING)) != | |||||
(OBJ_ANON | OBJ_ONEMAPPING)) { | |||||
kib: ONEMAPPING is not an indicator of the shared mapping, and it is only advisory. | |||||
markjAuthorUnsubmitted Done Inline ActionsIf ONEMAPPING is set, then the object is privately mapped, but the converse statement is not true. I am trying to be conservative here: if it is possible that the mapping is shared, then we do not free any pages. There is a race here though. OBJ_ANON is stable (i.e., never cleared during the object lifecycle), but ONEMAPPING is not. The flags must be checked under the object lock. markj: If ONEMAPPING is set, then the object is privately mapped, but the converse statement is not… | |||||
/* | |||||
* Handle shared mappings the same way as native FreeBSD | |||||
* madvise(MADV_DONTNEED). | |||||
*/ | |||||
pmap_advise(map->pmap, estart, eend, MADV_DONTNEED); | |||||
vm_object_madvise(object, pstart, pend, MADV_DONTNEED); | |||||
} else { | |||||
/* | |||||
* Singly-mapped anonymous memory is discarded. This | |||||
* does not match Linux's semantics when the object | |||||
* belongs to a shadow chain of length > 1, since | |||||
Not Done Inline ActionsI do not think this recheck is needed. The map is locked (even if for read), and since we saw OBJ_ONEMAPPING set, it cannot be cleared by any other way than forking or doing some operation on our map. kib: I do not think this recheck is needed. The map is locked (even if for read), and since we saw… | |||||
Done Inline ActionsI believe you are right. This assumption is rather subtle though and will lead to some nasty bugs if it ever becomes false. Since rechecking is not particularly expensive, I prefer to keep it. If we had some way of asserting that the corresponding vm_map lock is held when ONEMAPPING is checked or cleared, then I would be willing to change this. Maybe instead of checking the flag directly, we could have bool vm_map_owns_object(vm_map_t map, vm_map_entry_t entry) { vm_object_t object; VM_MAP_ASSERT_LOCKED(map); object = entry->object.vm_object; return ((object->flags & OBJ_ONEMAPPING) != 0); } and a similar function to clear OBJ_ONEMAPPING which asserts that both the map and object locks are held. markj: I believe you are right. This assumption is rather subtle though and will lead to some nasty… | |||||
Not Done Inline ActionsI am mostly fine with it, but 'owns' is arguably the wrong term, and I cannot propose anything better. Please go ahead with the current patch. kib: I am mostly fine with it, but 'owns' is arguably the wrong term, and I cannot propose anything… | |||||
* subsequent faults may retrieve pages from an | |||||
* intermediate anonymous object. However, handling | |||||
* this case correctly introduces a fair bit of | |||||
* complexity. | |||||
*/ | |||||
VM_OBJECT_WLOCK(object); | |||||
vm_object_collapse(object); | |||||
vm_object_page_remove(object, pstart, pend, 0); | |||||
kibUnsubmitted Not Done Inline ActionsHm, wouldn't this destroy dirty pages of the vnode when the mapping is a shared mapping of the file ? This has obvious security implications. For something like a device managed mappings it has other but equally interesting effects. kib: Hm, wouldn't this destroy dirty pages of the vnode when the mapping is a shared mapping of the… | |||||
markjAuthorUnsubmitted Done Inline ActionsI don't quite follow: to reach this point we must have OBJ_ANON set, which implies that the object type is OBJT_DEFAULT or _SWAP. markj: I don't quite follow: to reach this point we must have OBJ_ANON set, which implies that the… | |||||
kibUnsubmitted Not Done Inline ActionsI missed this, ok. kib: I missed this, ok. | |||||
if (object->backing_object != NULL && | |||||
(object->backing_object->flags & OBJ_ANON) != 0) | |||||
linux_msg("possibly incorrect MADV_DONTNEED"); | |||||
VM_OBJECT_WUNLOCK(object); | |||||
} | |||||
} | |||||
vm_map_unlock_read(map); | |||||
return (error); | |||||
} | |||||
int | int | ||||
linux_madvise_common(struct thread *td, uintptr_t addr, size_t len, int l_behav) | linux_madvise_common(struct thread *td, uintptr_t addr, size_t len, int l_behav) | ||||
{ | { | ||||
int behav; | int behav; | ||||
switch (l_behav) { | switch (l_behav) { | ||||
case LINUX_MADV_NORMAL: | case LINUX_MADV_NORMAL: | ||||
behav = MADV_NORMAL; | behav = MADV_NORMAL; | ||||
break; | break; | ||||
case LINUX_MADV_RANDOM: | case LINUX_MADV_RANDOM: | ||||
behav = MADV_RANDOM; | behav = MADV_RANDOM; | ||||
break; | break; | ||||
case LINUX_MADV_SEQUENTIAL: | case LINUX_MADV_SEQUENTIAL: | ||||
behav = MADV_SEQUENTIAL; | behav = MADV_SEQUENTIAL; | ||||
break; | break; | ||||
case LINUX_MADV_WILLNEED: | case LINUX_MADV_WILLNEED: | ||||
behav = MADV_WILLNEED; | behav = MADV_WILLNEED; | ||||
break; | break; | ||||
case LINUX_MADV_DONTNEED: | case LINUX_MADV_DONTNEED: | ||||
/* | return (linux_madvise_dontneed(td, addr, addr + len)); | ||||
* Note that the semantics of Linux MADV_DONTNEED is very | |||||
* different from the FreeBSD's madvise(2) flag of the same name. | |||||
*/ | |||||
behav = MADV_DONTNEED; | |||||
break; | |||||
case LINUX_MADV_FREE: | case LINUX_MADV_FREE: | ||||
behav = MADV_FREE; | behav = MADV_FREE; | ||||
break; | break; | ||||
case LINUX_MADV_REMOVE: | case LINUX_MADV_REMOVE: | ||||
linux_msg(curthread, "unsupported madvise MADV_REMOVE"); | linux_msg(curthread, "unsupported madvise MADV_REMOVE"); | ||||
return (EINVAL); | return (EINVAL); | ||||
case LINUX_MADV_DONTFORK: | case LINUX_MADV_DONTFORK: | ||||
return (kern_minherit(td, addr, len, INHERIT_NONE)); | return (kern_minherit(td, addr, len, INHERIT_NONE)); | ||||
▲ Show 20 Lines • Show All 52 Lines • Show Last 20 Lines |
kib: OBJ_ONEMAPPING is not an indicator of a shared mapping; the flag is only advisory.