Changeset View
Standalone View
sys/vm/vm_map.c
Show First 20 Lines • Show All 1,479 Lines • ▼ Show 20 Lines | vm_map_fixed(vm_map_t map, vm_object_t object, vm_ooffset_t offset, | ||||
} else { | } else { | ||||
result = vm_map_insert(map, object, offset, start, end, | result = vm_map_insert(map, object, offset, start, end, | ||||
prot, max, cow); | prot, max, cow); | ||||
} | } | ||||
vm_map_unlock(map); | vm_map_unlock(map); | ||||
return (result); | return (result); | ||||
} | } | ||||
static const int aslr_pages_rnd_64[2] = {0x1000, 0x10}; | |||||
static const int aslr_pages_rnd_32[2] = {0x100, 0x4}; | |||||
static int coalesce_anon = 1; | |||||
emaste: should this make reference to "tries" or "retries" or such? | |||||
SYSCTL_INT(_vm, OID_AUTO, coalesce_anon, CTLFLAG_RW, | |||||
&coalesce_anon, 0, | |||||
Not Done Inline ActionsMissing description. op: Missing description. | |||||
Done Inline ActionsI think "coalesce_anon" or "merge_anon" might be a better name, but I don't feel too strongly about it. As Alan pointed out, this merging has benefits independent of ASR; perhaps we should make it orthogonal to any randomization that we may do? markj: I think "coalesce_anon" or "merge_anon" might be a better name, but I don't feel too strongly… | |||||
Done Inline ActionsOk, moved it out of pure alsr control. Now, combined with the retry change, it should be usable alone (I hope so). kib: Ok, moved it out of pure alsr control. Now, combined with the retry change, it should be… | |||||
Done Inline ActionsSorry for being indecisive. Thinking some more, I think "clustering" actually makes more sense than "coalescing." Coalescing is the process of bringing together multiple entities that were previously separate, but in this case, the anonymous mappings are not separate to begin with. markj: Sorry for being indecisive. Thinking some more, I think "clustering" actually makes more sense… | |||||
Not Done Inline Actions@markj are you suggesting even committing the coalescing separately? emaste: @markj are you suggesting even committing the coalescing separately? | |||||
"Coalesce anonymous mappings"); | |||||
#define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31) | |||||
static bool | |||||
Not Done Inline ActionsHere too. op: Here too. | |||||
Not Done Inline ActionsMaybe "coalesce anonymous mappings"? And coalesce_anon instead of collapse_anon? emaste: Maybe "coalesce anonymous mappings"? And coalesce_anon instead of collapse_anon? | |||||
vm_map_find_coalesce_anon(vm_object_t object, vm_prot_t prot, int cow) | |||||
{ | |||||
if (object != NULL || (cow & (MAP_INHERIT_SHARE | MAP_STACK_GROWS_UP | | |||||
MAP_STACK_GROWS_DOWN)) != 0 || prot == PROT_NONE) | |||||
return (false); | |||||
return (coalesce_anon != 0); | |||||
} | |||||
/* | /* | ||||
* Searches for the specified amount of free space in the given map with the | * Searches for the specified amount of free space in the given map with the | ||||
* specified alignment. Performs an address-ordered, first-fit search from | * specified alignment. Performs an address-ordered, first-fit search from | ||||
* the given address "*addr", with an optional upper bound "max_addr". If the | * the given address "*addr", with an optional upper bound "max_addr". If the | ||||
* parameter "alignment" is zero, then the alignment is computed from the | * parameter "alignment" is zero, then the alignment is computed from the | ||||
* given (object, offset) pair so as to enable the greatest possible use of | * given (object, offset) pair so as to enable the greatest possible use of | ||||
* superpage mappings. Returns KERN_SUCCESS and the address of the free space | * superpage mappings. Returns KERN_SUCCESS and the address of the free space | ||||
* in "*addr" if successful. Otherwise, returns KERN_NO_SPACE. | * in "*addr" if successful. Otherwise, returns KERN_NO_SPACE. | ||||
* | * | ||||
* The map must be locked. Initially, there must be at least "length" bytes | * The map must be locked. Initially, there must be at least "length" bytes | ||||
* of free space at the given address. | * of free space at the given address. | ||||
*/ | */ | ||||
static int | static int | ||||
vm_map_alignspace(vm_map_t map, vm_object_t object, vm_ooffset_t offset, | vm_map_alignspace(vm_map_t map, vm_object_t object, vm_ooffset_t offset, | ||||
vm_offset_t *addr, vm_size_t length, vm_offset_t max_addr, | vm_offset_t *addr, vm_size_t length, vm_offset_t max_addr, | ||||
vm_offset_t alignment) | vm_offset_t alignment) | ||||
{ | { | ||||
vm_offset_t aligned_addr, free_addr; | vm_offset_t aligned_addr, free_addr; | ||||
VM_MAP_ASSERT_LOCKED(map); | VM_MAP_ASSERT_LOCKED(map); | ||||
free_addr = *addr; | free_addr = *addr; | ||||
KASSERT(!vm_map_findspace(map, free_addr, length, addr) && | KASSERT(!vm_map_findspace(map, free_addr, length, addr) && | ||||
free_addr == *addr, ("caller provided insufficient free space")); | free_addr == *addr, ("caller provided insufficient free space")); | ||||
for (;;) { | for (;;) { | ||||
/* | /* | ||||
Not Done Inline ActionsUpdate needed here after rS338370 emaste: Update needed here after rS338370 | |||||
* At the start of every iteration, the free space at address | * At the start of every iteration, the free space at address | ||||
* "*addr" is at least "length" bytes. | * "*addr" is at least "length" bytes. | ||||
*/ | */ | ||||
if (alignment == 0) | if (alignment == 0) | ||||
pmap_align_superpage(object, offset, addr, length); | pmap_align_superpage(object, offset, addr, length); | ||||
else if ((*addr & (alignment - 1)) != 0) { | else if ((*addr & (alignment - 1)) != 0) { | ||||
Done Inline ActionsI'm not sure that this makes sense after r327218. Consider the loop in vm_map_find() which causes do_aslr to decrement to 0. Subsequent iterations of the loop are not affected by this update to *addr. Really, we want to be updating min_addr. markj: I'm not sure that this makes sense after r327218. Consider the loop in vm_map_find() which… | |||||
*addr &= ~(alignment - 1); | *addr &= ~(alignment - 1); | ||||
*addr += alignment; | *addr += alignment; | ||||
} | } | ||||
aligned_addr = *addr; | aligned_addr = *addr; | ||||
if (aligned_addr == free_addr) { | if (aligned_addr == free_addr) { | ||||
/* | /* | ||||
* Alignment did not change "*addr", so "*addr" must | * Alignment did not change "*addr", so "*addr" must | ||||
* still provide sufficient free space. | * still provide sufficient free space. | ||||
Show All 32 Lines | |||||
* prior to making call to account for the new entry. | * prior to making call to account for the new entry. | ||||
*/ | */ | ||||
int | int | ||||
vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, | vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, | ||||
vm_offset_t *addr, /* IN/OUT */ | vm_offset_t *addr, /* IN/OUT */ | ||||
vm_size_t length, vm_offset_t max_addr, int find_space, | vm_size_t length, vm_offset_t max_addr, int find_space, | ||||
vm_prot_t prot, vm_prot_t max, int cow) | vm_prot_t prot, vm_prot_t max, int cow) | ||||
{ | { | ||||
vm_offset_t alignment, min_addr; | vm_offset_t alignment, curr_min_addr, min_addr; | ||||
int rv; | int pidx, preserve, rv, try; | ||||
bool anon, do_aslr, en_aslr, update_anon; | |||||
KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || | KASSERT((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) == 0 || | ||||
object == NULL, | object == NULL, | ||||
("vm_map_find: non-NULL backing object for stack")); | ("vm_map_find: non-NULL backing object for stack")); | ||||
if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL || | if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL || | ||||
(object->flags & OBJ_COLORED) == 0)) | (object->flags & OBJ_COLORED) == 0)) | ||||
find_space = VMFS_ANY_SPACE; | find_space = VMFS_ANY_SPACE; | ||||
if (find_space >> 8 != 0) { | if (find_space >> 8 != 0) { | ||||
KASSERT((find_space & 0xff) == 0, ("bad VMFS flags")); | KASSERT((find_space & 0xff) == 0, ("bad VMFS flags")); | ||||
alignment = (vm_offset_t)1 << (find_space >> 8); | alignment = (vm_offset_t)1 << (find_space >> 8); | ||||
} else | } else | ||||
alignment = 0; | alignment = 0; | ||||
en_aslr = do_aslr = (map->flags & MAP_ASLR) != 0; | |||||
update_anon = anon = vm_map_find_coalesce_anon(object, prot, cow); | |||||
Not Done Inline ActionsI noticed that we are coalescing mappings in pipe_map. Is there any advantage to be gained by doing this? As a downside, I think the coalescing increases page table usage, especially since most pipe_map mappings are the same size. markj: I noticed that we are coalescing mappings in pipe_map. Is there any advantage to be gained by… | |||||
Done Inline ActionsI don't think we should update anon_loc if max_addr is specified (e.g., MAP_32BIT was passed). markj: I don't think we should update anon_loc if max_addr is specified (e.g., MAP_32BIT was passed). | |||||
curr_min_addr = min_addr = *addr; | |||||
if (en_aslr && min_addr == 0 && !anon && find_space != VMFS_NO_SPACE && | |||||
Not Done Inline ActionsI think this is still problematic. Consider an application which dlopen()s many small libraries (so anon coalescing isn't relevant). We always start the search at the same point in the address space. vm_map_find() performs a first-fit search and will thus return successive regions corresponding to the gaps between already-mapped objects. However, these gaps may not be large enough to satisfy vm_map_find_aslr_adjust(), so we end up giving up the search. markj: I think this is still problematic. Consider an application which dlopen()s many small libraries… | |||||
Done Inline ActionsThe code should give up the search for the region that allows the randomization part, and fall to the do_aslr == 0 case (if block right under the again: label). There, we would do the vm_map_findspace() from the original min_addr base and return whatever vm_map_findspace() found. So yes, this case would get less rnds than it could be, but the allocation should not fail. And the later part, not failing the allocation which can be satisfied at all in !aslr case, is really the only thing I care about. If user is concerned that this boundary case does not get enough rnd into VAs, he can increase aslr_slopiness by the cost of increased allocation CPU overhead. This is the reason for slopiness to be tunable. kib: The code should give up the search for the region that allows the randomization part, and fall… | |||||
(map->flags & MAP_ASLR_IGNSTART) != 0) | |||||
curr_min_addr = min_addr = vm_map_min(map); | |||||
try = 0; | |||||
vm_map_lock(map); | vm_map_lock(map); | ||||
if (anon) { | |||||
curr_min_addr = map->anon_loc; | |||||
if (curr_min_addr == 0) | |||||
anon = false; | |||||
Done Inline ActionsIMO it would be clearer if you rename "anon" to "coalesce". markj: IMO it would be clearer if you rename "anon" to "coalesce". | |||||
} | |||||
if (find_space != VMFS_NO_SPACE) { | if (find_space != VMFS_NO_SPACE) { | ||||
KASSERT(find_space == VMFS_ANY_SPACE || | KASSERT(find_space == VMFS_ANY_SPACE || | ||||
find_space == VMFS_OPTIMAL_SPACE || | find_space == VMFS_OPTIMAL_SPACE || | ||||
find_space == VMFS_SUPER_SPACE || | find_space == VMFS_SUPER_SPACE || | ||||
alignment != 0, ("unexpected VMFS flag")); | alignment != 0, ("unexpected VMFS flag")); | ||||
min_addr = *addr; | |||||
again: | again: | ||||
if (vm_map_findspace(map, min_addr, length, addr) || | /* | ||||
* No more than two tries, not counting | |||||
Not Done Inline Actions"When creating an anonymous mapping, try clustering with an existing anonymous mapping first." markj: "When creating an anonymous mapping, try clustering with an existing anonymous mapping first." | |||||
* VMFS_OPTIMAL_SPACE->VMFS_ANY_SPACE fallback. | |||||
Done Inline ActionsWon't this case be true any time we attempt an anon mapping? This looks like it should just be if (en_aslr && !do_aslr). markj: Won't this case be true any time we attempt an anon mapping? This looks like it should just be… | |||||
Done Inline ActionsI rewrote (or rather, reformulated) the retry logic to not depend on the combination of anon+do_aslr. Instead, there is the try number which explicitly guide the selection of the applicable choices. kib: I rewrote (or rather, reformulated) the retry logic to not depend on the combination of… | |||||
* First try might apply randomization. | |||||
*/ | |||||
try++; | |||||
Done Inline ActionsThe text is a bit misleading since coalesce == false doesn't imply that coalescing failed. How about: "We make up to two attempts to find address space for a given find_space value. The first attempt may apply randomization or may cluster with an existing anonymous mapping. If this first attempt fails, perform a first-fit search of the available address space." markj: The text is a bit misleading since coalesce == false doesn't imply that coalescing failed. How… | |||||
MPASS(try <= 2); | |||||
if (try == 2) { | |||||
/* | |||||
* Second try: we failed either to find a | |||||
* suitable region for randomizing the | |||||
* allocation, or the anon coalescing. Retry | |||||
* with free run. | |||||
*/ | |||||
anon = false; | |||||
curr_min_addr = (map->flags & MAP_ASLR_IGNSTART) != 0 ? | |||||
vm_map_min(map) : min_addr; | |||||
} | |||||
if (try == 1 && en_aslr && !anon) { | |||||
/* | |||||
* Find space for allocation, including | |||||
Done Inline Actions"... or to cluster with an existing mapping." markj: "... or to cluster with an existing mapping." | |||||
* reserve needed for later randomization. | |||||
*/ | |||||
pidx = MAXPAGESIZES > 1 && pagesizes[1] != 0 && | |||||
(find_space == VMFS_SUPER_SPACE || find_space == | |||||
VMFS_OPTIMAL_SPACE) ? 1 : 0; | |||||
preserve = vm_map_max(map) > MAP_32BIT_MAX_ADDR && | |||||
(max_addr == 0 || max_addr > MAP_32BIT_MAX_ADDR) ? | |||||
Not Done Inline ActionsI don't see how randomization gets applied if anon is true. markj: I don't see how randomization gets applied if `anon` is true. | |||||
Done Inline ActionsI think I fixed that in the new loop construction, by applying the rnd logic for anon case when curr_min_address == 0. Before the first non-coalesced anon mapping got the randomization because previous mappings were. kib: I think I fixed that in the new loop construction, by applying the rnd logic for anon case when… | |||||
Not Done Inline ActionsI think you need to initialize anon_loc to 0 in _vm_map_init() for this to work as intended? Even then, when curr_min_addr == 0, the amount of randomization applied to the initial anon mapping is quite small. For PIEs, libraries are loaded after the (random) base load address, but otherwise, the set of possible initial addresses is quite small. markj: I think you need to initialize anon_loc to 0 in _vm_map_init() for this to work as intended? | |||||
Done Inline ActionsI fixed several more bugs with anon_loc, e.g. copying it on fork. Also I added explicit setting of anon_loc on execution of the ELF binary in 'hard' mode, similar to the interpreter base address selection. I am not sure what do you mean by the amount of randomization. Either rnd is applied or not, if it is applied, then the amount of the entropy is guaranteed to be some. kib: I fixed several more bugs with anon_loc, e.g. copying it on fork. Also I added explicit… | |||||
Not Done Inline ActionsI mean that if the curr_min_addr is not randomized, the amount of entropy added is quite small. In the latest version this is still a problem for non-anonymous mappings in non-PIE binaries: the starting min address is constant (vm_daddr + lim(RLIMIT_DATA)), so the load address of libc.so, for example, can be guessed without much work. I am not sure if this is really a significant problem when the executable's address is not randomized, however. markj: I mean that if the curr_min_addr is not randomized, the amount of entropy added is quite small. | |||||
Done Inline ActionsI am still not sure about this. Do you mean that the amount of entropy allowed by the aslr_pages_rnd_XXX arrays is too small ? kib: I am still not sure about this. Do you mean that the amount of entropy allowed by the… | |||||
Not Done Inline ActionsIndeed, it does not provide nearly as much entropy as the initial randomization of et_dyn_addr for PIEs or anon_loc. Consider that libc.so is mapped with VMFS_OPTIMAL_SPACE, so we will set *addr += (arc4random() % 0x10) * 0x200000; For a non-PIE on amd64 this means that libc.so will get loaded somewhere in [0x800000000, 0x800200000], so the entropy added is quite minimal. PIEs do not have this problem. markj: Indeed, it does not provide nearly as much entropy as the initial randomization of et_dyn_addr… | |||||
Done Inline ActionsYes, this is how I want to keep it now, by disturbing the normal layout as minimal as possible for PoC. On the other hand, since PIE base, ld.elf load address, and now initial anon base are already 'hard' randomized, might be it is indeed does not make sense to keep that part of entropy low. In fact I think we will see after another exp run. kib: Yes, this is how I want to keep it now, by disturbing the normal layout as minimal as possible… | |||||
aslr_pages_rnd_64[pidx] : aslr_pages_rnd_32[pidx]; | |||||
if (vm_map_findspace(map, curr_min_addr, length + | |||||
preserve * pagesizes[pidx], addr) || | |||||
Done Inline ActionsI'd consider calling this a "gap" instead, here and in the code (instead of "preserve"). markj: I'd consider calling this a "gap" instead, here and in the code (instead of "preserve"). | |||||
(max_addr != 0 && *addr + length > max_addr)) { | (max_addr != 0 && *addr + length > max_addr)) { | ||||
curr_min_addr = min_addr; | |||||
goto again; | |||||
} | |||||
/* And randomize the start address. */ | |||||
*addr += (arc4random() % preserve) * pagesizes[pidx]; | |||||
} else if (vm_map_findspace(map, curr_min_addr, length, addr) || | |||||
Done Inline ActionsWhy is it necessary to set curr_min_addr here? We know try == 1, so after following the goto we will assign to curr_min_addr again. markj: Why is it necessary to set curr_min_addr here? We know try == 1, so after following the goto we… | |||||
(max_addr != 0 && *addr + length > max_addr)) { | |||||
Done Inline ActionsI think it would be worth adding a counter for vm_map_findspace() failures, at least for the en_aslr case. markj: I think it would be worth adding a counter for vm_map_findspace() failures, at least for the… | |||||
Done Inline ActionsI added the counter for try == 2 restarts. IMO iti is of limited usefulness because it is global, but I do not think it is worth adding the per-vmspace counters and the whole required infrastructure for it. kib: I added the counter for try == 2 restarts.
IMO iti is of limited usefulness because it is… | |||||
if (anon) | |||||
goto again; | |||||
Not Done Inline ActionsDon't we need to reset curr_min_addr here too? markj: Don't we need to reset curr_min_addr here too? | |||||
Done Inline ActionsThe intent is to make two normal passes without coalescing. Second pass resets curr_min_addr. kib: The intent is to make two normal passes without coalescing. Second pass resets curr_min_addr. | |||||
Not Done Inline Actionsmm, we reset curr_min_addr only if en_aslr is set though. markj: mm, we reset curr_min_addr only if en_aslr is set though. | |||||
rv = KERN_NO_SPACE; | rv = KERN_NO_SPACE; | ||||
goto done; | goto done; | ||||
} | } | ||||
if (find_space != VMFS_ANY_SPACE && | if (find_space != VMFS_ANY_SPACE && | ||||
(rv = vm_map_alignspace(map, object, offset, addr, length, | (rv = vm_map_alignspace(map, object, offset, addr, length, | ||||
max_addr, alignment)) != KERN_SUCCESS) { | max_addr, alignment)) != KERN_SUCCESS) { | ||||
if (find_space == VMFS_OPTIMAL_SPACE) { | if (find_space == VMFS_OPTIMAL_SPACE) { | ||||
find_space = VMFS_ANY_SPACE; | find_space = VMFS_ANY_SPACE; | ||||
curr_min_addr = min_addr; | |||||
anon = vm_map_find_coalesce_anon(object, prot, | |||||
Done Inline ActionsI guess you can just write anon = update_anon instead. markj: I guess you can just write `anon = update_anon` instead. | |||||
cow); | |||||
try = 0; | |||||
goto again; | goto again; | ||||
} | } | ||||
goto done; | goto done; | ||||
} | } | ||||
Not Done Inline ActionsTypo here, you should write ASR in the comment instead of ASLR. op: Typo here, you should write ASR in the comment instead of ASLR.
| |||||
Not Done Inline ActionsThe comment seems to suggest that this is the final step of a 4-step process (A, S, L, R). What about "Randomize the map address if ASLR is active, unless we can coalesce an anon memory request."? emaste: The comment seems to suggest that this is the final step of a 4-step process (A, S, L, R). | |||||
} | } | ||||
if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) { | if ((cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP)) != 0) { | ||||
rv = vm_map_stack_locked(map, *addr, length, sgrowsiz, prot, | rv = vm_map_stack_locked(map, *addr, length, sgrowsiz, prot, | ||||
Not Done Inline ActionsIf I'm reading this correctly (and forgive me if I'm not), does this mean that if an attacker puts too much pressure on the VM subsystem (via heap spraying or other attacks), the attacker could effectively disable ASLR? lattera-gmail.com: If I'm reading this correctly (and forgive me if I'm not), does this mean that if an attacker… | |||||
max, cow); | max, cow); | ||||
} else { | } else { | ||||
rv = vm_map_insert(map, object, offset, *addr, *addr + length, | rv = vm_map_insert(map, object, offset, *addr, *addr + length, | ||||
prot, max, cow); | prot, max, cow); | ||||
} | } | ||||
done: | done: | ||||
if (rv == KERN_SUCCESS && update_anon) | |||||
map->anon_loc = *addr + length; | |||||
Done Inline ActionsDoes it make sense to update anon_loc if find_space == VMFS_NO_SPACE? markj: Does it make sense to update anon_loc if `find_space == VMFS_NO_SPACE`? | |||||
vm_map_unlock(map); | vm_map_unlock(map); | ||||
return (rv); | return (rv); | ||||
} | } | ||||
/* | /* | ||||
* vm_map_find_min() is a variant of vm_map_find() that takes an | * vm_map_find_min() is a variant of vm_map_find() that takes an | ||||
* additional parameter (min_addr) and treats the given address | * additional parameter (min_addr) and treats the given address | ||||
* (*addr) differently. Specifically, it treats *addr as a hint | * (*addr) differently. Specifically, it treats *addr as a hint | ||||
* and not as the minimum address where the mapping is created. | * and not as the minimum address where the mapping is created. | ||||
* | * | ||||
* This function works in two phases. First, it tries to | * This function works in two phases. First, it tries to | ||||
* allocate above the hint. If that fails and the hint is | * allocate above the hint. If that fails and the hint is | ||||
* greater than min_addr, it performs a second pass, replacing | * greater than min_addr, it performs a second pass, replacing | ||||
* the hint with min_addr as the minimum address for the | * the hint with min_addr as the minimum address for the | ||||
* allocation. | * allocation. | ||||
*/ | */ | ||||
Not Done Inline ActionsWhat is the performance penalty of calling arc4random() on each call to mmap(NULL, MAP_ANON), sometimes multiple times? lattera-gmail.com: What is the performance penalty of calling arc4random() on each call to mmap(NULL, MAP_ANON)… | |||||
Not Done Inline ActionsHow much does calling arc4random() cost in address space fragmentation on each call to mmap(NULL, MAP_ANON) when coalescing cannot happen? lattera-gmail.com: How much does calling arc4random() cost in address space fragmentation on each call to mmap… | |||||
Not Done Inline ActionsYou should not generate new random number on every mmap call, you should just generate one at exec() time, and apply them all the times when mmap called. The relevant parts from the original ASLR author's design documentation: PaX can apply ASLR to tasks that are created from ELF executables and use ELF libraries. The randomized layout is determined at task creation time in the load_elf_binary() function in fs/binfmt_elf.c where three per task (or more precisely, mm_struct) variables are initialized with random numbers: delta_exec, delta_mmap and delta_stack. The last set of side effects of ASLR is address space fragmentation and entropy pool exhaustion. Since randomization shifts entire ranges of memory, it will also randomly change the gaps between them (which were constant before). This in turn will change the maximum size of memory mappings that will fit in there and applications expecting to be able to create them will fail. Finally, ASLR increases the consumption of the system's entropy pool since every task creation (through the execve() system call) requires some bits of randomness to determine the new address space layout. Depending on the system's threat model however a given implementation can relax the requirements for the quality of this entropy. In particular, if only remote attacks are considered, then ASLR does not need cryptographically secure random bits as a remote attacker cannot observe them (or if he can, he does not need to care about ASLR at all). op: You should not generate new random number on every mmap call, you should just generate one at… | |||||
Not Done Inline Actions
Why is it preferable to have mappings at fixed offsets from each other? Also, "entropy pool exhaustion" is not a relevant concept here. emaste: > You should not generate new random number on every mmap call
Why is it preferable to have… | |||||
Not Done Inline Actions
because that's the 'L' in ASLR :). per-mapping randomization is ASR as mentioned somewhere above already, see also KASLR: An Exercise in Cargo Cult Security. in practice libc and other common and big enough libraries contain a Turing complete gadget set so it makes no difference to an attacker whether everything else is randomized independently or not. on the other hand ASR has the drawback (that @kib incorrectly associated with ASLR) of unnecessarily fragmenting the virtual address space (a real issue on 32 bit archs even back in 2001) and thus increase page table usage for no net gain in security. pageexec_freemail.hu: > Why is it preferable to have mappings at fixed offsets from each other?
because that's the… | |||||
Not Done Inline Actions
That seems like a circular justification. The layout of objects in the address space is being randomized, regardless of whether or not they remain at fixed offsets to each other. There's another axis to consider when we evaluate any change like this and that's the complexity; all else being equal errors are more likely in larger and more complex changes, and that's an argument in favour of a smaller change, regardless of whether or not it has any other benefit. emaste: > because that's the 'L' in ASLR
That seems like a circular justification. The layout of… | |||||
Not Done Inline ActionsCorrectly implementing something that keeps fixed offsets between mappings (the same offsets as happen to occur in the address space layout without any snake oil spread over it) is actually not trivial, if possible at all. The thing is, some mappings must appear at ABI-fixed locations. E.g., if you have a non-PIE binary, it is mapped where the linker loaded it. The main stack must be contiguously mapped from the top of user memory down, code which crawls around ps_strings expects that, etc. The result is that these anchors affect the allocator if you provide a tilted hint for the mapping base. And, of course, you cannot just do (hint + offset) mod size, because of those mappings. In other words, the so-called 'simple shift' is actually not that simple. Then you have to note that there are very few available shift values for small (32-bit) address spaces, since you also must not destroy the superpage-friendly alignment. So while you could somewhat get away with a shift on a large AS (64-bit), although you sacrifice a significant portion of the address space as unused, you cannot get away with it on 32-bit. In the end, doing proper randomization of each mapping appears to be both simpler from the architectural PoV (resulting in concise code) and easier to reason about (as in, not only observing) to verify correctness. Of course, the issue of AS and page table fragmentation is there, but it is mitigated by the anon coalescing. Numbers might come. kib: Correctly implementing something that keeps fixed offsets between mappings (same as they… | |||||
Not Done Inline Actions
it is not only possible but trivial, the living example is PaX and its ASLR implementation that hasn't changed in any fundamental ways for 15 years.
there's exactly one such rule that one must obey (MAP_FIXED), everything else can be freely randomized.
no, there's no such ABI rule, in fact it's trivial to map the main stack anywhere in the address space.
that's a bug/feature in your implementation, nothing to do with any ABI 'rule'. fix it and you can move even the main stack around in arbitrary ways (though ASLR still observes the layout in that it tries to keep the primary stack as the highest map during execve, it's just not enforced later).
why is that? have you tried to implement it? have you looked at existing implementations? the thing is, there's nothing complex with region base address randomization, all you need is a per address space random constant to use for address space hole lookups in mmap (see mm_struct.mmap_base and delta_mmap in PaX).
superpage allocations are orthogonal to ASLR. aligned allocations will be aligned with correspondingly less randomization, the rest will be randomized as much as the region base address and previous allocations (all depends on the 'find the requested address space hole' algo) allow.
you could make this claim only after you have implemented ASLR as well, then you'd have a basis for comparison. till then it's pure speculation (unfounded too, as far as i can tell based on my own experience).
what is 'correctness' here? your current code and explicit omission of stack/etc randomization, no words on brute force prevention, no handling of address space exhaustion attacks, etc show that you're very far from anything that i'd consider correct. as for observation, your use of paxtest to show numbers of your ASR implementation is fundamentally wrong because its measurement algo was written specifically for ASLR, it cannot and will not produce correct values for ASR. pageexec_freemail.hu: > Correctly implementing something which keeps fixed offsets between mappings (same as they… | |||||
Not Done Inline Actions
given that i coined the term ASLR in the first place, allow me to know better than you what it actually means. the Layout in ASLR explicitly refers to how a typical process virtual address space is laid out based on regions (roughly main executable+brk, mmap region, stack) and how it is enough to randomize the base address of those regions for the purposes of ASLR. as for complexity, ASLR (as implemented in PaX) would be similar amount of code to what you're discussing here. pageexec_freemail.hu: > The layout of objects in the address space is being randomized, regardless of whether or not… | |||||
int | int | ||||
vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, | vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, | ||||
vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr, | vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr, | ||||
vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max, | vm_offset_t max_addr, int find_space, vm_prot_t prot, vm_prot_t max, | ||||
int cow) | int cow) | ||||
{ | { | ||||
vm_offset_t hint; | vm_offset_t hint; | ||||
int rv; | int rv; | ||||
▲ Show 20 Lines • Show All 281 Lines • ▼ Show 20 Lines | vm_map_submap( | ||||
vm_map_t submap) | vm_map_t submap) | ||||
{ | { | ||||
vm_map_entry_t entry; | vm_map_entry_t entry; | ||||
int result = KERN_INVALID_ARGUMENT; | int result = KERN_INVALID_ARGUMENT; | ||||
vm_map_lock(map); | vm_map_lock(map); | ||||
VM_MAP_RANGE_CHECK(map, start, end); | VM_MAP_RANGE_CHECK(map, start, end); | ||||
Done Inline ActionsMAP_IS_SUB_MAP, for consistency with MAP_ENTRY_IS_SUB_MAP? markj: MAP_IS_SUB_MAP, for consistency with MAP_ENTRY_IS_SUB_MAP? | |||||
if (vm_map_lookup_entry(map, start, &entry)) { | if (vm_map_lookup_entry(map, start, &entry)) { | ||||
vm_map_clip_start(map, entry, start); | vm_map_clip_start(map, entry, start); | ||||
} else | } else | ||||
entry = entry->next; | entry = entry->next; | ||||
vm_map_clip_end(map, entry, end); | vm_map_clip_end(map, entry, end); | ||||
if ((entry->start == start) && (entry->end == end) && | if ((entry->start == start) && (entry->end == end) && | ||||
((entry->eflags & MAP_ENTRY_COW) == 0) && | ((entry->eflags & MAP_ENTRY_COW) == 0) && | ||||
(entry->object.vm_object == NULL)) { | (entry->object.vm_object == NULL)) { | ||||
entry->object.sub_map = submap; | entry->object.sub_map = submap; | ||||
entry->eflags |= MAP_ENTRY_IS_SUB_MAP; | entry->eflags |= MAP_ENTRY_IS_SUB_MAP; | ||||
result = KERN_SUCCESS; | result = KERN_SUCCESS; | ||||
} | } | ||||
vm_map_unlock(map); | vm_map_unlock(map); | ||||
return (result); | return (result); | ||||
} | } | ||||
Done Inline ActionsDid you mean to clear the flag here? markj: Did you mean to clear the flag here? | |||||
/* | /* | ||||
* The maximum number of pages to map if MAP_PREFAULT_PARTIAL is specified | * The maximum number of pages to map if MAP_PREFAULT_PARTIAL is specified | ||||
*/ | */ | ||||
#define MAX_INIT_PT 96 | #define MAX_INIT_PT 96 | ||||
/* | /* | ||||
* vm_map_pmap_enter: | * vm_map_pmap_enter: | ||||
* | * | ||||
▲ Show 20 Lines • Show All 1,206 Lines • ▼ Show 20 Lines | while (entry->start < end) { | ||||
/* | /* | ||||
* Remove mappings for the pages, but only if the | * Remove mappings for the pages, but only if the | ||||
* mappings could exist. For instance, it does not | * mappings could exist. For instance, it does not | ||||
* make sense to call pmap_remove() for guard entries. | * make sense to call pmap_remove() for guard entries. | ||||
*/ | */ | ||||
if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 || | if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0 || | ||||
entry->object.vm_object != NULL) | entry->object.vm_object != NULL) | ||||
pmap_remove(map->pmap, entry->start, entry->end); | pmap_remove(map->pmap, entry->start, entry->end); | ||||
if (entry->end == map->anon_loc) | |||||
map->anon_loc = entry->prev->end; | |||||
/* | /* | ||||
* Delete the entry only after removing all pmap | * Delete the entry only after removing all pmap | ||||
* entries pointing to its pages. (Otherwise, its | * entries pointing to its pages. (Otherwise, its | ||||
* page frames may be reallocated, and any modify bits | * page frames may be reallocated, and any modify bits | ||||
* will be set in the wrong object!) | * will be set in the wrong object!) | ||||
*/ | */ | ||||
vm_map_entry_delete(map, entry); | vm_map_entry_delete(map, entry); | ||||
▲ Show 20 Lines • Show All 1,269 Lines • Show Last 20 Lines |
should this make reference to "tries" or "retries" or such?