Changeset View
Standalone View
vm/vm_fault.c
Context not available. | |||||
vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, | vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, | ||||
int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold) | int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold) | ||||
{ | { | ||||
vm_page_t m; | vm_page_t m, m_map; | ||||
int rv; | #if defined(__amd64__) && VM_NRESERVLEVEL > 0 | ||||
vm_page_t m_super; | |||||
#endif | |||||
int flags, psind, rv; | |||||
MPASS(fs->vp == NULL); | MPASS(fs->vp == NULL); | ||||
m = vm_page_lookup(fs->first_object, fs->first_pindex); | m = vm_page_lookup(fs->first_object, fs->first_pindex); | ||||
Context not available. | |||||
if (m == NULL || ((prot & VM_PROT_WRITE) != 0 && | if (m == NULL || ((prot & VM_PROT_WRITE) != 0 && | ||||
vm_page_busied(m)) || m->valid != VM_PAGE_BITS_ALL) | vm_page_busied(m)) || m->valid != VM_PAGE_BITS_ALL) | ||||
return (KERN_FAILURE); | return (KERN_FAILURE); | ||||
rv = pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type | | m_map = m; | ||||
PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), 0); | psind = 0; | ||||
#if defined(__amd64__) && VM_NRESERVLEVEL > 0 | |||||
if ((m_super = vm_reserv_to_superpage(m)) != NULL && | |||||
rounddown2(vaddr, pagesizes[m_super->psind]) >= fs->entry->start && | |||||
roundup2(vaddr, pagesizes[m_super->psind]) <= fs->entry->end) { | |||||
kib: Don't you need to check the (vaddr % page index) alignment?
alc (author): Yes.
alc: Mark, I'm pretty confident that the explanation for your assertion failure is the lack of a "+ 1" here. The previous line should round up "vaddr + 1", not "vaddr".
markj: That seems to have done the trick. :)
alc: Your assertion failure strongly suggests the following rather interesting scenario occurred: You are mmap()ing a large shared library, and the last N pages of code were memory resident, but N is less than 512, so it can't be a superpage mapping. However, the next 512 - N pages of presumably copy-on-write initialized data is also resident. That is how the vm_reserv_to_superpage() check passed. On amd64 there is always a superpage boundary between the end of the code segment and the beginning of the data, so we could use the data as read/execute-only padding to fill out a superpage mapping for the end of the code segment.
markj: I'll note a few things that I observed while debugging yesterday:
- The page triggering the… [remainder of this comment truncated in the capture]
alc: I'm a little puzzled at how the bug could have led to an assertion failure with shmat(). The obvious trigger is a situation where a subrange of a vm object gets mapped, and the bug leads to mappings at the pmap level without corresponding mappings at the vm map entry level, so we fail to do a pmap_remove() before vm_page_free().
markj: I don't quite understand it either. I'm planning to dig into this further, FWIW. I haven't seen any segfaults after applying the fix, unlike Peter.
markj: The shm segments are shared between firefox and Xorg. The segments of interest have size 0x7af000. What appears to happen is that anonymous RW mappings are created in the small region immediately following one of the segments, and they end up being backed by the segment's VM object. If this happens in one of the processes but not the other, and the mapping of the last 2MB of a segment is promoted, I think we can hit the situation you described.
MPASS(m_super->object == fs->first_object); | |||||
flags = PS_ALL_VALID; | |||||
if ((prot & VM_PROT_WRITE) != 0) { | |||||
flags |= PS_NONE_BUSY; | |||||
if ((fs->first_object->flags & OBJ_UNMANAGED) == 0) | |||||
flags |= PS_ALL_DIRTY; | |||||
kib: I am not sure about non-amd64 pmaps. Is the assumption that a writeable superpage is dirty amd64-specific? I understand that this block is under #ifdef amd64, but later the call to vm_fault_dirty() is arch-independent and gated on psind == 0.
alc (author): I'm pretty confident that the arm v6 pmap is the same. I don't think that the arm64 pmap implements a proper dirty bit. Specifically, I think that any writeable page may be regarded as dirty.
} | |||||
if (vm_page_ps_test(m_super, flags, m)) { | |||||
m_map = m_super; | |||||
psind = m_super->psind; | |||||
vaddr = rounddown2(vaddr, pagesizes[psind]); | |||||
/* Preset the modified bit for dirty superpages. */ | |||||
if ((flags & PS_ALL_DIRTY) != 0) | |||||
fault_type |= VM_PROT_WRITE; | |||||
} | |||||
} | |||||
#endif | |||||
rv = pmap_enter(fs->map->pmap, vaddr, m_map, prot, fault_type | | |||||
PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), psind); | |||||
if (rv != KERN_SUCCESS) | if (rv != KERN_SUCCESS) | ||||
return (rv); | return (rv); | ||||
vm_fault_fill_hold(m_hold, m); | vm_fault_fill_hold(m_hold, m); | ||||
vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, false); | if (psind == 0) | ||||
vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, | |||||
false); | |||||
alc: This "psind == 0" test is wrong and should be removed. vm_fault_dirty() should always be called. The current fault might well be the one that dirties 'm'. In fact, the whole point of the last argument to vm_page_ps_test() was to handle such situations.
kib: It took some time to convince myself that vm_fault_dirty() there needs only be called on the m and not on all the pages making up the superpage. Perhaps a comment is due there.
VM_OBJECT_RUNLOCK(fs->first_object); | VM_OBJECT_RUNLOCK(fs->first_object); | ||||
if (!wired) | if (!wired && psind == 0) | ||||
vm_fault_prefault(fs, vaddr, PFBAK, PFFOR); | vm_fault_prefault(fs, vaddr, PFBAK, PFFOR); | ||||
vm_map_lookup_done(fs->map, fs->entry); | vm_map_lookup_done(fs->map, fs->entry); | ||||
curthread->td_ru.ru_minflt++; | curthread->td_ru.ru_minflt++; | ||||
Context not available. |
kib: Don't you need to check the (vaddr % page index) alignment?