Index: sys/vm/vm_fault.c =================================================================== --- sys/vm/vm_fault.c +++ sys/vm/vm_fault.c @@ -123,6 +123,7 @@ vm_map_t map; vm_map_entry_t entry; int lookup_still_valid; + int ra_done; struct vnode *vp; }; @@ -292,13 +293,15 @@ struct faultstate fs; struct vnode *vp; vm_page_t m; - int ahead, behind, cluster_offset, error, locked; + int ahead, behind, cluster_offset, error, locked, rv; + u_char behavior; hardfault = 0; growstack = TRUE; PCPU_INC(cnt.v_vm_faults); fs.vp = NULL; faultcount = 0; + fs.ra_done = FALSE; RetryFault:; @@ -557,9 +560,18 @@ * at the same time. */ if (fs.object->type != OBJT_DEFAULT) { - int rv; - u_char behavior = vm_map_entry_behavior(fs.entry); - + if (!fs.lookup_still_valid) { + locked = vm_map_trylock_read(fs.map); + if (locked) + fs.lookup_still_valid = TRUE; + if (!locked || fs.map->timestamp != + map_generation) { + release_page(&fs); + unlock_and_deallocate(&fs); + goto RetryFault; + } + } + behavior = vm_map_entry_behavior(fs.entry); era = fs.entry->read_ahead; if (behavior == MAP_ENTRY_BEHAV_RANDOM || P_KILLED(curproc)) { @@ -570,7 +582,8 @@ behind = 0; nera = VM_FAULT_READ_AHEAD_MAX; ahead = nera; - if (fs.pindex == fs.entry->next_read) + if (fs.pindex == fs.entry->next_read && + !fs.ra_done) vm_fault_dontneed(&fs, vaddr, ahead); } else if (fs.pindex == fs.entry->next_read) { /* @@ -581,14 +594,17 @@ * x (read ahead min + 1) + read ahead min" */ behind = 0; - nera = VM_FAULT_READ_AHEAD_MIN; - if (era > 0) { - nera += era + 1; - if (nera > VM_FAULT_READ_AHEAD_MAX) - nera = VM_FAULT_READ_AHEAD_MAX; + if (!fs.ra_done) { + if (era > 0) { + nera += era + 1; + if (nera > VM_FAULT_READ_AHEAD_MAX) + nera = VM_FAULT_READ_AHEAD_MAX; + } else + nera = VM_FAULT_READ_AHEAD_MIN; } ahead = nera; - if (era == VM_FAULT_READ_AHEAD_MAX) + if (era == VM_FAULT_READ_AHEAD_MAX && + !fs.ra_done) vm_fault_dontneed(&fs, vaddr, ahead); } else { /* @@ -610,7 +626,18 @@ } ahead = ulmin(ahead, atop(fs.entry->end - vaddr) - 1); if (era != nera) + /* + * Only read-lock on map is held + * there. It is fine for other thread + * faulting on the same entry to race + * with us for this update, causing + * some inaccuracy in the read-ahead + * heuristic. We do not separate two + * different streams of sequential + * faults on one entry anyway. + */ fs.entry->read_ahead = nera; + fs.ra_done = TRUE; /* * Call the pager to retrieve the data, if any, after @@ -931,8 +958,10 @@ * If the page was filled by a pager, update the map entry's * last read offset. * - * XXX The following assignment modifies the map - * without holding a write lock on it. + * The next_read assignment modifies the map without holding a + * write lock on it, which is acceptable. See the motivation + * in the comment above, before updating the entry->read_ahead + * field in the same manner. */ if (hardfault) fs.entry->next_read = fs.pindex + ahead + 1;