Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F156847955
D6085.id16794.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
7 KB
Referenced Files
None
Subscribers
None
D6085.id16794.diff
View Options
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -123,6 +123,7 @@
vm_map_t map;
vm_map_entry_t entry;
int lookup_still_valid;
+ int ra_done;
struct vnode *vp;
};
@@ -137,7 +138,12 @@
vm_page_xunbusy(fs->m);
vm_page_lock(fs->m);
- vm_page_deactivate(fs->m);
+ if (fs->m->valid == 0) {
+ if (fs->m->wire_count == 0)
+ vm_page_free(fs->m);
+ } else {
+ vm_page_deactivate(fs->m);
+ }
vm_page_unlock(fs->m);
fs->m = NULL;
}
@@ -292,13 +298,15 @@
struct faultstate fs;
struct vnode *vp;
vm_page_t m;
- int ahead, behind, cluster_offset, error, locked;
+ int ahead, behind, cluster_offset, dead, error, locked, rv;
+ u_char behavior;
hardfault = 0;
growstack = TRUE;
PCPU_INC(cnt.v_vm_faults);
fs.vp = NULL;
faultcount = 0;
+ fs.ra_done = FALSE;
RetryFault:;
@@ -412,7 +420,7 @@
fs.lookup_still_valid = TRUE;
- fs.first_m = NULL;
+ fs.m = fs.first_m = NULL;
/*
* Search for the page at object/offset.
@@ -421,11 +429,20 @@
fs.pindex = fs.first_pindex;
while (TRUE) {
/*
- * If the object is dead, we stop here
+ * If the object is marked for imminent termination,
+ * we retry here, since the collapse pass has raced
+ * with us. Otherwise, if we see terminally dead
+ * object, return fail.
*/
- if (fs.object->flags & OBJ_DEAD) {
+ if ((fs.object->flags & OBJ_DEAD) != 0) {
+ dead = fs.object->type == OBJT_DEAD;
+ if (fs.m != NULL && fs.m != fs.first_m)
+ release_page(&fs);
unlock_and_deallocate(&fs);
- return (KERN_PROTECTION_FAILURE);
+ if (dead)
+ return (KERN_PROTECTION_FAILURE);
+ pause("vmf_de", 1);
+ goto RetryFault;
}
/*
@@ -550,9 +567,18 @@
* at the same time.
*/
if (fs.object->type != OBJT_DEFAULT) {
- int rv;
- u_char behavior = vm_map_entry_behavior(fs.entry);
-
+ if (!fs.lookup_still_valid) {
+ locked = vm_map_trylock_read(fs.map);
+ if (locked)
+ fs.lookup_still_valid = TRUE;
+ if (!locked || fs.map->timestamp !=
+ map_generation) {
+ release_page(&fs);
+ unlock_and_deallocate(&fs);
+ goto RetryFault;
+ }
+ }
+ behavior = vm_map_entry_behavior(fs.entry);
era = fs.entry->read_ahead;
if (behavior == MAP_ENTRY_BEHAV_RANDOM ||
P_KILLED(curproc)) {
@@ -563,7 +589,8 @@
behind = 0;
nera = VM_FAULT_READ_AHEAD_MAX;
ahead = nera;
- if (fs.pindex == fs.entry->next_read)
+ if (fs.pindex == fs.entry->next_read &&
+ !fs.ra_done)
vm_fault_dontneed(&fs, vaddr, ahead);
} else if (fs.pindex == fs.entry->next_read) {
/*
@@ -574,14 +601,17 @@
* x (read ahead min + 1) + read ahead min"
*/
behind = 0;
- nera = VM_FAULT_READ_AHEAD_MIN;
- if (era > 0) {
- nera += era + 1;
- if (nera > VM_FAULT_READ_AHEAD_MAX)
- nera = VM_FAULT_READ_AHEAD_MAX;
+ if (!fs.ra_done) {
+ if (era > 0) {
+ nera += era + 1;
+ if (nera > VM_FAULT_READ_AHEAD_MAX)
+ nera = VM_FAULT_READ_AHEAD_MAX;
+ } else
+ nera = VM_FAULT_READ_AHEAD_MIN;
}
ahead = nera;
- if (era == VM_FAULT_READ_AHEAD_MAX)
+ if (era == VM_FAULT_READ_AHEAD_MAX &&
+ !fs.ra_done)
vm_fault_dontneed(&fs, vaddr, ahead);
} else {
/*
@@ -603,7 +633,18 @@
}
ahead = ulmin(ahead, atop(fs.entry->end - vaddr) - 1);
if (era != nera)
+ /*
+ * Only read-lock on map is held
+ * there. It is fine for other thread
+ * faulting on the same entry to race
+ * with us for this update, causing
+ * some inaccuracy in the read-ahead
+ * heuristic. We do not separate two
+ * different streams of sequential
+ * faults on one entry anyway.
+ */
fs.entry->read_ahead = nera;
+ fs.ra_done = TRUE;
/*
* Call the pager to retrieve the data, if any, after
@@ -813,6 +854,7 @@
if (vm_page_rename(fs.m, fs.first_object,
fs.first_pindex)) {
VM_OBJECT_WUNLOCK(fs.first_object);
+ release_page(&fs);
unlock_and_deallocate(&fs);
goto RetryFault;
}
@@ -931,8 +973,10 @@
* If the page was filled by a pager, update the map entry's
* last read offset.
*
- * XXX The following assignment modifies the map
- * without holding a write lock on it.
+ * The next_read assignment modifies the map without holding a
+ * write lock on it, which is acceptable. See the motivation
+ * in the comment above, before updating the entry->read_ahead
+ * field in the same manner.
*/
if (hardfault)
fs.entry->next_read = fs.pindex + ahead + 1;
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -1584,7 +1584,7 @@
continue;
}
- KASSERT(pp == NULL || pp->valid != 0,
+ KASSERT(pp == NULL || pp->wire_count > 0 || pp->valid != 0,
("unbusy invalid page %p", pp));
if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL,
@@ -1669,11 +1669,14 @@
void
vm_object_collapse(vm_object_t object)
{
+ vm_object_t backing_object, new_backing_object;
+
VM_OBJECT_ASSERT_WLOCKED(object);
-
- while (TRUE) {
- vm_object_t backing_object;
+ if ((object->flags & OBJ_DEAD) != 0)
+ return;
+ vm_object_pip_add(object, 1);
+ while (TRUE) {
/*
* Verify that the conditions are right for collapse:
*
@@ -1699,14 +1702,14 @@
break;
}
- if (
- object->paging_in_progress != 0 ||
- backing_object->paging_in_progress != 0
- ) {
+ if (object->paging_in_progress > 1 /* one ref is from us */ ||
+ backing_object->paging_in_progress != 0) {
vm_object_qcollapse(object);
VM_OBJECT_WUNLOCK(backing_object);
break;
}
+ vm_object_pip_add(backing_object, 1);
+
/*
* We know that we can either collapse the backing object (if
* the parent is the only reference to it) or (perhaps) have
@@ -1789,6 +1792,7 @@
KASSERT(backing_object->ref_count == 1, (
"backing_object %p was somehow re-referenced during collapse!",
backing_object));
+ vm_object_pip_wakeup(backing_object);
backing_object->type = OBJT_DEAD;
backing_object->ref_count = 0;
VM_OBJECT_WUNLOCK(backing_object);
@@ -1796,14 +1800,13 @@
object_collapses++;
} else {
- vm_object_t new_backing_object;
-
/*
* If we do not entirely shadow the backing object,
* there is nothing we can do so we give up.
*/
if (object->resident_page_count != object->size &&
!vm_object_scan_all_shadowed(object)) {
+ vm_object_pip_wakeup(backing_object);
VM_OBJECT_WUNLOCK(backing_object);
break;
}
@@ -1836,6 +1839,7 @@
* its ref_count was at least 2, it will not vanish.
*/
backing_object->ref_count--;
+ vm_object_pip_wakeup(backing_object);
VM_OBJECT_WUNLOCK(backing_object);
object_bypasses++;
}
@@ -1844,6 +1848,7 @@
* Try again with this object's new backing object.
*/
}
+ vm_object_pip_wakeup(object);
}
/*
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -3290,7 +3290,8 @@
cache_was_empty = vm_radix_is_empty(&object->cache);
if (vm_radix_insert(&object->cache, m)) {
mtx_unlock(&vm_page_queue_free_mtx);
- if (object->resident_page_count == 0)
+ if (object->type == OBJT_VNODE &&
+ object->resident_page_count == 0)
vdrop(object->handle);
m->object = NULL;
vm_page_free(m);
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Sun, May 17, 9:27 PM (10 h, 24 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33210625
Default Alt Text
D6085.id16794.diff (7 KB)
Attached To
Mode
D6085: Collapse/vm_fault races and small fixes for radix insertion failures
Attached
Detach File
Event Timeline
Log In to Comment