Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F156921744
D6085.id17652.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
9 KB
Referenced Files
None
Subscribers
None
D6085.id17652.diff
View Options
Index: sys/vm/vm_fault.c
===================================================================
--- sys/vm/vm_fault.c
+++ sys/vm/vm_fault.c
@@ -123,6 +123,7 @@
vm_map_t map;
vm_map_entry_t entry;
int lookup_still_valid;
+ int ra_done;
struct vnode *vp;
};
@@ -137,7 +138,12 @@
vm_page_xunbusy(fs->m);
vm_page_lock(fs->m);
- vm_page_deactivate(fs->m);
+ if (fs->m->valid == 0) {
+ if (fs->m->wire_count == 0)
+ vm_page_free(fs->m);
+ } else {
+ vm_page_deactivate(fs->m);
+ }
vm_page_unlock(fs->m);
fs->m = NULL;
}
@@ -292,13 +298,15 @@
struct faultstate fs;
struct vnode *vp;
vm_page_t m;
- int ahead, behind, cluster_offset, error, locked;
+ int ahead, behind, cluster_offset, dead, error, locked, rv;
+ u_char behavior;
hardfault = 0;
growstack = TRUE;
PCPU_INC(cnt.v_vm_faults);
fs.vp = NULL;
faultcount = 0;
+ fs.ra_done = FALSE;
RetryFault:;
@@ -412,7 +420,7 @@
fs.lookup_still_valid = TRUE;
- fs.first_m = NULL;
+ fs.m = fs.first_m = NULL;
/*
* Search for the page at object/offset.
@@ -421,11 +429,20 @@
fs.pindex = fs.first_pindex;
while (TRUE) {
/*
- * If the object is dead, we stop here
+ * If the object is marked for imminent termination,
+ * we retry here, since the collapse pass has raced
+ * with us. Otherwise, if we see a terminally
+ * dead object, return failure.
*/
- if (fs.object->flags & OBJ_DEAD) {
+ if ((fs.object->flags & OBJ_DEAD) != 0) {
+ dead = fs.object->type == OBJT_DEAD;
+ if (fs.m != NULL && fs.m != fs.first_m)
+ release_page(&fs);
unlock_and_deallocate(&fs);
- return (KERN_PROTECTION_FAILURE);
+ if (dead)
+ return (KERN_PROTECTION_FAILURE);
+ pause("vmf_de", 1);
+ goto RetryFault;
}
/*
@@ -550,9 +567,18 @@
* at the same time.
*/
if (fs.object->type != OBJT_DEFAULT) {
- int rv;
- u_char behavior = vm_map_entry_behavior(fs.entry);
-
+ if (!fs.lookup_still_valid) {
+ locked = vm_map_trylock_read(fs.map);
+ if (locked)
+ fs.lookup_still_valid = TRUE;
+ if (!locked || fs.map->timestamp !=
+ map_generation) {
+ release_page(&fs);
+ unlock_and_deallocate(&fs);
+ goto RetryFault;
+ }
+ }
+ behavior = vm_map_entry_behavior(fs.entry);
era = fs.entry->read_ahead;
if (behavior == MAP_ENTRY_BEHAV_RANDOM ||
P_KILLED(curproc)) {
@@ -563,7 +589,8 @@
behind = 0;
nera = VM_FAULT_READ_AHEAD_MAX;
ahead = nera;
- if (fs.pindex == fs.entry->next_read)
+ if (fs.pindex == fs.entry->next_read &&
+ !fs.ra_done)
vm_fault_dontneed(&fs, vaddr, ahead);
} else if (fs.pindex == fs.entry->next_read) {
/*
@@ -574,14 +601,17 @@
* x (read ahead min + 1) + read ahead min"
*/
behind = 0;
- nera = VM_FAULT_READ_AHEAD_MIN;
- if (era > 0) {
- nera += era + 1;
- if (nera > VM_FAULT_READ_AHEAD_MAX)
- nera = VM_FAULT_READ_AHEAD_MAX;
+ if (!fs.ra_done) {
+ if (era > 0) {
+ nera += era + 1;
+ if (nera > VM_FAULT_READ_AHEAD_MAX)
+ nera = VM_FAULT_READ_AHEAD_MAX;
+ } else
+ nera = VM_FAULT_READ_AHEAD_MIN;
}
ahead = nera;
- if (era == VM_FAULT_READ_AHEAD_MAX)
+ if (era == VM_FAULT_READ_AHEAD_MAX &&
+ !fs.ra_done)
vm_fault_dontneed(&fs, vaddr, ahead);
} else {
/*
@@ -603,7 +633,18 @@
}
ahead = ulmin(ahead, atop(fs.entry->end - vaddr) - 1);
if (era != nera)
+ /*
+ * Only a read lock on the map is
+ * held here. It is fine for another
+ * thread faulting on the same entry
+ * to race with us for this update,
+ * causing some inaccuracy in the
+ * read-ahead heuristic. We do not
+ * separate two different streams of
+ * sequential faults on one entry anyway.
+ */
fs.entry->read_ahead = nera;
+ fs.ra_done = TRUE;
/*
* Call the pager to retrieve the data, if any, after
@@ -924,8 +965,10 @@
* If the page was filled by a pager, update the map entry's
* last read offset.
*
- * XXX The following assignment modifies the map
- * without holding a write lock on it.
+ * The next_read assignment modifies the map without holding a
+ * write lock on it, which is acceptable. See the comment
+ * above the similar update of the entry->read_ahead field
+ * for the motivation.
*/
if (hardfault)
fs.entry->next_read = fs.pindex + ahead + 1;
Index: sys/vm/vm_meter.c
===================================================================
--- sys/vm/vm_meter.c
+++ sys/vm/vm_meter.c
@@ -89,6 +89,9 @@
CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_loadavg, "S,loadavg",
"Machine loadaverage history");
+static struct sx vmmeter_lock;
+SX_SYSINIT(vmmeter, &vmmeter_lock, "vmmtrx");
+
static int
vmtotal(SYSCTL_HANDLER_ARGS)
{
@@ -102,14 +105,13 @@
struct vmspace *vm;
bzero(&total, sizeof(total));
+ sx_xlock(&vmmeter_lock);
/*
* Mark all objects as inactive.
*/
mtx_lock(&vm_object_list_mtx);
TAILQ_FOREACH(object, &vm_object_list, object_list) {
- VM_OBJECT_WLOCK(object);
- vm_object_clear_flag(object, OBJ_ACTIVE);
- VM_OBJECT_WUNLOCK(object);
+ object->actmark = FALSE;
}
mtx_unlock(&vm_object_list_mtx);
/*
@@ -168,10 +170,8 @@
if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
(object = entry->object.vm_object) == NULL)
continue;
- VM_OBJECT_WLOCK(object);
- vm_object_set_flag(object, OBJ_ACTIVE);
+ object->actmark = TRUE;
paging |= object->paging_in_progress;
- VM_OBJECT_WUNLOCK(object);
}
vm_map_unlock_read(map);
vmspace_free(vm);
@@ -204,7 +204,7 @@
}
total.t_vm += object->size;
total.t_rm += object->resident_page_count;
- if (object->flags & OBJ_ACTIVE) {
+ if (object->actmark) {
total.t_avm += object->size;
total.t_arm += object->resident_page_count;
}
@@ -212,13 +212,15 @@
/* shared object */
total.t_vmshr += object->size;
total.t_rmshr += object->resident_page_count;
- if (object->flags & OBJ_ACTIVE) {
+ if (object->actmark) {
total.t_avmshr += object->size;
total.t_armshr += object->resident_page_count;
}
}
+ object->actmark = FALSE;
}
mtx_unlock(&vm_object_list_mtx);
+ sx_xunlock(&vmmeter_lock);
total.t_free = vm_cnt.v_free_count + vm_cnt.v_cache_count;
return (sysctl_handle_opaque(oidp, &total, sizeof(total), req));
}
Index: sys/vm/vm_object.h
===================================================================
--- sys/vm/vm_object.h
+++ sys/vm/vm_object.h
@@ -94,6 +94,7 @@
* (c) const until freed
* (o) per-object lock
* (f) free pages queue mutex
+ * (v) vm_meter lock
*
*/
@@ -110,6 +111,7 @@
int shadow_count; /* how many objects that this is a shadow for */
vm_memattr_t memattr; /* default memory attribute for pages */
objtype_t type; /* type of pager */
+ u_char actmark; /* (v) accounted as active */
u_short flags; /* see below */
u_short pg_color; /* (c) color of first page in obj */
u_int paging_in_progress; /* Paging (in or out) so don't collapse or destroy */
@@ -182,7 +184,6 @@
*/
#define OBJ_FICTITIOUS 0x0001 /* (c) contains fictitious pages */
#define OBJ_UNMANAGED 0x0002 /* (c) contains unmanaged pages */
-#define OBJ_ACTIVE 0x0004 /* active objects */
#define OBJ_DEAD 0x0008 /* dead objects (during rundown) */
#define OBJ_NOSPLIT 0x0010 /* dont split this object */
#define OBJ_UMTXDEAD 0x0020 /* umtx pshared was terminated */
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -1584,7 +1584,7 @@
continue;
}
- KASSERT(pp == NULL || pp->valid != 0,
+ KASSERT(pp == NULL || pp->wire_count > 0 || pp->valid != 0,
("unbusy invalid page %p", pp));
if (pp != NULL || vm_pager_has_page(object, new_pindex, NULL,
Index: sys/vm/vm_page.h
===================================================================
--- sys/vm/vm_page.h
+++ sys/vm/vm_page.h
@@ -552,6 +552,7 @@
(m)); \
} while (0)
+/* Note: the lock of page m must not be owned by caller. */
#define vm_page_xunbusy(m) do { \
if (!atomic_cmpset_rel_int(&(m)->busy_lock, \
VPB_SINGLE_EXCLUSIVER, VPB_UNBUSIED)) \
Index: sys/vm/vm_page.c
===================================================================
--- sys/vm/vm_page.c
+++ sys/vm/vm_page.c
@@ -760,17 +760,36 @@
}
static void
+vm_page_xunbusy_locked(vm_page_t m)
+{
+
+ vm_page_assert_xbusied(m);
+ vm_page_assert_locked(m);
+
+ atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
+ wakeup(m);
+}
+
+static void
vm_page_xunbusy_maybelocked(vm_page_t m)
{
bool lockacq;
vm_page_assert_xbusied(m);
+ /*
+ * Fast path for unbusy. If it succeeds, we know that there
+ * are no waiters, so we do not need a wakeup.
+ */
+ if (atomic_cmpset_rel_int(&m->busy_lock, VPB_SINGLE_EXCLUSIVER,
+ VPB_UNBUSIED))
+ return;
+
lockacq = !mtx_owned(vm_page_lockptr(m));
if (lockacq)
vm_page_lock(m);
- vm_page_flash(m);
- atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
+ /* There is a waiter; do wakeup() instead of vm_page_flash(). */
+ vm_page_xunbusy_locked(m);
if (lockacq)
vm_page_unlock(m);
}
@@ -788,8 +807,7 @@
vm_page_assert_xbusied(m);
vm_page_lock(m);
- atomic_store_rel_int(&m->busy_lock, VPB_UNBUSIED);
- wakeup(m);
+ vm_page_xunbusy_locked(m);
vm_page_unlock(m);
}
Index: sys/vm/vnode_pager.c
===================================================================
--- sys/vm/vnode_pager.c
+++ sys/vm/vnode_pager.c
@@ -169,10 +169,16 @@
/*
* don't double-terminate the object
*/
- if ((obj->flags & OBJ_DEAD) == 0)
+ if ((obj->flags & OBJ_DEAD) == 0) {
vm_object_terminate(obj);
- else
+ } else {
+ if ((obj->flags & OBJ_DISCONNECTWNT) != 0) {
+ vm_object_clear_flag(obj, OBJ_DISCONNECTWNT);
+ wakeup(obj);
+ }
+ vp->v_object = NULL;
VM_OBJECT_WUNLOCK(obj);
+ }
} else {
/*
* Woe to the process that tries to page now :-).
@@ -180,7 +186,7 @@
vm_pager_deallocate(obj);
VM_OBJECT_WUNLOCK(obj);
}
- vp->v_object = NULL;
+ KASSERT(vp->v_object == NULL, ("vp %p obj %p", vp, vp->v_object));
}
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Mon, May 18, 9:18 AM (14 h, 45 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
33241687
Default Alt Text
D6085.id17652.diff (9 KB)
Attached To
Mode
D6085: Collapse/vm_fault races and small fixes for radix insertion failures
Attached
Detach File
Event Timeline
Log In to Comment