D11435: Replace global swhash in swap pager with per-object trie to track swap blocks assigned to the object pages.
D11435.id30807.diff (24 KB)
Index: sys/kern/subr_pctrie.c
===================================================================
--- sys/kern/subr_pctrie.c
+++ sys/kern/subr_pctrie.c
@@ -679,6 +679,13 @@
pctrie_reclaim_allnodes_int(ptree, root, freefn);
}
+void
+pctrie_init(struct pctrie *ptree)
+{
+
+ ptree->pt_root = 0;
+}
+
#ifdef DDB
/*
* Show details about the given node.
Index: sys/sys/pctrie.h
===================================================================
--- sys/sys/pctrie.h
+++ sys/sys/pctrie.h
@@ -76,7 +76,7 @@
return name##_PCTRIE_VAL2PTR(pctrie_lookup(ptree, key)); \
} \
\
-static __inline struct type * \
+static __inline __unused struct type * \
name##_PCTRIE_LOOKUP_LE(struct pctrie *ptree, uint64_t key) \
{ \
\
@@ -107,6 +107,7 @@
typedef void *(*pctrie_alloc_t)(struct pctrie *ptree);
typedef void (*pctrie_free_t)(struct pctrie *ptree, void *node);
+void pctrie_init(struct pctrie *ptree);
int pctrie_insert(struct pctrie *ptree, uint64_t *val,
pctrie_alloc_t allocfn);
uint64_t *pctrie_lookup(struct pctrie *ptree, uint64_t key);
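
The pctrie.h hunk above exports pctrie_init(), so a trie head embedded in another structure can be initialized to empty in place, and marks the generated LOOKUP_LE wrapper __unused so that instantiations which never call it still compile cleanly. For reference, a minimal sketch of how the wrappers generated by PCTRIE_DEFINE() are used, assuming the SWAP/swblk instantiation that appears in swap_pager.c below (everything except the SWAP_PCTRIE_* names is illustrative):

	/*
	 * Sketch only.  PCTRIE_DEFINE(SWAP, swblk, p, swblk_trie_alloc,
	 * swblk_trie_free) generates SWAP_PCTRIE_* wrappers keyed on the
	 * swblk.p field.
	 */
	struct pctrie tree;
	struct swblk *new_sb, *sb;
	uint64_t key;
	int error;

	pctrie_init(&tree);			/* empty trie, allocates nothing */
	error = SWAP_PCTRIE_INSERT(&tree, new_sb); /* ENOMEM if the alloc callback fails */
	sb = SWAP_PCTRIE_LOOKUP(&tree, key);	/* exact key or NULL */
	sb = SWAP_PCTRIE_LOOKUP_GE(&tree, key);	/* smallest entry with p >= key */
	SWAP_PCTRIE_REMOVE(&tree, key);		/* key must be present */
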
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -87,6 +87,7 @@
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
+#include <sys/pctrie.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
@@ -127,22 +128,18 @@
#define SWB_NPAGES MAX_PAGEOUT_CLUSTER
#endif
-/*
- * The swblock structure maps an object and a small, fixed-size range
- * of page indices to disk addresses within a swap area.
- * The collection of these mappings is implemented as a hash table.
- * Unused disk addresses within a swap area are allocated and managed
- * using a blist.
- */
-#define SWAP_META_PAGES 32
+#define SWAP_META_PAGES (SWB_NPAGES * 2)
#define SWAP_META_MASK (SWAP_META_PAGES - 1)
-struct swblock {
- struct swblock *swb_hnext;
- vm_object_t swb_object;
- vm_pindex_t swb_index;
- int swb_count;
- daddr_t swb_pages[SWAP_META_PAGES];
+/*
+ * The swblk structure contains the address of the on-disk swap block
+ * for the given page index. The collection of these mappings for the
+ * object is implemented as a pc-trie. Unused disk addresses within a
+ * swap area are allocated and managed using a blist.
+ */
+struct swblk {
+ vm_pindex_t p;
+ daddr_t d[SWAP_META_PAGES];
};
static MALLOC_DEFINE(M_VMPGDATA, "vm_pgdata", "swap pager private data");
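
Each swblk covers a SWAP_META_PAGES-aligned run of page indices: the trie is keyed on the rounded-down index stored in p, and the remainder selects a slot in d[]. A minimal sketch of the addressing math that the later hunks open-code (the helper name is hypothetical):

	/*
	 * Hypothetical helper making the slot addressing explicit; the patch
	 * open-codes this in swp_pager_meta_build() and swp_pager_meta_ctl().
	 */
	static daddr_t
	swblk_slot(struct swblk *sb, vm_pindex_t pindex)
	{

		MPASS(sb->p == rounddown(pindex, SWAP_META_PAGES));
		return (sb->d[pindex % SWAP_META_PAGES]);
	}
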
@@ -328,10 +325,6 @@
CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I",
"Maximum running async swap ops");
-static struct swblock **swhash;
-static int swhash_mask;
-static struct mtx swhash_mtx;
-
static struct sx sw_alloc_sx;
/*
@@ -345,7 +338,8 @@
(&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)])
static struct pagerlst swap_pager_object_list[NOBJLISTS];
-static uma_zone_t swap_zone;
+static uma_zone_t swblk_zone;
+static uma_zone_t swpctrie_zone;
/*
* pagerops for OBJT_SWAP - "swap pager". Some ops are also global procedure
@@ -403,12 +397,28 @@
/*
* Metadata functions
*/
-static struct swblock **swp_pager_hash(vm_object_t object, vm_pindex_t index);
static void swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t);
static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t);
static void swp_pager_meta_free_all(vm_object_t);
static daddr_t swp_pager_meta_ctl(vm_object_t, vm_pindex_t, int);
+static void *
+swblk_trie_alloc(struct pctrie *ptree)
+{
+
+ return (uma_zalloc(swpctrie_zone, M_NOWAIT | (curproc == pageproc ?
+ M_USE_RESERVE : 0)));
+}
+
+static void
+swblk_trie_free(struct pctrie *ptree, void *node)
+{
+
+ uma_zfree(swpctrie_zone, node);
+}
+
+PCTRIE_DEFINE(SWAP, swblk, p, swblk_trie_alloc, swblk_trie_free);
+
/*
* SWP_SIZECHECK() - update swap_pager_full indication
*
@@ -437,33 +447,6 @@
}
/*
- * SWP_PAGER_HASH() - hash swap meta data
- *
- * This is an helper function which hashes the swapblk given
- * the object and page index. It returns a pointer to a pointer
- * to the object, or a pointer to a NULL pointer if it could not
- * find a swapblk.
- */
-static struct swblock **
-swp_pager_hash(vm_object_t object, vm_pindex_t index)
-{
- struct swblock **pswap;
- struct swblock *swap;
-
- index &= ~(vm_pindex_t)SWAP_META_MASK;
- pswap = &swhash[(index ^ (int)(intptr_t)object) & swhash_mask];
- while ((swap = *pswap) != NULL) {
- if (swap->swb_object == object &&
- swap->swb_index == index
- ) {
- break;
- }
- pswap = &swap->swb_hnext;
- }
- return (pswap);
-}
-
-/*
* SWAP_PAGER_INIT() - initialize the swap pager!
*
* Expected to be started from system init. NOTE: This code is run
@@ -528,21 +511,26 @@
mtx_unlock(&pbuf_mtx);
/*
- * Initialize our zone. Right now I'm just guessing on the number
- * we need based on the number of pages in the system. Each swblock
- * can hold 32 pages, so this is probably overkill. This reservation
- * is typically limited to around 32MB by default.
+ * Initialize our zone, guessing on the number we need based
+ * on the number of pages in the system. This reservation is
+ * typically limited to around 32MB by default.
*/
n = vm_cnt.v_page_count / 2;
- if (maxswzone && n > maxswzone / sizeof(struct swblock))
- n = maxswzone / sizeof(struct swblock);
+ if (maxswzone && n > maxswzone / sizeof(struct swblk))
+ n = maxswzone / sizeof(struct swblk);
+ swpctrie_zone = uma_zcreate("swpctrie", pctrie_node_size(), NULL, NULL,
+ pctrie_zone_init, NULL, UMA_ALIGN_PTR,
+ UMA_ZONE_NOFREE | UMA_ZONE_VM);
+ if (swpctrie_zone == NULL)
+ panic("failed to create swap pctrie zone.");
+ swblk_zone = uma_zcreate("swblk", sizeof(struct swblk), NULL, NULL,
+ NULL, NULL, _Alignof(struct swblk) - 1,
+ UMA_ZONE_NOFREE | UMA_ZONE_VM);
+ if (swblk_zone == NULL)
+ panic("failed to create swap blk zone.");
n2 = n;
- swap_zone = uma_zcreate("SWAPMETA", sizeof(struct swblock), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM);
- if (swap_zone == NULL)
- panic("failed to create swap_zone.");
do {
- if (uma_zone_reserve_kva(swap_zone, n))
+ if (uma_zone_reserve_kva(swblk_zone, n))
break;
/*
* if the allocation failed, try a zone two thirds the
@@ -551,24 +539,12 @@
n -= ((n + 2) / 3);
} while (n > 0);
if (n2 != n)
- printf("Swap zone entries reduced from %lu to %lu.\n", n2, n);
+ printf("Swap blk zone entries reduced from %lu to %lu.\n",
+ n2, n);
swap_maxpages = n * SWAP_META_PAGES;
- swzone = n * sizeof(struct swblock);
- n2 = n;
-
- /*
- * Initialize our meta-data hash table. The swapper does not need to
- * be quite as efficient as the VM system, so we do not use an
- * oversized hash table.
- *
- * n: size of hash table, must be power of 2
- * swhash_mask: hash table index mask
- */
- for (n = 1; n < n2 / 8; n *= 2)
- ;
- swhash = malloc(sizeof(struct swblock *) * n, M_VMPGDATA, M_WAITOK | M_ZERO);
- swhash_mask = n - 1;
- mtx_init(&swhash_mtx, "swap_pager swhash", NULL, MTX_DEF);
+ swzone = n * sizeof(struct swblk);
+ if (!uma_zone_reserve_kva(swpctrie_zone, n))
+ panic("Cannot reverse swap pctrie zone entries.");
}
static vm_object_t
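
The reservation loop above shrinks a failed request by about a third each time: n -= (n + 2) / 3 turns, say, 100 entries into 66, then 44, then 29, until uma_zone_reserve_kva() succeeds or n reaches zero. The pctrie node zone is then reserved with the same final n, and a failure there is treated as fatal.
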
@@ -582,14 +558,20 @@
return (NULL);
crhold(cred);
}
+
+ /*
+ * un_pager.swp.swp_blks trie is initialized by
+ * vm_object_allocate() to provide correct order of
+ * visibility.
+ */
object = vm_object_allocate(OBJT_SWAP, OFF_TO_IDX(offset +
PAGE_MASK + size));
+
object->handle = handle;
if (cred != NULL) {
object->cred = cred;
object->charge = size;
}
- object->un_pager.swp.swp_bcount = 0;
return (object);
}
@@ -1596,29 +1578,25 @@
int
swap_pager_isswapped(vm_object_t object, struct swdevt *sp)
{
- daddr_t index = 0;
- int bcount;
+ struct swblk *sb;
+ vm_pindex_t pi;
int i;
VM_OBJECT_ASSERT_WLOCKED(object);
if (object->type != OBJT_SWAP)
return (0);
- mtx_lock(&swhash_mtx);
- for (bcount = 0; bcount < object->un_pager.swp.swp_bcount; bcount++) {
- struct swblock *swap;
-
- if ((swap = *swp_pager_hash(object, index)) != NULL) {
- for (i = 0; i < SWAP_META_PAGES; ++i) {
- if (swp_pager_isondev(swap->swb_pages[i], sp)) {
- mtx_unlock(&swhash_mtx);
- return (1);
- }
- }
+ for (pi = 0;; pi = sb->p + SWAP_META_PAGES) {
+ sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, pi);
+ if (sb == NULL)
+ break;
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] == SWAPBLK_NONE)
+ continue;
+ if (swp_pager_isondev(sb->d[i], sp))
+ return (1);
}
- index += SWAP_META_PAGES;
}
- mtx_unlock(&swhash_mtx);
return (0);
}
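
The loop above is the patch's standard idiom for enumerating an object's populated swblks: look up the first entry at or after a page index, process its slots, and restart the lookup past that entry. A condensed sketch of the idiom with a hypothetical callback (swblk_foreach() is not part of the patch):

	static void
	swblk_foreach(vm_object_t object, void (*cb)(struct swblk *, int))
	{
		struct swblk *sb;
		vm_pindex_t pi;
		int i;

		VM_OBJECT_ASSERT_LOCKED(object);
		for (pi = 0;; pi = sb->p + SWAP_META_PAGES) {
			sb = SWAP_PCTRIE_LOOKUP_GE(
			    &object->un_pager.swp.swp_blks, pi);
			if (sb == NULL)
				break;
			for (i = 0; i < SWAP_META_PAGES; i++)
				if (sb->d[i] != SWAPBLK_NONE)
					cb(sb, i);	/* slot maps a block */
		}
	}
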
@@ -1688,50 +1666,59 @@
static void
swap_pager_swapoff(struct swdevt *sp)
{
- struct swblock *swap;
- vm_object_t locked_obj, object;
- vm_pindex_t pindex;
- int i, j, retries;
+ struct swblk *sb;
+ vm_object_t object;
+ vm_pindex_t pi;
+ int i, retries;
sx_assert(&swdev_syscall_lock, SA_XLOCKED);
retries = 0;
- locked_obj = NULL;
full_rescan:
- mtx_lock(&swhash_mtx);
- for (i = 0; i <= swhash_mask; i++) { /* '<=' is correct here */
-restart:
- for (swap = swhash[i]; swap != NULL; swap = swap->swb_hnext) {
- object = swap->swb_object;
- pindex = swap->swb_index;
- for (j = 0; j < SWAP_META_PAGES; ++j) {
- if (!swp_pager_isondev(swap->swb_pages[j], sp))
+ mtx_lock(&vm_object_list_mtx);
+ TAILQ_FOREACH(object, &vm_object_list, object_list) {
+ if (object->type != OBJT_SWAP)
+ continue;
+ mtx_unlock(&vm_object_list_mtx);
+ /* Depends on type-stability. */
+ VM_OBJECT_WLOCK(object);
+
+ /*
+ * Dead objects are eventually terminated on their own.
+ */
+ if ((object->flags & OBJ_DEAD) != 0)
+ goto next_obj;
+
+ /*
+ * Sync with fences placed after pctrie
+ * initialization. We must not access pctrie below
+ * unless we checked that our object is swap and not
+ * dead.
+ */
+ atomic_thread_fence_acq();
+ if (object->type != OBJT_SWAP)
+ goto next_obj;
+
+ for (pi = 0;;) {
+ sb = SWAP_PCTRIE_LOOKUP_GE(
+ &object->un_pager.swp.swp_blks, pi);
+ if (sb == NULL)
+ break;
+ pi = sb->p + SWAP_META_PAGES;
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] == SWAPBLK_NONE)
continue;
- if (locked_obj != object) {
- if (locked_obj != NULL)
- VM_OBJECT_WUNLOCK(locked_obj);
- locked_obj = object;
- if (!VM_OBJECT_TRYWLOCK(object)) {
- mtx_unlock(&swhash_mtx);
- /* Depends on type-stability. */
- VM_OBJECT_WLOCK(object);
- mtx_lock(&swhash_mtx);
- goto restart;
- }
- }
- MPASS(locked_obj == object);
- mtx_unlock(&swhash_mtx);
- swp_pager_force_pagein(object, pindex + j);
- mtx_lock(&swhash_mtx);
- goto restart;
+ if (swp_pager_isondev(sb->d[i], sp))
+ swp_pager_force_pagein(object,
+ sb->p + i);
}
}
+next_obj:
+ VM_OBJECT_WUNLOCK(object);
+ mtx_lock(&vm_object_list_mtx);
}
- mtx_unlock(&swhash_mtx);
- if (locked_obj != NULL) {
- VM_OBJECT_WUNLOCK(locked_obj);
- locked_obj = NULL;
- }
+ mtx_unlock(&vm_object_list_mtx);
+
if (sp->sw_used) {
/*
* Objects may be locked or paging to the device being
@@ -1775,85 +1762,88 @@
static void
swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk)
{
- static volatile int exhausted;
- struct swblock *swap;
- struct swblock **pswap;
- int idx;
+ static volatile int swblk_zone_exhausted, swpctrie_zone_exhausted;
+ struct swblk *sb;
+ vm_pindex_t modpi, rdpi;
+ int error, i;
VM_OBJECT_ASSERT_WLOCKED(object);
+
/*
* Convert default object to swap object if necessary
*/
if (object->type != OBJT_SWAP) {
+ pctrie_init(&object->un_pager.swp.swp_blks);
+
+ /*
+ * Ensure that swap_pager_swapoff() iteration over
+ * object_list does not see garbage pctrie.
+ */
+ atomic_thread_fence_rel();
+
object->type = OBJT_SWAP;
- object->un_pager.swp.swp_bcount = 0;
KASSERT(object->handle == NULL, ("default pager with handle"));
}
- /*
- * Locate hash entry. If not found create, but if we aren't adding
- * anything just return. If we run out of space in the map we wait
- * and, since the hash table may have changed, retry.
- */
-retry:
- mtx_lock(&swhash_mtx);
- pswap = swp_pager_hash(object, pindex);
-
- if ((swap = *pswap) == NULL) {
- int i;
-
+ rdpi = rounddown(pindex, SWAP_META_PAGES);
+ sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rdpi);
+ if (sb == NULL) {
if (swapblk == SWAPBLK_NONE)
- goto done;
-
- swap = *pswap = uma_zalloc(swap_zone, M_NOWAIT |
- (curproc == pageproc ? M_USE_RESERVE : 0));
- if (swap == NULL) {
- mtx_unlock(&swhash_mtx);
+ return;
+ for (;;) {
+ sb = uma_zalloc(swblk_zone, M_NOWAIT | (curproc ==
+ pageproc ? M_USE_RESERVE : 0));
+ if (sb != NULL) {
+ sb->p = rdpi;
+ for (i = 0; i < SWAP_META_PAGES; i++)
+ sb->d[i] = SWAPBLK_NONE;
+ if (atomic_cmpset_int(&swblk_zone_exhausted,
+ 1, 0))
+ printf("swblk zone ok\n");
+ break;
+ }
VM_OBJECT_WUNLOCK(object);
- if (uma_zone_exhausted(swap_zone)) {
- if (atomic_cmpset_int(&exhausted, 0, 1))
- printf("swap zone exhausted, "
+ if (uma_zone_exhausted(swblk_zone)) {
+ if (atomic_cmpset_int(&swblk_zone_exhausted,
+ 0, 1))
+ printf("swap blk zone exhausted, "
"increase kern.maxswzone\n");
vm_pageout_oom(VM_OOM_SWAPZ);
- pause("swzonex", 10);
+ pause("swzonxb", 10);
+ } else
+ VM_WAIT;
+ VM_OBJECT_WLOCK(object);
+ }
+ for (;;) {
+ error = SWAP_PCTRIE_INSERT(
+ &object->un_pager.swp.swp_blks, sb);
+ if (error == 0) {
+ if (atomic_cmpset_int(&swpctrie_zone_exhausted,
+ 1, 0))
+ printf("swpctrie zone ok\n");
+ break;
+ }
+ VM_OBJECT_WUNLOCK(object);
+ if (uma_zone_exhausted(swpctrie_zone)) {
+ if (atomic_cmpset_int(&swpctrie_zone_exhausted,
+ 0, 1))
+ printf("swap pctrie zone exhausted, "
+ "increase kern.maxswzone\n");
+ vm_pageout_oom(VM_OOM_SWAPZ);
+ pause("swzonxp", 10);
} else
VM_WAIT;
VM_OBJECT_WLOCK(object);
- goto retry;
}
-
- if (atomic_cmpset_int(&exhausted, 1, 0))
- printf("swap zone ok\n");
-
- swap->swb_hnext = NULL;
- swap->swb_object = object;
- swap->swb_index = pindex & ~(vm_pindex_t)SWAP_META_MASK;
- swap->swb_count = 0;
-
- ++object->un_pager.swp.swp_bcount;
-
- for (i = 0; i < SWAP_META_PAGES; ++i)
- swap->swb_pages[i] = SWAPBLK_NONE;
- }
-
- /*
- * Delete prior contents of metadata
- */
- idx = pindex & SWAP_META_MASK;
-
- if (swap->swb_pages[idx] != SWAPBLK_NONE) {
- swp_pager_freeswapspace(swap->swb_pages[idx], 1);
- --swap->swb_count;
}
+ MPASS(sb->p == rdpi);
- /*
- * Enter block into metadata
- */
- swap->swb_pages[idx] = swapblk;
- if (swapblk != SWAPBLK_NONE)
- ++swap->swb_count;
-done:
- mtx_unlock(&swhash_mtx);
+ modpi = pindex % SWAP_META_PAGES;
+ /* Delete prior contents of metadata. */
+ if (sb->d[modpi] != SWAPBLK_NONE)
+ swp_pager_freeswapspace(sb->d[modpi], 1);
+ /* Enter block into metadata. */
+ sb->d[modpi] = swapblk;
}
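
Two allocations can fail here under M_NOWAIT: the swblk itself from swblk_zone and, inside SWAP_PCTRIE_INSERT(), a trie node from swpctrie_zone (the insert returns ENOMEM when swblk_trie_alloc() comes back empty). Both retry loops share one shape, condensed below; try_step() is a stand-in for either step, not a real function:

	/* Condensed retry skeleton, not verbatim code from the patch. */
	for (;;) {
		if (try_step() == 0)	/* uma_zalloc() or trie insert */
			break;
		VM_OBJECT_WUNLOCK(object);	/* both waits can sleep */
		if (uma_zone_exhausted(zone)) {
			vm_pageout_oom(VM_OOM_SWAPZ);	/* reclaim via OOM */
			pause("swzonx", 10);
		} else
			VM_WAIT;	/* wait for the page daemon */
		VM_OBJECT_WLOCK(object);
	}
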
/*
@@ -1867,42 +1857,40 @@
* with resident pages.
*/
static void
-swp_pager_meta_free(vm_object_t object, vm_pindex_t index, vm_pindex_t count)
+swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count)
{
- struct swblock **pswap, *swap;
- vm_pindex_t c;
- daddr_t v;
- int n, sidx;
+ struct swblk *sb;
+ vm_pindex_t last;
+ int i;
+ bool empty;
VM_OBJECT_ASSERT_LOCKED(object);
if (object->type != OBJT_SWAP || count == 0)
return;
- mtx_lock(&swhash_mtx);
- for (c = 0; c < count;) {
- pswap = swp_pager_hash(object, index);
- sidx = index & SWAP_META_MASK;
- n = SWAP_META_PAGES - sidx;
- index += n;
- if ((swap = *pswap) == NULL) {
- c += n;
- continue;
- }
- for (; c < count && sidx < SWAP_META_PAGES; ++c, ++sidx) {
- if ((v = swap->swb_pages[sidx]) == SWAPBLK_NONE)
+ last = pindex + count - 1;
+ for (;;) {
+ sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks,
+ rounddown(pindex, SWAP_META_PAGES));
+ if (sb == NULL || sb->p > last)
+ break;
+ empty = true;
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] == SWAPBLK_NONE)
continue;
- swp_pager_freeswapspace(v, 1);
- swap->swb_pages[sidx] = SWAPBLK_NONE;
- if (--swap->swb_count == 0) {
- *pswap = swap->swb_hnext;
- uma_zfree(swap_zone, swap);
- --object->un_pager.swp.swp_bcount;
- c += SWAP_META_PAGES - sidx;
- break;
- }
+ if (pindex <= sb->p + i && sb->p + i <= last) {
+ swp_pager_freeswapspace(sb->d[i], 1);
+ sb->d[i] = SWAPBLK_NONE;
+ } else
+ empty = false;
+ }
+ pindex = sb->p + SWAP_META_PAGES;
+ if (empty) {
+ SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks,
+ sb->p);
+ uma_zfree(swblk_zone, sb);
}
}
- mtx_unlock(&swhash_mtx);
}
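
Where the old code kept a per-swblock swb_count and unlinked the hash entry once it dropped to zero, the trie variant recomputes emptiness on each pass: slots that fall inside [pindex, last] are freed and cleared, any live slot outside the range keeps the block allocated (empty = false), and a fully cleared swblk is removed from the trie and returned to swblk_zone.
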
/*
@@ -1914,36 +1902,27 @@
static void
swp_pager_meta_free_all(vm_object_t object)
{
- struct swblock **pswap, *swap;
- vm_pindex_t index;
- daddr_t v;
+ struct swblk *sb;
+ vm_pindex_t pindex;
int i;
VM_OBJECT_ASSERT_WLOCKED(object);
if (object->type != OBJT_SWAP)
return;
- index = 0;
- while (object->un_pager.swp.swp_bcount != 0) {
- mtx_lock(&swhash_mtx);
- pswap = swp_pager_hash(object, index);
- if ((swap = *pswap) != NULL) {
- for (i = 0; i < SWAP_META_PAGES; ++i) {
- v = swap->swb_pages[i];
- if (v != SWAPBLK_NONE) {
- --swap->swb_count;
- swp_pager_freeswapspace(v, 1);
- }
- }
- if (swap->swb_count != 0)
- panic(
- "swap_pager_meta_free_all: swb_count != 0");
- *pswap = swap->swb_hnext;
- uma_zfree(swap_zone, swap);
- --object->un_pager.swp.swp_bcount;
+ pindex = 0;
+ for (;;) {
+ sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks,
+ pindex);
+ if (sb == NULL)
+ break;
+ pindex = sb->p + 1;
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] != SWAPBLK_NONE)
+ swp_pager_freeswapspace(sb->d[i], 1);
}
- mtx_unlock(&swhash_mtx);
- index += SWAP_META_PAGES;
+ SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p);
+ uma_zfree(swblk_zone, sb);
}
}
@@ -1958,7 +1937,7 @@
* meta-data swapblks.
*
* It is not possible to store invalid swapblks in the swap meta data
- * (other then a literal 'SWAPBLK_NONE'), so we don't bother checking.
+ * (even a literal 'SWAPBLK_NONE'), so we don't bother checking.
*
* When acting on a busy resident page and paging is in progress, we
* have to wait until paging is complete but otherwise can act on the
@@ -1970,10 +1949,9 @@
static daddr_t
swp_pager_meta_ctl(vm_object_t object, vm_pindex_t pindex, int flags)
{
- struct swblock **pswap;
- struct swblock *swap;
+ struct swblk *sb;
daddr_t r1;
- int idx;
+ int i;
VM_OBJECT_ASSERT_LOCKED(object);
/*
@@ -1983,30 +1961,29 @@
if (object->type != OBJT_SWAP)
return (SWAPBLK_NONE);
- r1 = SWAPBLK_NONE;
- mtx_lock(&swhash_mtx);
- pswap = swp_pager_hash(object, pindex);
-
- if ((swap = *pswap) != NULL) {
- idx = pindex & SWAP_META_MASK;
- r1 = swap->swb_pages[idx];
-
- if (r1 != SWAPBLK_NONE) {
- if (flags & SWM_FREE) {
- swp_pager_freeswapspace(r1, 1);
- r1 = SWAPBLK_NONE;
- }
- if (flags & (SWM_FREE|SWM_POP)) {
- swap->swb_pages[idx] = SWAPBLK_NONE;
- if (--swap->swb_count == 0) {
- *pswap = swap->swb_hnext;
- uma_zfree(swap_zone, swap);
- --object->un_pager.swp.swp_bcount;
- }
- }
+ sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks,
+ rounddown(pindex, SWAP_META_PAGES));
+ if (sb == NULL)
+ return (SWAPBLK_NONE);
+ r1 = sb->d[pindex % SWAP_META_PAGES];
+ if (r1 == SWAPBLK_NONE)
+ return (SWAPBLK_NONE);
+ if ((flags & (SWM_FREE | SWM_POP)) != 0) {
+ sb->d[pindex % SWAP_META_PAGES] = SWAPBLK_NONE;
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] != SWAPBLK_NONE)
+ break;
+ }
+ if (i == SWAP_META_PAGES) {
+ SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks,
+ rounddown(pindex, SWAP_META_PAGES));
+ uma_zfree(swblk_zone, sb);
}
}
- mtx_unlock(&swhash_mtx);
+ if ((flags & SWM_FREE) != 0) {
+ swp_pager_freeswapspace(r1, 1);
+ r1 = SWAPBLK_NONE;
+ }
return (r1);
}
@@ -2020,32 +1997,36 @@
vm_pindex_t
swap_pager_find_least(vm_object_t object, vm_pindex_t pindex)
{
- struct swblock **pswap, *swap;
- vm_pindex_t i, j, lim;
- int idx;
+ struct swblk *sb;
+ int i;
VM_OBJECT_ASSERT_LOCKED(object);
- if (object->type != OBJT_SWAP || object->un_pager.swp.swp_bcount == 0)
+ if (object->type != OBJT_SWAP)
return (object->size);
- mtx_lock(&swhash_mtx);
- for (j = pindex; j < object->size; j = lim) {
- pswap = swp_pager_hash(object, j);
- lim = rounddown2(j + SWAP_META_PAGES, SWAP_META_PAGES);
- if (lim > object->size)
- lim = object->size;
- if ((swap = *pswap) != NULL) {
- for (idx = j & SWAP_META_MASK, i = j; i < lim;
- i++, idx++) {
- if (swap->swb_pages[idx] != SWAPBLK_NONE)
- goto found;
- }
- }
+ sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks,
+ rounddown(pindex, SWAP_META_PAGES));
+ if (sb == NULL)
+ return (object->size);
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->p + i >= pindex && sb->d[i] != SWAPBLK_NONE)
+ return (sb->p + i);
+ }
+ sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks,
+ roundup(pindex, SWAP_META_PAGES));
+ if (sb == NULL)
+ return (object->size);
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] != SWAPBLK_NONE)
+ return (sb->p + i);
}
- i = object->size;
-found:
- mtx_unlock(&swhash_mtx);
- return (i);
+
+ /*
+ * We get here if a swblk is present in the trie but it
+ * doesn't map any blocks.
+ */
+ MPASS(0);
+ return (object->size);
}
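
swap_pager_find_least() now needs at most two lookups: the first examines the swblk that would contain pindex, ignoring slots below it; the second hops to the next allocated swblk, whose first live slot is the answer. The MPASS(0) documents the expectation that a swblk left in the trie maps at least one block.
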
/*
@@ -2081,7 +2062,7 @@
* Swap metadata may not fit in the KVM if we have physical
* memory of >1GB.
*/
- if (swap_zone == NULL) {
+ if (swblk_zone == NULL) {
error = ENOMEM;
goto done;
}
@@ -2125,7 +2106,7 @@
unsigned long maxpages;
/* absolute maximum we can handle assuming 100% efficiency */
- maxpages = uma_zone_get_max(swap_zone) * SWAP_META_PAGES;
+ maxpages = uma_zone_get_max(swblk_zone) * SWAP_META_PAGES;
/* recommend using no more than half that amount */
if (npages > maxpages / 2) {
@@ -2466,7 +2447,10 @@
vm_map_t map;
vm_map_entry_t cur;
vm_object_t object;
+ struct swblk *sb;
+ vm_pindex_t bcount, pi;
long count, n;
+ int i;
map = &vmspace->vm_map;
count = 0;
@@ -2474,12 +2458,22 @@
for (cur = map->header.next; cur != &map->header; cur = cur->next) {
if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
(object = cur->object.vm_object) != NULL) {
+ if (object->type != OBJT_SWAP)
+ continue;
VM_OBJECT_WLOCK(object);
- if (object->type == OBJT_SWAP &&
- object->un_pager.swp.swp_bcount != 0) {
+ if (object->type == OBJT_SWAP) {
+ for (bcount = 0, pi = 0;; pi = sb->p + 1) {
+ sb = SWAP_PCTRIE_LOOKUP_GE(
+ &object->un_pager.swp.swp_blks, pi);
+ if (sb == NULL)
+ break;
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] != SWAPBLK_NONE)
+ bcount++;
+ }
+ }
n = (cur->end - cur->start) / PAGE_SIZE;
- count += object->un_pager.swp.swp_bcount *
- SWAP_META_PAGES * n / object->size + 1;
+ count += bcount * n / object->size + 1;
}
VM_OBJECT_WUNLOCK(object);
}
Index: sys/vm/vm_object.h
===================================================================
--- sys/vm/vm_object.h
+++ sys/vm/vm_object.h
@@ -70,6 +70,7 @@
#include <sys/queue.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <sys/_pctrie.h>
#include <sys/_rwlock.h>
#include <vm/_vm_radix.h>
@@ -151,13 +152,12 @@
* the handle changed and hash-chain
* invalid.
*
- * swp_bcount - number of swap 'swblock' metablocks, each
- * contains up to 16 swapblk assignments.
- * see vm/swap_pager.h
+ * swp_blks - pc-trie of the allocated swap blocks.
+ *
*/
struct {
void *swp_tmpfs;
- int swp_bcount;
+ struct pctrie swp_blks;
} swp;
} un_pager;
struct ucred *cred;
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -73,6 +73,7 @@
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/kernel.h>
+#include <sys/pctrie.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h> /* for curproc, pageproc */
@@ -208,6 +209,7 @@
object->paging_in_progress = 0;
object->resident_page_count = 0;
object->shadow_count = 0;
+ object->flags = OBJ_DEAD;
mtx_lock(&vm_object_list_mtx);
TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
@@ -223,6 +225,16 @@
LIST_INIT(&object->shadow_head);
object->type = type;
+ if (type == OBJT_SWAP)
+ pctrie_init(&object->un_pager.swp.swp_blks);
+
+ /*
+ * Ensure that swap_pager_swapoff() iteration over object_list
+ * sees up to date type and pctrie head if it observed
+ * non-dead object.
+ */
+ atomic_thread_fence_rel();
+
switch (type) {
case OBJT_DEAD:
panic("_vm_object_allocate: can't create OBJT_DEAD");