Page MenuHomeFreeBSD

D11435.id30807.diff
No OneTemporary

D11435.id30807.diff

Index: sys/kern/subr_pctrie.c
===================================================================
--- sys/kern/subr_pctrie.c
+++ sys/kern/subr_pctrie.c
@@ -679,6 +679,13 @@
pctrie_reclaim_allnodes_int(ptree, root, freefn);
}
+void
+pctrie_init(struct pctrie *ptree)
+{
+
+ ptree->pt_root = 0;
+}
+
#ifdef DDB
/*
* Show details about the given node.
Index: sys/sys/pctrie.h
===================================================================
--- sys/sys/pctrie.h
+++ sys/sys/pctrie.h
@@ -76,7 +76,7 @@
return name##_PCTRIE_VAL2PTR(pctrie_lookup(ptree, key)); \
} \
\
-static __inline struct type * \
+static __inline __unused struct type * \
name##_PCTRIE_LOOKUP_LE(struct pctrie *ptree, uint64_t key) \
{ \
\
@@ -107,6 +107,7 @@
typedef void *(*pctrie_alloc_t)(struct pctrie *ptree);
typedef void (*pctrie_free_t)(struct pctrie *ptree, void *node);
+void pctrie_init(struct pctrie *ptree);
int pctrie_insert(struct pctrie *ptree, uint64_t *val,
pctrie_alloc_t allocfn);
uint64_t *pctrie_lookup(struct pctrie *ptree, uint64_t key);
Index: sys/vm/swap_pager.c
===================================================================
--- sys/vm/swap_pager.c
+++ sys/vm/swap_pager.c
@@ -87,6 +87,7 @@
#include <sys/namei.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
+#include <sys/pctrie.h>
#include <sys/racct.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
@@ -127,22 +128,18 @@
#define SWB_NPAGES MAX_PAGEOUT_CLUSTER
#endif
-/*
- * The swblock structure maps an object and a small, fixed-size range
- * of page indices to disk addresses within a swap area.
- * The collection of these mappings is implemented as a hash table.
- * Unused disk addresses within a swap area are allocated and managed
- * using a blist.
- */
-#define SWAP_META_PAGES 32
+#define SWAP_META_PAGES (SWB_NPAGES * 2)
#define SWAP_META_MASK (SWAP_META_PAGES - 1)
-struct swblock {
- struct swblock *swb_hnext;
- vm_object_t swb_object;
- vm_pindex_t swb_index;
- int swb_count;
- daddr_t swb_pages[SWAP_META_PAGES];
+/*
+ * The swblk structure contains the address of the on-disk swap block
+ * for the given page index. The collection of these mappings for the
+ * object is implemented as a pc-trie. Unused disk addresses within a
+ * swap area are allocated and managed using a blist.
+ */
+struct swblk {
+ vm_pindex_t p;
+ daddr_t d[SWAP_META_PAGES];
};
static MALLOC_DEFINE(M_VMPGDATA, "vm_pgdata", "swap pager private data");
@@ -328,10 +325,6 @@
CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I",
"Maximum running async swap ops");
-static struct swblock **swhash;
-static int swhash_mask;
-static struct mtx swhash_mtx;
-
static struct sx sw_alloc_sx;
/*
@@ -345,7 +338,8 @@
(&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)])
static struct pagerlst swap_pager_object_list[NOBJLISTS];
-static uma_zone_t swap_zone;
+static uma_zone_t swblk_zone;
+static uma_zone_t swpctrie_zone;
/*
* pagerops for OBJT_SWAP - "swap pager". Some ops are also global procedure
@@ -403,12 +397,28 @@
/*
* Metadata functions
*/
-static struct swblock **swp_pager_hash(vm_object_t object, vm_pindex_t index);
static void swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t);
static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t);
static void swp_pager_meta_free_all(vm_object_t);
static daddr_t swp_pager_meta_ctl(vm_object_t, vm_pindex_t, int);
+static void *
+swblk_trie_alloc(struct pctrie *ptree)
+{
+
+ return (uma_zalloc(swpctrie_zone, M_NOWAIT | (curproc == pageproc ?
+ M_USE_RESERVE : 0)));
+}
+
+static void
+swblk_trie_free(struct pctrie *ptree, void *node)
+{
+
+ uma_zfree(swpctrie_zone, node);
+}
+
+PCTRIE_DEFINE(SWAP, swblk, p, swblk_trie_alloc, swblk_trie_free);
+
/*
* SWP_SIZECHECK() - update swap_pager_full indication
*
@@ -437,33 +447,6 @@
}
/*
- * SWP_PAGER_HASH() - hash swap meta data
- *
- * This is an helper function which hashes the swapblk given
- * the object and page index. It returns a pointer to a pointer
- * to the object, or a pointer to a NULL pointer if it could not
- * find a swapblk.
- */
-static struct swblock **
-swp_pager_hash(vm_object_t object, vm_pindex_t index)
-{
- struct swblock **pswap;
- struct swblock *swap;
-
- index &= ~(vm_pindex_t)SWAP_META_MASK;
- pswap = &swhash[(index ^ (int)(intptr_t)object) & swhash_mask];
- while ((swap = *pswap) != NULL) {
- if (swap->swb_object == object &&
- swap->swb_index == index
- ) {
- break;
- }
- pswap = &swap->swb_hnext;
- }
- return (pswap);
-}
-
-/*
* SWAP_PAGER_INIT() - initialize the swap pager!
*
* Expected to be started from system init. NOTE: This code is run
@@ -528,21 +511,26 @@
mtx_unlock(&pbuf_mtx);
/*
- * Initialize our zone. Right now I'm just guessing on the number
- * we need based on the number of pages in the system. Each swblock
- * can hold 32 pages, so this is probably overkill. This reservation
- * is typically limited to around 32MB by default.
+ * Initialize our zone, guessing on the number we need based
+ * on the number of pages in the system. This reservation is
+ * typically limited to around 32MB by default.
*/
n = vm_cnt.v_page_count / 2;
- if (maxswzone && n > maxswzone / sizeof(struct swblock))
- n = maxswzone / sizeof(struct swblock);
+ if (maxswzone && n > maxswzone / sizeof(struct swblk))
+ n = maxswzone / sizeof(struct swblk);
+ swpctrie_zone = uma_zcreate("swpctrie", pctrie_node_size(), NULL, NULL,
+ pctrie_zone_init, NULL, UMA_ALIGN_PTR,
+ UMA_ZONE_NOFREE | UMA_ZONE_VM);
+ if (swpctrie_zone == NULL)
+ panic("failed to create swap pctrie zone.");
+ swblk_zone = uma_zcreate("swblk", sizeof(struct swblk), NULL, NULL,
+ NULL, NULL, _Alignof(struct swblk) - 1,
+ UMA_ZONE_NOFREE | UMA_ZONE_VM);
+ if (swblk_zone == NULL)
+ panic("failed to create swap blk zone.");
n2 = n;
- swap_zone = uma_zcreate("SWAPMETA", sizeof(struct swblock), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM);
- if (swap_zone == NULL)
- panic("failed to create swap_zone.");
do {
- if (uma_zone_reserve_kva(swap_zone, n))
+ if (uma_zone_reserve_kva(swblk_zone, n))
break;
/*
* if the allocation failed, try a zone two thirds the
@@ -551,24 +539,12 @@
n -= ((n + 2) / 3);
} while (n > 0);
if (n2 != n)
- printf("Swap zone entries reduced from %lu to %lu.\n", n2, n);
+ printf("Swap blk zone entries reduced from %lu to %lu.\n",
+ n2, n);
swap_maxpages = n * SWAP_META_PAGES;
- swzone = n * sizeof(struct swblock);
- n2 = n;
-
- /*
- * Initialize our meta-data hash table. The swapper does not need to
- * be quite as efficient as the VM system, so we do not use an
- * oversized hash table.
- *
- * n: size of hash table, must be power of 2
- * swhash_mask: hash table index mask
- */
- for (n = 1; n < n2 / 8; n *= 2)
- ;
- swhash = malloc(sizeof(struct swblock *) * n, M_VMPGDATA, M_WAITOK | M_ZERO);
- swhash_mask = n - 1;
- mtx_init(&swhash_mtx, "swap_pager swhash", NULL, MTX_DEF);
+ swzone = n * sizeof(struct swblk);
+ if (!uma_zone_reserve_kva(swpctrie_zone, n))
+ panic("Cannot reserve swap pctrie zone entries.");
}
static vm_object_t
@@ -582,14 +558,20 @@
return (NULL);
crhold(cred);
}
+
+ /*
+ * The un_pager.swp.swp_blks trie is initialized by
+ * vm_object_allocate() to ensure the correct order of
+ * visibility.
+ */
object = vm_object_allocate(OBJT_SWAP, OFF_TO_IDX(offset +
PAGE_MASK + size));
+
object->handle = handle;
if (cred != NULL) {
object->cred = cred;
object->charge = size;
}
- object->un_pager.swp.swp_bcount = 0;
return (object);
}
@@ -1596,29 +1578,25 @@
int
swap_pager_isswapped(vm_object_t object, struct swdevt *sp)
{
- daddr_t index = 0;
- int bcount;
+ struct swblk *sb;
+ vm_pindex_t pi;
int i;
VM_OBJECT_ASSERT_WLOCKED(object);
if (object->type != OBJT_SWAP)
return (0);
- mtx_lock(&swhash_mtx);
- for (bcount = 0; bcount < object->un_pager.swp.swp_bcount; bcount++) {
- struct swblock *swap;
-
- if ((swap = *swp_pager_hash(object, index)) != NULL) {
- for (i = 0; i < SWAP_META_PAGES; ++i) {
- if (swp_pager_isondev(swap->swb_pages[i], sp)) {
- mtx_unlock(&swhash_mtx);
- return (1);
- }
- }
+ for (pi = 0;; pi = sb->p + SWAP_META_PAGES) {
+ sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, pi);
+ if (sb == NULL)
+ break;
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] == SWAPBLK_NONE)
+ continue;
+ if (swp_pager_isondev(sb->d[i], sp))
+ return (1);
}
- index += SWAP_META_PAGES;
}
- mtx_unlock(&swhash_mtx);
return (0);
}
@@ -1688,50 +1666,59 @@
static void
swap_pager_swapoff(struct swdevt *sp)
{
- struct swblock *swap;
- vm_object_t locked_obj, object;
- vm_pindex_t pindex;
- int i, j, retries;
+ struct swblk *sb;
+ vm_object_t object;
+ vm_pindex_t pi;
+ int i, retries;
sx_assert(&swdev_syscall_lock, SA_XLOCKED);
retries = 0;
- locked_obj = NULL;
full_rescan:
- mtx_lock(&swhash_mtx);
- for (i = 0; i <= swhash_mask; i++) { /* '<=' is correct here */
-restart:
- for (swap = swhash[i]; swap != NULL; swap = swap->swb_hnext) {
- object = swap->swb_object;
- pindex = swap->swb_index;
- for (j = 0; j < SWAP_META_PAGES; ++j) {
- if (!swp_pager_isondev(swap->swb_pages[j], sp))
+ mtx_lock(&vm_object_list_mtx);
+ TAILQ_FOREACH(object, &vm_object_list, object_list) {
+ if (object->type != OBJT_SWAP)
+ continue;
+ mtx_unlock(&vm_object_list_mtx);
+ /* Depends on type-stability. */
+ VM_OBJECT_WLOCK(object);
+
+ /*
+ * Dead objects are eventually terminated on their own.
+ */
+ if ((object->flags & OBJ_DEAD) != 0)
+ goto next_obj;
+
+ /*
+ * Synchronize with the fences placed after pctrie
+ * initialization. We must not access the pctrie below
+ * unless we have checked that the object is a swap
+ * object and is not dead.
+ */
+ atomic_thread_fence_acq();
+ if (object->type != OBJT_SWAP)
+ goto next_obj;
+
+ for (pi = 0;;) {
+ sb = SWAP_PCTRIE_LOOKUP_GE(
+ &object->un_pager.swp.swp_blks, pi);
+ if (sb == NULL)
+ break;
+ pi = sb->p + SWAP_META_PAGES;
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] == SWAPBLK_NONE)
continue;
- if (locked_obj != object) {
- if (locked_obj != NULL)
- VM_OBJECT_WUNLOCK(locked_obj);
- locked_obj = object;
- if (!VM_OBJECT_TRYWLOCK(object)) {
- mtx_unlock(&swhash_mtx);
- /* Depends on type-stability. */
- VM_OBJECT_WLOCK(object);
- mtx_lock(&swhash_mtx);
- goto restart;
- }
- }
- MPASS(locked_obj == object);
- mtx_unlock(&swhash_mtx);
- swp_pager_force_pagein(object, pindex + j);
- mtx_lock(&swhash_mtx);
- goto restart;
+ if (swp_pager_isondev(sb->d[i], sp))
+ swp_pager_force_pagein(object,
+ sb->p + i);
}
}
+next_obj:
+ VM_OBJECT_WUNLOCK(object);
+ mtx_lock(&vm_object_list_mtx);
}
- mtx_unlock(&swhash_mtx);
- if (locked_obj != NULL) {
- VM_OBJECT_WUNLOCK(locked_obj);
- locked_obj = NULL;
- }
+ mtx_unlock(&vm_object_list_mtx);
+
if (sp->sw_used) {
/*
* Objects may be locked or paging to the device being
@@ -1775,85 +1762,88 @@
static void
swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk)
{
- static volatile int exhausted;
- struct swblock *swap;
- struct swblock **pswap;
- int idx;
+ static volatile int swblk_zone_exhausted, swpctrie_zone_exhausted;
+ struct swblk *sb;
+ vm_pindex_t modpi, rdpi;
+ int error, i;
VM_OBJECT_ASSERT_WLOCKED(object);
+
/*
* Convert default object to swap object if necessary
*/
if (object->type != OBJT_SWAP) {
+ pctrie_init(&object->un_pager.swp.swp_blks);
+
+ /*
+ * Ensure that swap_pager_swapoff()'s iteration over
+ * object_list does not see a garbage pctrie.
+ */
+ atomic_thread_fence_rel();
+
object->type = OBJT_SWAP;
- object->un_pager.swp.swp_bcount = 0;
KASSERT(object->handle == NULL, ("default pager with handle"));
}
- /*
- * Locate hash entry. If not found create, but if we aren't adding
- * anything just return. If we run out of space in the map we wait
- * and, since the hash table may have changed, retry.
- */
-retry:
- mtx_lock(&swhash_mtx);
- pswap = swp_pager_hash(object, pindex);
-
- if ((swap = *pswap) == NULL) {
- int i;
-
+ rdpi = rounddown(pindex, SWAP_META_PAGES);
+ sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rdpi);
+ if (sb == NULL) {
if (swapblk == SWAPBLK_NONE)
- goto done;
-
- swap = *pswap = uma_zalloc(swap_zone, M_NOWAIT |
- (curproc == pageproc ? M_USE_RESERVE : 0));
- if (swap == NULL) {
- mtx_unlock(&swhash_mtx);
+ return;
+ for (;;) {
+ sb = uma_zalloc(swblk_zone, M_NOWAIT | (curproc ==
+ pageproc ? M_USE_RESERVE : 0));
+ if (sb != NULL) {
+ sb->p = rdpi;
+ for (i = 0; i < SWAP_META_PAGES; i++)
+ sb->d[i] = SWAPBLK_NONE;
+ if (atomic_cmpset_int(&swblk_zone_exhausted,
+ 1, 0))
+ printf("swblk zone ok\n");
+ break;
+ }
VM_OBJECT_WUNLOCK(object);
- if (uma_zone_exhausted(swap_zone)) {
- if (atomic_cmpset_int(&exhausted, 0, 1))
- printf("swap zone exhausted, "
+ if (uma_zone_exhausted(swblk_zone)) {
+ if (atomic_cmpset_int(&swblk_zone_exhausted,
+ 0, 1))
+ printf("swap blk zone exhausted, "
"increase kern.maxswzone\n");
vm_pageout_oom(VM_OOM_SWAPZ);
- pause("swzonex", 10);
+ pause("swzonxb", 10);
+ } else
+ VM_WAIT;
+ VM_OBJECT_WLOCK(object);
+ }
+ for (;;) {
+ error = SWAP_PCTRIE_INSERT(
+ &object->un_pager.swp.swp_blks, sb);
+ if (error == 0) {
+ if (atomic_cmpset_int(&swpctrie_zone_exhausted,
+ 1, 0))
+ printf("swpctrie zone ok\n");
+ break;
+ }
+ VM_OBJECT_WUNLOCK(object);
+ if (uma_zone_exhausted(swpctrie_zone)) {
+ if (atomic_cmpset_int(&swpctrie_zone_exhausted,
+ 0, 1))
+ printf("swap pctrie zone exhausted, "
+ "increase kern.maxswzone\n");
+ vm_pageout_oom(VM_OOM_SWAPZ);
+ pause("swzonxp", 10);
} else
VM_WAIT;
VM_OBJECT_WLOCK(object);
- goto retry;
}
-
- if (atomic_cmpset_int(&exhausted, 1, 0))
- printf("swap zone ok\n");
-
- swap->swb_hnext = NULL;
- swap->swb_object = object;
- swap->swb_index = pindex & ~(vm_pindex_t)SWAP_META_MASK;
- swap->swb_count = 0;
-
- ++object->un_pager.swp.swp_bcount;
-
- for (i = 0; i < SWAP_META_PAGES; ++i)
- swap->swb_pages[i] = SWAPBLK_NONE;
- }
-
- /*
- * Delete prior contents of metadata
- */
- idx = pindex & SWAP_META_MASK;
-
- if (swap->swb_pages[idx] != SWAPBLK_NONE) {
- swp_pager_freeswapspace(swap->swb_pages[idx], 1);
- --swap->swb_count;
}
+ MPASS(sb->p == rdpi);
- /*
- * Enter block into metadata
- */
- swap->swb_pages[idx] = swapblk;
- if (swapblk != SWAPBLK_NONE)
- ++swap->swb_count;
-done:
- mtx_unlock(&swhash_mtx);
+ modpi = pindex % SWAP_META_PAGES;
+ /* Delete prior contents of metadata. */
+ if (sb->d[modpi] != SWAPBLK_NONE)
+ swp_pager_freeswapspace(sb->d[modpi], 1);
+ /* Enter block into metadata. */
+ sb->d[modpi] = swapblk;
}
/*
@@ -1867,42 +1857,40 @@
* with resident pages.
*/
static void
-swp_pager_meta_free(vm_object_t object, vm_pindex_t index, vm_pindex_t count)
+swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count)
{
- struct swblock **pswap, *swap;
- vm_pindex_t c;
- daddr_t v;
- int n, sidx;
+ struct swblk *sb;
+ vm_pindex_t last;
+ int i;
+ bool empty;
VM_OBJECT_ASSERT_LOCKED(object);
if (object->type != OBJT_SWAP || count == 0)
return;
- mtx_lock(&swhash_mtx);
- for (c = 0; c < count;) {
- pswap = swp_pager_hash(object, index);
- sidx = index & SWAP_META_MASK;
- n = SWAP_META_PAGES - sidx;
- index += n;
- if ((swap = *pswap) == NULL) {
- c += n;
- continue;
- }
- for (; c < count && sidx < SWAP_META_PAGES; ++c, ++sidx) {
- if ((v = swap->swb_pages[sidx]) == SWAPBLK_NONE)
+ last = pindex + count - 1;
+ for (;;) {
+ sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks,
+ rounddown(pindex, SWAP_META_PAGES));
+ if (sb == NULL || sb->p > last)
+ break;
+ empty = true;
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] == SWAPBLK_NONE)
continue;
- swp_pager_freeswapspace(v, 1);
- swap->swb_pages[sidx] = SWAPBLK_NONE;
- if (--swap->swb_count == 0) {
- *pswap = swap->swb_hnext;
- uma_zfree(swap_zone, swap);
- --object->un_pager.swp.swp_bcount;
- c += SWAP_META_PAGES - sidx;
- break;
- }
+ if (pindex <= sb->p + i && sb->p + i <= last) {
+ swp_pager_freeswapspace(sb->d[i], 1);
+ sb->d[i] = SWAPBLK_NONE;
+ } else
+ empty = false;
+ }
+ pindex = sb->p + SWAP_META_PAGES;
+ if (empty) {
+ SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks,
+ sb->p);
+ uma_zfree(swblk_zone, sb);
}
}
- mtx_unlock(&swhash_mtx);
}
/*
@@ -1914,36 +1902,27 @@
static void
swp_pager_meta_free_all(vm_object_t object)
{
- struct swblock **pswap, *swap;
- vm_pindex_t index;
- daddr_t v;
+ struct swblk *sb;
+ vm_pindex_t pindex;
int i;
VM_OBJECT_ASSERT_WLOCKED(object);
if (object->type != OBJT_SWAP)
return;
- index = 0;
- while (object->un_pager.swp.swp_bcount != 0) {
- mtx_lock(&swhash_mtx);
- pswap = swp_pager_hash(object, index);
- if ((swap = *pswap) != NULL) {
- for (i = 0; i < SWAP_META_PAGES; ++i) {
- v = swap->swb_pages[i];
- if (v != SWAPBLK_NONE) {
- --swap->swb_count;
- swp_pager_freeswapspace(v, 1);
- }
- }
- if (swap->swb_count != 0)
- panic(
- "swap_pager_meta_free_all: swb_count != 0");
- *pswap = swap->swb_hnext;
- uma_zfree(swap_zone, swap);
- --object->un_pager.swp.swp_bcount;
+ pindex = 0;
+ for (;;) {
+ sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks,
+ pindex);
+ if (sb == NULL)
+ break;
+ pindex = sb->p + 1;
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] != SWAPBLK_NONE)
+ swp_pager_freeswapspace(sb->d[i], 1);
}
- mtx_unlock(&swhash_mtx);
- index += SWAP_META_PAGES;
+ SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p);
+ uma_zfree(swblk_zone, sb);
}
}
@@ -1958,7 +1937,7 @@
* meta-data swapblks.
*
* It is not possible to store invalid swapblks in the swap meta data
- * (other then a literal 'SWAPBLK_NONE'), so we don't bother checking.
+ * (even a literal 'SWAPBLK_NONE'), so we don't bother checking.
*
* When acting on a busy resident page and paging is in progress, we
* have to wait until paging is complete but otherwise can act on the
@@ -1970,10 +1949,9 @@
static daddr_t
swp_pager_meta_ctl(vm_object_t object, vm_pindex_t pindex, int flags)
{
- struct swblock **pswap;
- struct swblock *swap;
+ struct swblk *sb;
daddr_t r1;
- int idx;
+ int i;
VM_OBJECT_ASSERT_LOCKED(object);
/*
@@ -1983,30 +1961,29 @@
if (object->type != OBJT_SWAP)
return (SWAPBLK_NONE);
- r1 = SWAPBLK_NONE;
- mtx_lock(&swhash_mtx);
- pswap = swp_pager_hash(object, pindex);
-
- if ((swap = *pswap) != NULL) {
- idx = pindex & SWAP_META_MASK;
- r1 = swap->swb_pages[idx];
-
- if (r1 != SWAPBLK_NONE) {
- if (flags & SWM_FREE) {
- swp_pager_freeswapspace(r1, 1);
- r1 = SWAPBLK_NONE;
- }
- if (flags & (SWM_FREE|SWM_POP)) {
- swap->swb_pages[idx] = SWAPBLK_NONE;
- if (--swap->swb_count == 0) {
- *pswap = swap->swb_hnext;
- uma_zfree(swap_zone, swap);
- --object->un_pager.swp.swp_bcount;
- }
- }
+ sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks,
+ rounddown(pindex, SWAP_META_PAGES));
+ if (sb == NULL)
+ return (SWAPBLK_NONE);
+ r1 = sb->d[pindex % SWAP_META_PAGES];
+ if (r1 == SWAPBLK_NONE)
+ return (SWAPBLK_NONE);
+ if ((flags & (SWM_FREE | SWM_POP)) != 0) {
+ sb->d[pindex % SWAP_META_PAGES] = SWAPBLK_NONE;
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] != SWAPBLK_NONE)
+ break;
+ }
+ if (i == SWAP_META_PAGES) {
+ SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks,
+ rounddown(pindex, SWAP_META_PAGES));
+ uma_zfree(swblk_zone, sb);
}
}
- mtx_unlock(&swhash_mtx);
+ if ((flags & SWM_FREE) != 0) {
+ swp_pager_freeswapspace(r1, 1);
+ r1 = SWAPBLK_NONE;
+ }
return (r1);
}
@@ -2020,32 +1997,36 @@
vm_pindex_t
swap_pager_find_least(vm_object_t object, vm_pindex_t pindex)
{
- struct swblock **pswap, *swap;
- vm_pindex_t i, j, lim;
- int idx;
+ struct swblk *sb;
+ int i;
VM_OBJECT_ASSERT_LOCKED(object);
- if (object->type != OBJT_SWAP || object->un_pager.swp.swp_bcount == 0)
+ if (object->type != OBJT_SWAP)
return (object->size);
- mtx_lock(&swhash_mtx);
- for (j = pindex; j < object->size; j = lim) {
- pswap = swp_pager_hash(object, j);
- lim = rounddown2(j + SWAP_META_PAGES, SWAP_META_PAGES);
- if (lim > object->size)
- lim = object->size;
- if ((swap = *pswap) != NULL) {
- for (idx = j & SWAP_META_MASK, i = j; i < lim;
- i++, idx++) {
- if (swap->swb_pages[idx] != SWAPBLK_NONE)
- goto found;
- }
- }
+ sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks,
+ rounddown(pindex, SWAP_META_PAGES));
+ if (sb == NULL)
+ return (object->size);
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->p + i >= pindex && sb->d[i] != SWAPBLK_NONE)
+ return (sb->p + i);
+ }
+ sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks,
+ roundup(pindex, SWAP_META_PAGES));
+ if (sb == NULL)
+ return (object->size);
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] != SWAPBLK_NONE)
+ return (sb->p + i);
}
- i = object->size;
-found:
- mtx_unlock(&swhash_mtx);
- return (i);
+
+ /*
+ * We get here if a swblk is present in the trie but it
+ * doesn't map any blocks.
+ */
+ MPASS(0);
+ return (object->size);
}
/*
@@ -2081,7 +2062,7 @@
* Swap metadata may not fit in the KVM if we have physical
* memory of >1GB.
*/
- if (swap_zone == NULL) {
+ if (swblk_zone == NULL) {
error = ENOMEM;
goto done;
}
@@ -2125,7 +2106,7 @@
unsigned long maxpages;
/* absolute maximum we can handle assuming 100% efficiency */
- maxpages = uma_zone_get_max(swap_zone) * SWAP_META_PAGES;
+ maxpages = uma_zone_get_max(swblk_zone) * SWAP_META_PAGES;
/* recommend using no more than half that amount */
if (npages > maxpages / 2) {
@@ -2466,7 +2447,10 @@
vm_map_t map;
vm_map_entry_t cur;
vm_object_t object;
+ struct swblk *sb;
+ vm_pindex_t bcount, pi;
long count, n;
+ int i;
map = &vmspace->vm_map;
count = 0;
@@ -2474,12 +2458,22 @@
for (cur = map->header.next; cur != &map->header; cur = cur->next) {
if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 &&
(object = cur->object.vm_object) != NULL) {
+ if (object->type != OBJT_SWAP)
+ continue;
VM_OBJECT_WLOCK(object);
- if (object->type == OBJT_SWAP &&
- object->un_pager.swp.swp_bcount != 0) {
+ if (object->type == OBJT_SWAP) {
+ for (bcount = 0, pi = 0;; pi = sb->p + 1) {
+ sb = SWAP_PCTRIE_LOOKUP_GE(
+ &object->un_pager.swp.swp_blks, pi);
+ if (sb == NULL)
+ break;
+ for (i = 0; i < SWAP_META_PAGES; i++) {
+ if (sb->d[i] != SWAPBLK_NONE)
+ bcount++;
+ }
+ }
n = (cur->end - cur->start) / PAGE_SIZE;
- count += object->un_pager.swp.swp_bcount *
- SWAP_META_PAGES * n / object->size + 1;
+ count += bcount * n / object->size + 1;
}
VM_OBJECT_WUNLOCK(object);
}
Index: sys/vm/vm_object.h
===================================================================
--- sys/vm/vm_object.h
+++ sys/vm/vm_object.h
@@ -70,6 +70,7 @@
#include <sys/queue.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
+#include <sys/_pctrie.h>
#include <sys/_rwlock.h>
#include <vm/_vm_radix.h>
@@ -151,13 +152,12 @@
* the handle changed and hash-chain
* invalid.
*
- * swp_bcount - number of swap 'swblock' metablocks, each
- * contains up to 16 swapblk assignments.
- * see vm/swap_pager.h
+ * swp_blks - pc-trie of the allocated swap blocks.
+ *
*/
struct {
void *swp_tmpfs;
- int swp_bcount;
+ struct pctrie swp_blks;
} swp;
} un_pager;
struct ucred *cred;
Index: sys/vm/vm_object.c
===================================================================
--- sys/vm/vm_object.c
+++ sys/vm/vm_object.c
@@ -73,6 +73,7 @@
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/kernel.h>
+#include <sys/pctrie.h>
#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h> /* for curproc, pageproc */
@@ -208,6 +209,7 @@
object->paging_in_progress = 0;
object->resident_page_count = 0;
object->shadow_count = 0;
+ object->flags = OBJ_DEAD;
mtx_lock(&vm_object_list_mtx);
TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
@@ -223,6 +225,16 @@
LIST_INIT(&object->shadow_head);
object->type = type;
+ if (type == OBJT_SWAP)
+ pctrie_init(&object->un_pager.swp.swp_blks);
+
+ /*
+ * Ensure that swap_pager_swapoff()'s iteration over object_list
+ * sees an up-to-date type and pctrie head if it observed
+ * a non-dead object.
+ */
+ atomic_thread_fence_rel();
+
switch (type) {
case OBJT_DEAD:
panic("_vm_object_allocate: can't create OBJT_DEAD");

File Metadata

Mime Type
text/plain
Expires
Wed, Mar 12, 5:55 PM (4 h, 36 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
17123766
Default Alt Text
D11435.id30807.diff (24 KB)

Event Timeline