Changeset View
Changeset View
Standalone View
Standalone View
sys/vm/swap_pager.c
Show First 20 Lines • Show All 149 Lines • ▼ Show 20 Lines | |||||
static struct swdevt *swdevhd;		/* Allocate from here next */
static int nswapdev;			/* Number of swap devices */
int swap_pager_avail;
static struct sx swdev_syscall_lock;	/* serialize swap(on|off) */

/* Bookkeeping for swap accounting; exported read-only via sysctl below. */
static u_long swap_reserved;
static u_long swap_total;
static int sysctl_page_shift(SYSCTL_HANDLER_ARGS);

/* Parent node for swap statistics, alongside vm.stats.page/object. */
static SYSCTL_NODE(_vm_stats, OID_AUTO, swap, CTLFLAG_RD, 0, "VM swap stats");
SYSCTL_PROC(_vm, OID_AUTO, swap_reserved, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
    &swap_reserved, 0, sysctl_page_shift, "A",
    "Amount of swap storage needed to back all allocated anonymous memory.");
SYSCTL_PROC(_vm, OID_AUTO, swap_total, CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_MPSAFE,
    &swap_total, 0, sysctl_page_shift, "A",
    "Total amount of available swap storage.");

static int overcommit = 0;
SYSCTL_INT(_vm, VM_OVERCOMMIT, overcommit, CTLFLAG_RW, &overcommit, 0,
    "Configure virtual memory overcommit behavior. See tuning(7) "
    "for details.");
static unsigned long swzone;
SYSCTL_ULONG(_vm, OID_AUTO, swzone, CTLFLAG_RD, &swzone, 0,
    "Actual size of swap metadata zone");
static unsigned long swap_maxpages;
SYSCTL_ULONG(_vm, OID_AUTO, swap_maxpages, CTLFLAG_RD, &swap_maxpages, 0,
    "Maximum amount of swap supported");

/*
 * Counts pages whose swap-space free was deferred to the page daemon
 * (PGA_SWAP_FREE set without the object lock); allocated by
 * swap_pager_counters() at SYSINIT time.
 */
static counter_u64_t swap_free_deferred;
SYSCTL_COUNTER_U64(_vm_stats_swap, OID_AUTO, free_deferred,
    CTLFLAG_RD, &swap_free_deferred,
    "Number of pages that deferred freeing swap space");
markj: Could we put these under vm.stats.swap instead, so there is a little more harmony with vm.stats.page and vm.stats.object?
/*
 * Counts deferred swap-space frees actually completed (incremented in
 * swap_pager_unswapped() when PGA_SWAP_FREE is observed with the object
 * lock held); allocated by swap_pager_counters() at SYSINIT time.
 */
static counter_u64_t swap_free_completed;
SYSCTL_COUNTER_U64(_vm_stats_swap, OID_AUTO, free_completed,
    CTLFLAG_RD, &swap_free_completed,
    "Number of deferred frees completed");

/* bits from overcommit */
#define SWAP_RESERVE_FORCE_ON		(1 << 0)
#define SWAP_RESERVE_RLIMIT_ON		(1 << 1)
#define SWAP_RESERVE_ALLOW_NONWIRED	(1 << 2)
static int | static int | ||||
sysctl_page_shift(SYSCTL_HANDLER_ARGS) | sysctl_page_shift(SYSCTL_HANDLER_ARGS) | ||||
{ | { | ||||
▲ Show 20 Lines • Show All 307 Lines • ▼ Show 20 Lines | swp_sizecheck(void) | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* SWAP_PAGER_INIT() - initialize the swap pager! | * SWAP_PAGER_INIT() - initialize the swap pager! | ||||
* | * | ||||
* Expected to be started from system init. NOTE: This code is run | * Expected to be started from system init. NOTE: This code is run | ||||
* before much else so be careful what you depend on. Most of the VM | * before much else so be careful what you depend on. Most of the VM | ||||
* system has yet to be initialized at this point. | * system has yet to be initialized at this point. | ||||
jeff (Done Inline Action): Accidental whitespace; I will remove before commit.
*/ | */ | ||||
static void | static void | ||||
swap_pager_init(void) | swap_pager_init(void) | ||||
{ | { | ||||
/* | /* | ||||
* Initialize object lists | * Initialize object lists | ||||
*/ | */ | ||||
int i; | int i; | ||||
for (i = 0; i < NOBJLISTS; ++i) | for (i = 0; i < NOBJLISTS; ++i) | ||||
TAILQ_INIT(&swap_pager_object_list[i]); | TAILQ_INIT(&swap_pager_object_list[i]); | ||||
mtx_init(&sw_dev_mtx, "swapdev", NULL, MTX_DEF); | mtx_init(&sw_dev_mtx, "swapdev", NULL, MTX_DEF); | ||||
sx_init(&sw_alloc_sx, "swspsx"); | sx_init(&sw_alloc_sx, "swspsx"); | ||||
sx_init(&swdev_syscall_lock, "swsysc"); | sx_init(&swdev_syscall_lock, "swsysc"); | ||||
} | } | ||||
static void | |||||
swap_pager_counters(void) | |||||
{ | |||||
swap_free_deferred = counter_u64_alloc(M_WAITOK); | |||||
swap_free_completed = counter_u64_alloc(M_WAITOK); | |||||
} | |||||
SYSINIT(swap_counters, SI_SUB_CPU, SI_ORDER_ANY, swap_pager_counters, NULL); | |||||
/* | /* | ||||
* SWAP_PAGER_SWAP_INIT() - swap pager initialization from pageout process | * SWAP_PAGER_SWAP_INIT() - swap pager initialization from pageout process | ||||
* | * | ||||
* Expected to be started from pageout process once, prior to entering | * Expected to be started from pageout process once, prior to entering | ||||
* its main loop. | * its main loop. | ||||
*/ | */ | ||||
void | void | ||||
swap_pager_swap_init(void) | swap_pager_swap_init(void) | ||||
▲ Show 20 Lines • Show All 583 Lines • ▼ Show 20 Lines | |||||
* | * | ||||
* NOTE!!! If the page is clean and the swap was valid, the caller | * NOTE!!! If the page is clean and the swap was valid, the caller | ||||
* should make the page dirty before calling this routine. This routine | * should make the page dirty before calling this routine. This routine | ||||
* does NOT change the m->dirty status of the page. Also: MADV_FREE | * does NOT change the m->dirty status of the page. Also: MADV_FREE | ||||
* depends on it. | * depends on it. | ||||
* | * | ||||
* This routine may not sleep. | * This routine may not sleep. | ||||
* | * | ||||
* The object containing the page must be locked. | * The object containing the page may be locked. | ||||
*/ | */ | ||||
static void | static void | ||||
swap_pager_unswapped(vm_page_t m) | swap_pager_unswapped(vm_page_t m) | ||||
{ | { | ||||
struct swblk *sb; | struct swblk *sb; | ||||
vm_object_t obj; | |||||
VM_OBJECT_ASSERT_WLOCKED(m->object); | /* | ||||
* Handle enqueing deferred frees first. If we do not have the | |||||
* object lock we wait for the page daemon to clear the space. | |||||
*/ | |||||
obj = m->object; | |||||
markj (Unsubmitted, Not Done Inline Action): Assert that either the object is wlocked or the page is xbusied here? I believe that is true among existing callers, or else it is not safe to dereference obj.
jeff (Author, Unsubmitted, Done Inline Action): Yes, that is true; I can add this assert.
if (!VM_OBJECT_WOWNED(obj)) { | |||||
/* | |||||
* The caller is responsible for synchronization but we | |||||
jeff (Done Inline Action): I prefer not to trylock here. In my experience, if you trylock from a great number of threads you can end up with degenerate behavior. By pushing it into the page daemon we have only a couple of threads that are potentially adding contention to the object lock.
* will harmlessly handle races. This is typically provided | |||||
* by only calling unswapped() when a page transitions from | |||||
* clean to dirty. | |||||
*/ | |||||
if ((m->a.flags & (PGA_SWAP_SPACE | PGA_SWAP_FREE)) == | |||||
PGA_SWAP_SPACE) { | |||||
vm_page_aflag_set(m, PGA_SWAP_FREE); | |||||
counter_u64_add(swap_free_deferred, 1); | |||||
} | |||||
return; | |||||
} | |||||
if ((m->a.flags & PGA_SWAP_FREE) != 0) | |||||
counter_u64_add(swap_free_completed, 1); | |||||
vm_page_aflag_clear(m, PGA_SWAP_FREE | PGA_SWAP_SPACE); | |||||
/* | /* | ||||
* The meta data only exists if the object is OBJT_SWAP | * The meta data only exists if the object is OBJT_SWAP | ||||
* and even then might not be allocated yet. | * and even then might not be allocated yet. | ||||
*/ | */ | ||||
KASSERT(m->object->type == OBJT_SWAP, | KASSERT(m->object->type == OBJT_SWAP, | ||||
("Free object not swappable")); | ("Free object not swappable")); | ||||
▲ Show 20 Lines • Show All 302 Lines • ▼ Show 20 Lines | for (i = 0; i < count; i += n) { | ||||
bp->b_rcred = crhold(thread0.td_ucred); | bp->b_rcred = crhold(thread0.td_ucred); | ||||
bp->b_wcred = crhold(thread0.td_ucred); | bp->b_wcred = crhold(thread0.td_ucred); | ||||
bp->b_bcount = PAGE_SIZE * n; | bp->b_bcount = PAGE_SIZE * n; | ||||
bp->b_bufsize = PAGE_SIZE * n; | bp->b_bufsize = PAGE_SIZE * n; | ||||
bp->b_blkno = blk; | bp->b_blkno = blk; | ||||
VM_OBJECT_WLOCK(object); | VM_OBJECT_WLOCK(object); | ||||
for (j = 0; j < n; ++j) { | for (j = 0; j < n; ++j) { | ||||
mreq = ma[i + j]; | mreq = ma[i + j]; | ||||
jeff (Done Inline Action): This is a bug. I am running stress2 now.
vm_page_aflag_clear(mreq, PGA_SWAP_FREE); | |||||
addr = swp_pager_meta_build(mreq->object, mreq->pindex, | addr = swp_pager_meta_build(mreq->object, mreq->pindex, | ||||
blk + j); | blk + j); | ||||
if (addr != SWAPBLK_NONE) | if (addr != SWAPBLK_NONE) | ||||
swp_pager_update_freerange(&s_free, &n_free, | swp_pager_update_freerange(&s_free, &n_free, | ||||
addr); | addr); | ||||
MPASS(mreq->dirty == VM_PAGE_BITS_ALL); | MPASS(mreq->dirty == VM_PAGE_BITS_ALL); | ||||
mreq->oflags |= VPO_SWAPINPROG; | mreq->oflags |= VPO_SWAPINPROG; | ||||
bp->b_pages[j] = mreq; | bp->b_pages[j] = mreq; | ||||
▲ Show 20 Lines • Show All 108 Lines • ▼ Show 20 Lines | for (i = 0; i < bp->b_npages; ++i) { | ||||
vm_page_t m = bp->b_pages[i]; | vm_page_t m = bp->b_pages[i]; | ||||
m->oflags &= ~VPO_SWAPINPROG; | m->oflags &= ~VPO_SWAPINPROG; | ||||
if (m->oflags & VPO_SWAPSLEEP) { | if (m->oflags & VPO_SWAPSLEEP) { | ||||
m->oflags &= ~VPO_SWAPSLEEP; | m->oflags &= ~VPO_SWAPSLEEP; | ||||
wakeup(&object->handle); | wakeup(&object->handle); | ||||
} | } | ||||
/* We always have space after I/O, successful or not. */ | |||||
vm_page_aflag_set(m, PGA_SWAP_SPACE); | |||||
if (bp->b_ioflags & BIO_ERROR) { | if (bp->b_ioflags & BIO_ERROR) { | ||||
/* | /* | ||||
* If an error occurs I'd love to throw the swapblk | * If an error occurs I'd love to throw the swapblk | ||||
* away without freeing it back to swapspace, so it | * away without freeing it back to swapspace, so it | ||||
* can never be used again. But I can't from an | * can never be used again. But I can't from an | ||||
* interrupt. | * interrupt. | ||||
*/ | */ | ||||
if (bp->b_iocmd == BIO_READ) { | if (bp->b_iocmd == BIO_READ) { | ||||
/* | /* | ||||
* NOTE: for reads, m->dirty will probably | * NOTE: for reads, m->dirty will probably | ||||
* be overridden by the original caller of | * be overridden by the original caller of | ||||
* getpages so don't play cute tricks here. | * getpages so don't play cute tricks here. | ||||
*/ | */ | ||||
vm_page_invalid(m); | vm_page_invalid(m); | ||||
} else { | } else { | ||||
/* | /* | ||||
* If a write error occurs, reactivate page | * If a write error occurs, reactivate page | ||||
* so it doesn't clog the inactive list, | * so it doesn't clog the inactive list, | ||||
* then finish the I/O. | * then finish the I/O. | ||||
*/ | */ | ||||
MPASS(m->dirty == VM_PAGE_BITS_ALL); | MPASS(m->dirty == VM_PAGE_BITS_ALL); | ||||
/* PQ_UNSWAPPABLE? */ | |||||
jeff (Done Inline Action): The page isn't exactly unswappable, but the block is dead. On read, both are. The comment above ("If an error occurs I'd love to...") may be possible now.
vm_page_lock(m); | vm_page_lock(m); | ||||
vm_page_activate(m); | vm_page_activate(m); | ||||
vm_page_unlock(m); | vm_page_unlock(m); | ||||
vm_page_sunbusy(m); | vm_page_sunbusy(m); | ||||
} | } | ||||
} else if (bp->b_iocmd == BIO_READ) { | } else if (bp->b_iocmd == BIO_READ) { | ||||
/* | /* | ||||
* NOTE: for reads, m->dirty will probably be | * NOTE: for reads, m->dirty will probably be | ||||
▲ Show 20 Lines • Show All 1,472 Lines • Show Last 20 Lines |
Could we put these under vm.stats.swap instead, so there is a little more harmony with vm.stats.page and vm.stats.object?