Changeset View
Changeset View
Standalone View
Standalone View
sys/vm/vm_pager.c
Show First 20 Lines • Show All 79 Lines • ▼ Show 20 Lines | |||||
#include <vm/vm.h> | #include <vm/vm.h> | ||||
#include <vm/vm_param.h> | #include <vm/vm_param.h> | ||||
#include <vm/vm_kern.h> | #include <vm/vm_kern.h> | ||||
#include <vm/vm_object.h> | #include <vm/vm_object.h> | ||||
#include <vm/vm_page.h> | #include <vm/vm_page.h> | ||||
#include <vm/vm_pager.h> | #include <vm/vm_pager.h> | ||||
#include <vm/vm_extern.h> | #include <vm/vm_extern.h> | ||||
#include <vm/uma.h> | |||||
#include "opt_swap.h" | |||||
int cluster_pbuf_freecnt = -1; /* unlimited to begin with */ | uma_zone_t pbuf_zone; | ||||
struct buf *swbuf; | |||||
static int dead_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); | static int dead_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); | ||||
static vm_object_t dead_pager_alloc(void *, vm_ooffset_t, vm_prot_t, | static vm_object_t dead_pager_alloc(void *, vm_ooffset_t, vm_prot_t, | ||||
vm_ooffset_t, struct ucred *); | vm_ooffset_t, struct ucred *); | ||||
static void dead_pager_putpages(vm_object_t, vm_page_t *, int, int, int *); | static void dead_pager_putpages(vm_object_t, vm_page_t *, int, int, int *); | ||||
static boolean_t dead_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); | static boolean_t dead_pager_haspage(vm_object_t, vm_pindex_t, int *, int *); | ||||
static void dead_pager_dealloc(vm_object_t); | static void dead_pager_dealloc(vm_object_t); | ||||
static int | static int | ||||
▲ Show 20 Lines • Show All 61 Lines • ▼ Show 20 Lines | |||||
/* | /* | ||||
* Kernel address space for mapping pages. | * Kernel address space for mapping pages. | ||||
* Used by pagers where KVAs are needed for IO. | * Used by pagers where KVAs are needed for IO. | ||||
* | * | ||||
* XXX needs to be large enough to support the number of pending async | * XXX needs to be large enough to support the number of pending async | ||||
* cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size | * cleaning requests (NPENDINGIO == 64) * the maximum swap cluster size | ||||
* (MAXPHYS == 64k) if you want to get the most efficiency. | * (MAXPHYS == 64k) if you want to get the most efficiency. | ||||
*/ | */ | ||||
struct mtx_padalign __exclusive_cache_line pbuf_mtx; | |||||
static TAILQ_HEAD(swqueue, buf) bswlist; | |||||
static int bswneeded; | |||||
vm_offset_t swapbkva; /* swap buffers kva */ | vm_offset_t swapbkva; /* swap buffers kva */ | ||||
void | void | ||||
vm_pager_init(void) | vm_pager_init(void) | ||||
{ | { | ||||
struct pagerops **pgops; | struct pagerops **pgops; | ||||
TAILQ_INIT(&bswlist); | |||||
/* | /* | ||||
* Initialize known pagers | * Initialize known pagers | ||||
*/ | */ | ||||
for (pgops = pagertab; pgops < &pagertab[nitems(pagertab)]; pgops++) | for (pgops = pagertab; pgops < &pagertab[nitems(pagertab)]; pgops++) | ||||
if ((*pgops)->pgo_init != NULL) | if ((*pgops)->pgo_init != NULL) | ||||
(*(*pgops)->pgo_init)(); | (*(*pgops)->pgo_init)(); | ||||
} | } | ||||
void | void | ||||
vm_pager_bufferinit(void) | vm_pager_bufferinit(void) | ||||
{ | { | ||||
struct buf *bp; | |||||
int i; | |||||
mtx_init(&pbuf_mtx, "pbuf mutex", NULL, MTX_DEF); | |||||
bp = swbuf; | |||||
/* | /* | ||||
* Now set up swap and physical I/O buffer headers. | * swbufs are used as temporary holders for I/O, such as paging I/O. | ||||
* We have no less than 16 and no more than 256. | |||||
*/ | */ | ||||
for (i = 0; i < nswbuf; i++, bp++) { | #ifndef NSWBUF_MIN | ||||
TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist); | #define NSWBUF_MIN 16 | ||||
BUF_LOCKINIT(bp); | #endif | ||||
LIST_INIT(&bp->b_dep); | nswbuf = min(nbuf / 4, 256); | ||||
bp->b_rcred = bp->b_wcred = NOCRED; | TUNABLE_INT_FETCH("kern.nswbuf", &nswbuf); | ||||
bp->b_xflags = 0; | if (nswbuf < NSWBUF_MIN) | ||||
} | nswbuf = NSWBUF_MIN; | ||||
cluster_pbuf_freecnt = nswbuf / 2; | /* Main zone for paging bufs. */ | ||||
vnode_pbuf_freecnt = nswbuf / 2 + 1; | pbuf_zone = uma_zcreate("pbuf", sizeof(struct buf), | ||||
vnode_async_pbuf_freecnt = nswbuf / 2; | pbuf_ctor, pbuf_dtor, pbuf_init, NULL, UMA_ALIGN_CACHE, | ||||
UMA_ZONE_VM | UMA_ZONE_NOFREE); | |||||
uma_zone_set_max(pbuf_zone, nswbuf); | |||||
} | } | ||||
/* | /* | ||||
* Allocate an instance of a pager of the given type. | * Allocate an instance of a pager of the given type. | ||||
* Size, protection and offset parameters are passed in for pagers that | * Size, protection and offset parameters are passed in for pagers that | ||||
* need to perform page-level validation (e.g. the device pager). | * need to perform page-level validation (e.g. the device pager). | ||||
*/ | */ | ||||
vm_object_t | vm_object_t | ||||
▲ Show 20 Lines • Show All 123 Lines • ▼ Show 20 Lines | if (object->handle == handle) { | ||||
break; | break; | ||||
} | } | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
} | } | ||||
} | } | ||||
return (object); | return (object); | ||||
} | } | ||||
/* | int | ||||
* initialize a physical buffer | pbuf_ctor(void *mem, int size, void *arg, int flags) | ||||
*/ | |||||
/* | |||||
* XXX This probably belongs in vfs_bio.c | |||||
*/ | |||||
static void | |||||
initpbuf(struct buf *bp) | |||||
{ | { | ||||
struct buf *bp = mem; | |||||
KASSERT(bp->b_bufobj == NULL, ("initpbuf with bufobj")); | bp->b_vp = NULL; | ||||
KASSERT(bp->b_vp == NULL, ("initpbuf with vp")); | bp->b_bufobj = NULL; | ||||
/* copied from initpbuf() */ | |||||
bp->b_rcred = NOCRED; | bp->b_rcred = NOCRED; | ||||
bp->b_wcred = NOCRED; | bp->b_wcred = NOCRED; | ||||
bp->b_qindex = 0; /* On no queue (QUEUE_NONE) */ | bp->b_qindex = 0; /* On no queue (QUEUE_NONE) */ | ||||
bp->b_kvabase = (caddr_t)(MAXPHYS * (bp - swbuf)) + swapbkva; | |||||
bp->b_data = bp->b_kvabase; | bp->b_data = bp->b_kvabase; | ||||
bp->b_kvasize = MAXPHYS; | |||||
bp->b_flags = 0; | |||||
bp->b_xflags = 0; | bp->b_xflags = 0; | ||||
bp->b_flags = 0; | |||||
bp->b_ioflags = 0; | bp->b_ioflags = 0; | ||||
bp->b_iodone = NULL; | bp->b_iodone = NULL; | ||||
bp->b_error = 0; | bp->b_error = 0; | ||||
BUF_LOCK(bp, LK_EXCLUSIVE, NULL); | BUF_LOCK(bp, LK_EXCLUSIVE, NULL); | ||||
buf_track(bp, __func__); | |||||
} | |||||
/* | return (0); | ||||
* allocate a physical buffer | |||||
* | |||||
* There are a limited number (nswbuf) of physical buffers. We need | |||||
* to make sure that no single subsystem is able to hog all of them, | |||||
* so each subsystem implements a counter which is typically initialized | |||||
* to 1/2 nswbuf. getpbuf() decrements this counter in allocation and | |||||
* increments it on release, and blocks if the counter hits zero. A | |||||
* subsystem may initialize the counter to -1 to disable the feature, | |||||
* but it must still be sure to match up all uses of getpbuf() with | |||||
* relpbuf() using the same variable. | |||||
* | |||||
* NOTE: pfreecnt can be NULL, but this 'feature' will be removed | |||||
* relatively soon when the rest of the subsystems get smart about it. XXX | |||||
*/ | |||||
struct buf * | |||||
getpbuf(int *pfreecnt) | |||||
{ | |||||
struct buf *bp; | |||||
mtx_lock(&pbuf_mtx); | |||||
for (;;) { | |||||
if (pfreecnt != NULL) { | |||||
while (*pfreecnt == 0) { | |||||
msleep(pfreecnt, &pbuf_mtx, PVM, "wswbuf0", 0); | |||||
} | } | ||||
} | |||||
/* get a bp from the swap buffer header pool */ | |||||
if ((bp = TAILQ_FIRST(&bswlist)) != NULL) | |||||
break; | |||||
bswneeded = 1; | |||||
msleep(&bswneeded, &pbuf_mtx, PVM, "wswbuf1", 0); | |||||
/* loop in case someone else grabbed one */ | |||||
} | |||||
TAILQ_REMOVE(&bswlist, bp, b_freelist); | |||||
if (pfreecnt) | |||||
--*pfreecnt; | |||||
mtx_unlock(&pbuf_mtx); | |||||
initpbuf(bp); | |||||
return (bp); | |||||
} | |||||
/* | |||||
* allocate a physical buffer, if one is available. | |||||
* | |||||
* Note that there is no NULL hack here - all subsystems using this | |||||
* call understand how to use pfreecnt. | |||||
*/ | |||||
struct buf * | |||||
trypbuf(int *pfreecnt) | |||||
{ | |||||
struct buf *bp; | |||||
mtx_lock(&pbuf_mtx); | |||||
if (*pfreecnt == 0 || (bp = TAILQ_FIRST(&bswlist)) == NULL) { | |||||
mtx_unlock(&pbuf_mtx); | |||||
return NULL; | |||||
} | |||||
TAILQ_REMOVE(&bswlist, bp, b_freelist); | |||||
--*pfreecnt; | |||||
mtx_unlock(&pbuf_mtx); | |||||
initpbuf(bp); | |||||
return (bp); | |||||
} | |||||
/* | |||||
* release a physical buffer | |||||
* | |||||
* NOTE: pfreecnt can be NULL, but this 'feature' will be removed | |||||
* relatively soon when the rest of the subsystems get smart about it. XXX | |||||
*/ | |||||
void | void | ||||
relpbuf(struct buf *bp, int *pfreecnt) | pbuf_dtor(void *mem, int size, void *arg) | ||||
{ | { | ||||
struct buf *bp = mem; | |||||
if (bp->b_rcred != NOCRED) { | if (bp->b_rcred != NOCRED) { | ||||
crfree(bp->b_rcred); | crfree(bp->b_rcred); | ||||
bp->b_rcred = NOCRED; | bp->b_rcred = NOCRED; | ||||
} | } | ||||
if (bp->b_wcred != NOCRED) { | if (bp->b_wcred != NOCRED) { | ||||
crfree(bp->b_wcred); | crfree(bp->b_wcred); | ||||
bp->b_wcred = NOCRED; | bp->b_wcred = NOCRED; | ||||
} | } | ||||
KASSERT(bp->b_vp == NULL, ("relpbuf with vp")); | |||||
KASSERT(bp->b_bufobj == NULL, ("relpbuf with bufobj")); | |||||
buf_track(bp, __func__); | |||||
BUF_UNLOCK(bp); | BUF_UNLOCK(bp); | ||||
} | |||||
mtx_lock(&pbuf_mtx); | int | ||||
TAILQ_INSERT_HEAD(&bswlist, bp, b_freelist); | pbuf_init(void *mem, int size, int flags) | ||||
{ | |||||
struct buf *bp = mem; | |||||
if (bswneeded) { | bp->b_kvabase = (void *)kva_alloc(MAXPHYS); | ||||
bswneeded = 0; | if (bp->b_kvabase == NULL) | ||||
wakeup(&bswneeded); | return (ENOMEM); | ||||
} | bp->b_kvasize = MAXPHYS; | ||||
if (pfreecnt) { | BUF_LOCKINIT(bp); | ||||
if (++*pfreecnt == 1) | LIST_INIT(&bp->b_dep); | ||||
wakeup(pfreecnt); | bp->b_rcred = bp->b_wcred = NOCRED; | ||||
} | bp->b_xflags = 0; | ||||
mtx_unlock(&pbuf_mtx); | |||||
return (0); | |||||
} | } | ||||
/* | /* | ||||
* Associate a p-buffer with a vnode. | * Associate a p-buffer with a vnode. | ||||
* | * | ||||
* Also sets B_PAGING flag to indicate that vnode is not fully associated | * Also sets B_PAGING flag to indicate that vnode is not fully associated | ||||
* with the buffer. i.e. the bp has not been linked into the vnode or | * with the buffer. i.e. the bp has not been linked into the vnode or | ||||
* ref-counted. | * ref-counted. | ||||
▲ Show 20 Lines • Show All 63 Lines • Show Last 20 Lines |