Changeset View
Changeset View
Standalone View
Standalone View
sys/vm/vnode_pager.c
Show First 20 Lines • Show All 52 Lines • ▼ Show 20 Lines | |||||
*/ | */ | ||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include "opt_vm.h" | #include "opt_vm.h" | ||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/kernel.h> | |||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/vnode.h> | #include <sys/vnode.h> | ||||
#include <sys/mount.h> | #include <sys/mount.h> | ||||
#include <sys/bio.h> | #include <sys/bio.h> | ||||
#include <sys/buf.h> | #include <sys/buf.h> | ||||
#include <sys/vmmeter.h> | #include <sys/vmmeter.h> | ||||
#include <sys/limits.h> | #include <sys/limits.h> | ||||
#include <sys/conf.h> | #include <sys/conf.h> | ||||
#include <sys/rwlock.h> | #include <sys/rwlock.h> | ||||
#include <sys/sf_buf.h> | #include <sys/sf_buf.h> | ||||
#include <sys/domainset.h> | #include <sys/domainset.h> | ||||
#include <machine/atomic.h> | #include <machine/atomic.h> | ||||
#include <vm/vm.h> | #include <vm/vm.h> | ||||
#include <vm/vm_param.h> | #include <vm/vm_param.h> | ||||
#include <vm/vm_object.h> | #include <vm/vm_object.h> | ||||
#include <vm/vm_page.h> | #include <vm/vm_page.h> | ||||
#include <vm/vm_pager.h> | #include <vm/vm_pager.h> | ||||
#include <vm/vm_map.h> | #include <vm/vm_map.h> | ||||
#include <vm/vnode_pager.h> | #include <vm/vnode_pager.h> | ||||
#include <vm/vm_extern.h> | #include <vm/vm_extern.h> | ||||
#include <vm/uma.h> | |||||
static int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address, | static int vnode_pager_addr(struct vnode *vp, vm_ooffset_t address, | ||||
daddr_t *rtaddress, int *run); | daddr_t *rtaddress, int *run); | ||||
static int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m); | static int vnode_pager_input_smlfs(vm_object_t object, vm_page_t m); | ||||
static int vnode_pager_input_old(vm_object_t object, vm_page_t m); | static int vnode_pager_input_old(vm_object_t object, vm_page_t m); | ||||
static void vnode_pager_dealloc(vm_object_t); | static void vnode_pager_dealloc(vm_object_t); | ||||
static int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); | static int vnode_pager_getpages(vm_object_t, vm_page_t *, int, int *, int *); | ||||
static int vnode_pager_getpages_async(vm_object_t, vm_page_t *, int, int *, | static int vnode_pager_getpages_async(vm_object_t, vm_page_t *, int, int *, | ||||
Show All 9 Lines | struct pagerops vnodepagerops = { | ||||
.pgo_alloc = vnode_pager_alloc, | .pgo_alloc = vnode_pager_alloc, | ||||
.pgo_dealloc = vnode_pager_dealloc, | .pgo_dealloc = vnode_pager_dealloc, | ||||
.pgo_getpages = vnode_pager_getpages, | .pgo_getpages = vnode_pager_getpages, | ||||
.pgo_getpages_async = vnode_pager_getpages_async, | .pgo_getpages_async = vnode_pager_getpages_async, | ||||
.pgo_putpages = vnode_pager_putpages, | .pgo_putpages = vnode_pager_putpages, | ||||
.pgo_haspage = vnode_pager_haspage, | .pgo_haspage = vnode_pager_haspage, | ||||
}; | }; | ||||
int vnode_pbuf_freecnt; | |||||
int vnode_async_pbuf_freecnt; | |||||
static struct domainset *vnode_domainset = NULL; | static struct domainset *vnode_domainset = NULL; | ||||
SYSCTL_PROC(_debug, OID_AUTO, vnode_domainset, CTLTYPE_STRING | CTLFLAG_RW, | SYSCTL_PROC(_debug, OID_AUTO, vnode_domainset, CTLTYPE_STRING | CTLFLAG_RW, | ||||
&vnode_domainset, 0, sysctl_handle_domainset, "A", | &vnode_domainset, 0, sysctl_handle_domainset, "A", | ||||
"Default vnode NUMA policy"); | "Default vnode NUMA policy"); | ||||
/*
 * UMA zone that the vnode pager draws its pbufs from.  It is assigned once,
 * in vnode_pager_init() below.
 */
static uma_zone_t vnode_pbuf_zone; | |||||
/*
 * One-time initializer for the vnode pager's pbuf zone.
 *
 * Creates the "vnpbuf" zone with uma_zsecond_create() on top of pbuf_zone,
 * reusing the pbuf_ctor/pbuf_dtor/pbuf_init hooks, and hands nswbuf * 8 to
 * uma_zone_set_max() as the limit for the new zone.
 *
 * The "dummy" argument is never read; the SYSINIT registration passes NULL.
 */
static void | |||||
vnode_pager_init(void *dummy) | |||||
{ | |||||
vnode_pbuf_zone = uma_zsecond_create("vnpbuf", pbuf_ctor, pbuf_dtor, | |||||
pbuf_init, NULL, pbuf_zone); | |||||
uma_zone_set_max(vnode_pbuf_zone, nswbuf * 8); | |||||
/* Preallocating the same nswbuf * 8 amount is currently compiled out. */
#if 0 | |||||
uma_prealloc(vnode_pbuf_zone, nswbuf * 8); | |||||
#endif | |||||
} | |||||
/*
 * Run vnode_pager_init() from SYSINIT at SI_SUB_CPU / SI_ORDER_ANY.
 * NOTE(review): this relies on pbuf_zone and nswbuf being set up before
 * this point in the boot order -- confirm.
 */
SYSINIT(vnode_pager, SI_SUB_CPU, SI_ORDER_ANY, vnode_pager_init, NULL); | |||||
/* Create the VM system backing object for this vnode */ | /* Create the VM system backing object for this vnode */ | ||||
int | int | ||||
vnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td) | vnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td) | ||||
{ | { | ||||
vm_object_t object; | vm_object_t object; | ||||
vm_ooffset_t size = isize; | vm_ooffset_t size = isize; | ||||
struct vattr va; | struct vattr va; | ||||
▲ Show 20 Lines • Show All 431 Lines • ▼ Show 20 Lines | for (i = 0; i < PAGE_SIZE / bsize; i++) { | ||||
if (address >= object->un_pager.vnp.vnp_size) { | if (address >= object->un_pager.vnp.vnp_size) { | ||||
fileaddr = -1; | fileaddr = -1; | ||||
} else { | } else { | ||||
error = vnode_pager_addr(vp, address, &fileaddr, NULL); | error = vnode_pager_addr(vp, address, &fileaddr, NULL); | ||||
if (error) | if (error) | ||||
break; | break; | ||||
} | } | ||||
if (fileaddr != -1) { | if (fileaddr != -1) { | ||||
bp = getpbuf(&vnode_pbuf_freecnt); | bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK); | ||||
/* build a minimal buffer header */ | /* build a minimal buffer header */ | ||||
bp->b_iocmd = BIO_READ; | bp->b_iocmd = BIO_READ; | ||||
bp->b_iodone = bdone; | bp->b_iodone = bdone; | ||||
KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred")); | KASSERT(bp->b_rcred == NOCRED, ("leaking read ucred")); | ||||
KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred")); | KASSERT(bp->b_wcred == NOCRED, ("leaking write ucred")); | ||||
bp->b_rcred = crhold(curthread->td_ucred); | bp->b_rcred = crhold(curthread->td_ucred); | ||||
bp->b_wcred = crhold(curthread->td_ucred); | bp->b_wcred = crhold(curthread->td_ucred); | ||||
Show All 15 Lines | if (fileaddr != -1) { | ||||
if ((bp->b_ioflags & BIO_ERROR) != 0) | if ((bp->b_ioflags & BIO_ERROR) != 0) | ||||
error = EIO; | error = EIO; | ||||
/* | /* | ||||
* free the buffer header back to the swap buffer pool | * free the buffer header back to the swap buffer pool | ||||
*/ | */ | ||||
bp->b_vp = NULL; | bp->b_vp = NULL; | ||||
pbrelbo(bp); | pbrelbo(bp); | ||||
relpbuf(bp, &vnode_pbuf_freecnt); | uma_zfree(vnode_pbuf_zone, bp); | ||||
if (error) | if (error) | ||||
break; | break; | ||||
} else | } else | ||||
bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize); | bzero((caddr_t)sf_buf_kva(sf) + i * bsize, bsize); | ||||
KASSERT((m->dirty & bits) == 0, | KASSERT((m->dirty & bits) == 0, | ||||
("vnode_pager_input_smlfs: page %p is dirty", m)); | ("vnode_pager_input_smlfs: page %p is dirty", m)); | ||||
VM_OBJECT_WLOCK(object); | VM_OBJECT_WLOCK(object); | ||||
m->valid |= bits; | m->valid |= bits; | ||||
▲ Show 20 Lines • Show All 145 Lines • ▼ Show 20 Lines | |||||
{ | { | ||||
vm_object_t object; | vm_object_t object; | ||||
struct bufobj *bo; | struct bufobj *bo; | ||||
struct buf *bp; | struct buf *bp; | ||||
off_t foff; | off_t foff; | ||||
#ifdef INVARIANTS | #ifdef INVARIANTS | ||||
off_t blkno0; | off_t blkno0; | ||||
#endif | #endif | ||||
int bsize, pagesperblock, *freecnt; | int bsize, pagesperblock; | ||||
int error, before, after, rbehind, rahead, poff, i; | int error, before, after, rbehind, rahead, poff, i; | ||||
int bytecount, secmask; | int bytecount, secmask; | ||||
KASSERT(vp->v_type != VCHR && vp->v_type != VBLK, | KASSERT(vp->v_type != VCHR && vp->v_type != VBLK, | ||||
("%s does not support devices", __func__)); | ("%s does not support devices", __func__)); | ||||
if (vp->v_iflag & VI_DOOMED) | if (vp->v_iflag & VI_DOOMED) | ||||
return (VM_PAGER_BAD); | return (VM_PAGER_BAD); | ||||
Show All 14 Lines | #endif | ||||
* by zeroing in vm_pager_get_pages(). | * by zeroing in vm_pager_get_pages(). | ||||
*/ | */ | ||||
if (m[count - 1]->valid != 0 && --count == 0) { | if (m[count - 1]->valid != 0 && --count == 0) { | ||||
if (iodone != NULL) | if (iodone != NULL) | ||||
iodone(arg, m, 1, 0); | iodone(arg, m, 1, 0); | ||||
return (VM_PAGER_OK); | return (VM_PAGER_OK); | ||||
} | } | ||||
/* | bp = uma_zalloc(vnode_pbuf_zone, M_WAITOK); | ||||
* Synchronous and asynchronous paging operations use different | |||||
* free pbuf counters. This is done to avoid asynchronous requests | |||||
* to consume all pbufs. | |||||
* Allocate the pbuf at the very beginning of the function, so that | |||||
* if we are low on certain kind of pbufs don't even proceed to BMAP, | |||||
* but sleep. | |||||
*/ | |||||
freecnt = iodone != NULL ? | |||||
&vnode_async_pbuf_freecnt : &vnode_pbuf_freecnt; | |||||
bp = getpbuf(freecnt); | |||||
/* | /* | ||||
* Get the underlying device blocks for the file with VOP_BMAP(). | * Get the underlying device blocks for the file with VOP_BMAP(). | ||||
* If the file system doesn't support VOP_BMAP, use old way of | * If the file system doesn't support VOP_BMAP, use old way of | ||||
* getting pages via VOP_READ. | * getting pages via VOP_READ. | ||||
*/ | */ | ||||
error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before); | error = VOP_BMAP(vp, foff / bsize, &bo, &bp->b_blkno, &after, &before); | ||||
if (error == EOPNOTSUPP) { | if (error == EOPNOTSUPP) { | ||||
relpbuf(bp, freecnt); | uma_zfree(vnode_pbuf_zone, bp); | ||||
VM_OBJECT_WLOCK(object); | VM_OBJECT_WLOCK(object); | ||||
for (i = 0; i < count; i++) { | for (i = 0; i < count; i++) { | ||||
VM_CNT_INC(v_vnodein); | VM_CNT_INC(v_vnodein); | ||||
VM_CNT_INC(v_vnodepgsin); | VM_CNT_INC(v_vnodepgsin); | ||||
error = vnode_pager_input_old(object, m[i]); | error = vnode_pager_input_old(object, m[i]); | ||||
if (error) | if (error) | ||||
break; | break; | ||||
} | } | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
return (error); | return (error); | ||||
} else if (error != 0) { | } else if (error != 0) { | ||||
relpbuf(bp, freecnt); | uma_zfree(vnode_pbuf_zone, bp); | ||||
return (VM_PAGER_ERROR); | return (VM_PAGER_ERROR); | ||||
} | } | ||||
/* | /* | ||||
* If the file system supports BMAP, but blocksize is smaller | * If the file system supports BMAP, but blocksize is smaller | ||||
* than a page size, then use special small filesystem code. | * than a page size, then use special small filesystem code. | ||||
*/ | */ | ||||
if (pagesperblock == 0) { | if (pagesperblock == 0) { | ||||
relpbuf(bp, freecnt); | uma_zfree(vnode_pbuf_zone, bp); | ||||
for (i = 0; i < count; i++) { | for (i = 0; i < count; i++) { | ||||
VM_CNT_INC(v_vnodein); | VM_CNT_INC(v_vnodein); | ||||
VM_CNT_INC(v_vnodepgsin); | VM_CNT_INC(v_vnodepgsin); | ||||
error = vnode_pager_input_smlfs(object, m[i]); | error = vnode_pager_input_smlfs(object, m[i]); | ||||
if (error) | if (error) | ||||
break; | break; | ||||
} | } | ||||
return (error); | return (error); | ||||
} | } | ||||
/* | /* | ||||
* A sparse file can be encountered only for a single page request, | * A sparse file can be encountered only for a single page request, | ||||
* which may not be preceded by call to vm_pager_haspage(). | * which may not be preceded by call to vm_pager_haspage(). | ||||
*/ | */ | ||||
if (bp->b_blkno == -1) { | if (bp->b_blkno == -1) { | ||||
KASSERT(count == 1, | KASSERT(count == 1, | ||||
("%s: array[%d] request to a sparse file %p", __func__, | ("%s: array[%d] request to a sparse file %p", __func__, | ||||
count, vp)); | count, vp)); | ||||
relpbuf(bp, freecnt); | uma_zfree(vnode_pbuf_zone, bp); | ||||
pmap_zero_page(m[0]); | pmap_zero_page(m[0]); | ||||
KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty", | KASSERT(m[0]->dirty == 0, ("%s: page %p is dirty", | ||||
__func__, m[0])); | __func__, m[0])); | ||||
VM_OBJECT_WLOCK(object); | VM_OBJECT_WLOCK(object); | ||||
m[0]->valid = VM_PAGE_BITS_ALL; | m[0]->valid = VM_PAGE_BITS_ALL; | ||||
VM_OBJECT_WUNLOCK(object); | VM_OBJECT_WUNLOCK(object); | ||||
return (VM_PAGER_OK); | return (VM_PAGER_OK); | ||||
} | } | ||||
▲ Show 20 Lines • Show All 197 Lines • ▼ Show 20 Lines | if (iodone != NULL) { /* async */ | ||||
bp->b_iodone = bdone; | bp->b_iodone = bdone; | ||||
bstrategy(bp); | bstrategy(bp); | ||||
bwait(bp, PVM, "vnread"); | bwait(bp, PVM, "vnread"); | ||||
error = vnode_pager_generic_getpages_done(bp); | error = vnode_pager_generic_getpages_done(bp); | ||||
for (i = 0; i < bp->b_npages; i++) | for (i = 0; i < bp->b_npages; i++) | ||||
bp->b_pages[i] = NULL; | bp->b_pages[i] = NULL; | ||||
bp->b_vp = NULL; | bp->b_vp = NULL; | ||||
pbrelbo(bp); | pbrelbo(bp); | ||||
relpbuf(bp, &vnode_pbuf_freecnt); | uma_zfree(vnode_pbuf_zone, bp); | ||||
return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK); | return (error != 0 ? VM_PAGER_ERROR : VM_PAGER_OK); | ||||
} | } | ||||
} | } | ||||
/*
 * Finish a getpages request whose buffer is "bp", then give the buffer back.
 * Presumably installed as the buffer's completion callback on the
 * asynchronous path -- the assignment is not visible here, confirm.
 *
 * The error returned by vnode_pager_generic_getpages_done() is forwarded to
 * the caller-supplied bp->b_pgiodone callback together with bp->b_caller1.
 * Only the originally requested pages are reported: the first b_pgbefore and
 * the last b_pgafter entries of bp->b_pages are left out.
 *
 * Returns nothing; the outcome is delivered solely through b_pgiodone.
 */
static void | static void | ||||
vnode_pager_generic_getpages_done_async(struct buf *bp) | vnode_pager_generic_getpages_done_async(struct buf *bp) | ||||
{ | { | ||||
int error; | int error; | ||||
error = vnode_pager_generic_getpages_done(bp); | error = vnode_pager_generic_getpages_done(bp); | ||||
/* Run the iodone upon the requested range. */ | /* Run the iodone upon the requested range. */ | ||||
bp->b_pgiodone(bp->b_caller1, bp->b_pages + bp->b_pgbefore, | bp->b_pgiodone(bp->b_caller1, bp->b_pages + bp->b_pgbefore, | ||||
bp->b_npages - bp->b_pgbefore - bp->b_pgafter, error); | bp->b_npages - bp->b_pgbefore - bp->b_pgafter, error); | ||||
/*
 * Clear every page slot and the vnode pointer, and call pbrelbo(), before
 * the buffer is released.
 */
for (int i = 0; i < bp->b_npages; i++) | for (int i = 0; i < bp->b_npages; i++) | ||||
bp->b_pages[i] = NULL; | bp->b_pages[i] = NULL; | ||||
bp->b_vp = NULL; | bp->b_vp = NULL; | ||||
pbrelbo(bp); | pbrelbo(bp); | ||||
/*
 * Release the buffer: relpbuf() against vnode_async_pbuf_freecnt in the
 * first column, uma_zfree() to vnode_pbuf_zone in the second.
 */
relpbuf(bp, &vnode_async_pbuf_freecnt); | uma_zfree(vnode_pbuf_zone, bp); | ||||
} | } | ||||
static int | static int | ||||
vnode_pager_generic_getpages_done(struct buf *bp) | vnode_pager_generic_getpages_done(struct buf *bp) | ||||
{ | { | ||||
vm_object_t object; | vm_object_t object; | ||||
off_t tfoff, nextoff; | off_t tfoff, nextoff; | ||||
int i, error; | int i, error; | ||||
▲ Show 20 Lines • Show All 478 Lines • Show Last 20 Lines |