Changeset View
Changeset View
Standalone View
Standalone View
sys/kern/vfs_cluster.c
Show All 30 Lines | |||||
* SUCH DAMAGE. | * SUCH DAMAGE. | ||||
* | * | ||||
* @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94 | * @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94 | ||||
*/ | */ | ||||
#include <sys/cdefs.h> | #include <sys/cdefs.h> | ||||
__FBSDID("$FreeBSD$"); | __FBSDID("$FreeBSD$"); | ||||
#include "opt_debug_cluster.h" | |||||
#include <sys/param.h> | #include <sys/param.h> | ||||
#include <sys/systm.h> | #include <sys/systm.h> | ||||
#include <sys/kernel.h> | #include <sys/kernel.h> | ||||
#include <sys/proc.h> | #include <sys/proc.h> | ||||
#include <sys/bio.h> | #include <sys/bio.h> | ||||
#include <sys/buf.h> | #include <sys/buf.h> | ||||
#include <sys/vnode.h> | #include <sys/vnode.h> | ||||
#include <sys/malloc.h> | #include <sys/malloc.h> | ||||
#include <sys/mount.h> | #include <sys/mount.h> | ||||
#include <sys/racct.h> | #include <sys/racct.h> | ||||
#include <sys/resourcevar.h> | #include <sys/resourcevar.h> | ||||
#include <sys/rwlock.h> | #include <sys/rwlock.h> | ||||
#include <sys/vmmeter.h> | #include <sys/vmmeter.h> | ||||
#include <vm/vm.h> | #include <vm/vm.h> | ||||
#include <vm/vm_object.h> | #include <vm/vm_object.h> | ||||
#include <vm/vm_page.h> | #include <vm/vm_page.h> | ||||
#include <sys/sysctl.h> | #include <sys/sysctl.h> | ||||
#if defined(CLUSTERDEBUG) | |||||
static int rcluster= 0; | |||||
SYSCTL_INT(_debug, OID_AUTO, rcluster, CTLFLAG_RW, &rcluster, 0, | |||||
"Debug VFS clustering code"); | |||||
#endif | |||||
static MALLOC_DEFINE(M_SEGMENT, "cl_savebuf", "cluster_save buffer"); | static MALLOC_DEFINE(M_SEGMENT, "cl_savebuf", "cluster_save buffer"); | ||||
static uma_zone_t cluster_pbuf_zone; | static uma_zone_t cluster_pbuf_zone; | ||||
static void cluster_init(void *); | static void cluster_init(void *); | ||||
static struct cluster_save *cluster_collectbufs(struct vnode *vp, | static struct cluster_save *cluster_collectbufs(struct vnode *vp, | ||||
struct buf *last_bp, int gbflags); | struct vn_clusterw *vnc, struct buf *last_bp, int gbflags); | ||||
static struct buf *cluster_rbuild(struct vnode *vp, u_quad_t filesize, | static struct buf *cluster_rbuild(struct vnode *vp, u_quad_t filesize, | ||||
daddr_t lbn, daddr_t blkno, long size, int run, int gbflags, | daddr_t lbn, daddr_t blkno, long size, int run, int gbflags, | ||||
struct buf *fbp); | struct buf *fbp); | ||||
static void cluster_callback(struct buf *); | static void cluster_callback(struct buf *); | ||||
static int write_behind = 1; | static int write_behind = 1; | ||||
SYSCTL_INT(_vfs, OID_AUTO, write_behind, CTLFLAG_RW, &write_behind, 0, | SYSCTL_INT(_vfs, OID_AUTO, write_behind, CTLFLAG_RW, &write_behind, 0, | ||||
"Cluster write-behind; 0: disable, 1: enable, 2: backed off"); | "Cluster write-behind; 0: disable, 1: enable, 2: backed off"); | ||||
▲ Show 20 Lines • Show All 567 Lines • ▼ Show 20 Lines | |||||
* Four cases: | * Four cases: | ||||
* 1. Write is not sequential (write asynchronously) | * 1. Write is not sequential (write asynchronously) | ||||
* Write is sequential: | * Write is sequential: | ||||
* 2. beginning of cluster - begin cluster | * 2. beginning of cluster - begin cluster | ||||
* 3. middle of a cluster - add to cluster | * 3. middle of a cluster - add to cluster | ||||
* 4. end of a cluster - asynchronously write cluster | * 4. end of a cluster - asynchronously write cluster | ||||
*/ | */ | ||||
void | void | ||||
cluster_write(struct vnode *vp, struct buf *bp, u_quad_t filesize, int seqcount, | cluster_write(struct vnode *vp, struct vn_clusterw *vnc, struct buf *bp, | ||||
int gbflags) | u_quad_t filesize, int seqcount, int gbflags) | ||||
{ | { | ||||
daddr_t lbn; | daddr_t lbn; | ||||
int maxclen, cursize; | int maxclen, cursize; | ||||
int lblocksize; | int lblocksize; | ||||
int async; | int async; | ||||
if (!unmapped_buf_allowed) | if (!unmapped_buf_allowed) | ||||
gbflags &= ~GB_UNMAPPED; | gbflags &= ~GB_UNMAPPED; | ||||
if (vp->v_type == VREG) { | if (vp->v_type == VREG) { | ||||
async = DOINGASYNC(vp); | async = DOINGASYNC(vp); | ||||
lblocksize = vp->v_mount->mnt_stat.f_iosize; | lblocksize = vp->v_mount->mnt_stat.f_iosize; | ||||
} else { | } else { | ||||
async = 0; | async = 0; | ||||
lblocksize = bp->b_bufsize; | lblocksize = bp->b_bufsize; | ||||
} | } | ||||
lbn = bp->b_lblkno; | lbn = bp->b_lblkno; | ||||
KASSERT(bp->b_offset != NOOFFSET, ("cluster_write: no buffer offset")); | KASSERT(bp->b_offset != NOOFFSET, ("cluster_write: no buffer offset")); | ||||
/* Initialize vnode to beginning of file. */ | /* Initialize vnode to beginning of file. */ | ||||
if (lbn == 0) | if (lbn == 0) | ||||
vp->v_lasta = vp->v_clen = vp->v_cstart = vp->v_lastw = 0; | vnc->v_lasta = vnc->v_clen = vnc->v_cstart = vnc->v_lastw = 0; | ||||
if (vp->v_clen == 0 || lbn != vp->v_lastw + 1 || | if (vnc->v_clen == 0 || lbn != vnc->v_lastw + 1 || | ||||
(bp->b_blkno != vp->v_lasta + btodb(lblocksize))) { | (bp->b_blkno != vnc->v_lasta + btodb(lblocksize))) { | ||||
maxclen = vp->v_mount->mnt_iosize_max / lblocksize - 1; | maxclen = vp->v_mount->mnt_iosize_max / lblocksize - 1; | ||||
if (vp->v_clen != 0) { | if (vnc->v_clen != 0) { | ||||
/* | /* | ||||
* Next block is not sequential. | * Next block is not sequential. | ||||
* | * | ||||
* If we are not writing at end of file, the process | * If we are not writing at end of file, the process | ||||
* seeked to another point in the file since its last | * seeked to another point in the file since its last | ||||
* write, or we have reached our maximum cluster size, | * write, or we have reached our maximum cluster size, | ||||
* then push the previous cluster. Otherwise try | * then push the previous cluster. Otherwise try | ||||
* reallocating to make it sequential. | * reallocating to make it sequential. | ||||
* | * | ||||
* Change to algorithm: only push previous cluster if | * Change to algorithm: only push previous cluster if | ||||
* it was sequential from the point of view of the | * it was sequential from the point of view of the | ||||
* seqcount heuristic, otherwise leave the buffer | * seqcount heuristic, otherwise leave the buffer | ||||
* intact so we can potentially optimize the I/O | * intact so we can potentially optimize the I/O | ||||
* later on in the buf_daemon or update daemon | * later on in the buf_daemon or update daemon | ||||
* flush. | * flush. | ||||
*/ | */ | ||||
cursize = vp->v_lastw - vp->v_cstart + 1; | cursize = vnc->v_lastw - vnc->v_cstart + 1; | ||||
if (((u_quad_t) bp->b_offset + lblocksize) != filesize || | if ((u_quad_t)bp->b_offset + lblocksize != filesize || | ||||
lbn != vp->v_lastw + 1 || vp->v_clen <= cursize) { | lbn != vnc->v_lastw + 1 || vnc->v_clen <= cursize) { | ||||
if (!async && seqcount > 0) { | if (!async && seqcount > 0) { | ||||
cluster_wbuild_wb(vp, lblocksize, | cluster_wbuild_wb(vp, lblocksize, | ||||
vp->v_cstart, cursize, gbflags); | vnc->v_cstart, cursize, gbflags); | ||||
} | } | ||||
} else { | } else { | ||||
struct buf **bpp, **endbp; | struct buf **bpp, **endbp; | ||||
struct cluster_save *buflist; | struct cluster_save *buflist; | ||||
buflist = cluster_collectbufs(vp, bp, gbflags); | buflist = cluster_collectbufs(vp, vnc, bp, | ||||
gbflags); | |||||
if (buflist == NULL) { | if (buflist == NULL) { | ||||
/* | /* | ||||
* Cluster build failed so just write | * Cluster build failed so just write | ||||
* it now. | * it now. | ||||
*/ | */ | ||||
bawrite(bp); | bawrite(bp); | ||||
return; | return; | ||||
} | } | ||||
Show All 9 Lines | if (vnc->v_clen != 0) { | ||||
* optimize the write ordering. | * optimize the write ordering. | ||||
*/ | */ | ||||
for (bpp = buflist->bs_children; | for (bpp = buflist->bs_children; | ||||
bpp < endbp; bpp++) | bpp < endbp; bpp++) | ||||
brelse(*bpp); | brelse(*bpp); | ||||
free(buflist, M_SEGMENT); | free(buflist, M_SEGMENT); | ||||
if (seqcount > 1) { | if (seqcount > 1) { | ||||
cluster_wbuild_wb(vp, | cluster_wbuild_wb(vp, | ||||
lblocksize, vp->v_cstart, | lblocksize, vnc->v_cstart, | ||||
cursize, gbflags); | cursize, gbflags); | ||||
} | } | ||||
} else { | } else { | ||||
/* | /* | ||||
* Succeeded, keep building cluster. | * Succeeded, keep building cluster. | ||||
*/ | */ | ||||
for (bpp = buflist->bs_children; | for (bpp = buflist->bs_children; | ||||
bpp <= endbp; bpp++) | bpp <= endbp; bpp++) | ||||
bdwrite(*bpp); | bdwrite(*bpp); | ||||
free(buflist, M_SEGMENT); | free(buflist, M_SEGMENT); | ||||
vp->v_lastw = lbn; | vnc->v_lastw = lbn; | ||||
vp->v_lasta = bp->b_blkno; | vnc->v_lasta = bp->b_blkno; | ||||
return; | return; | ||||
} | } | ||||
} | } | ||||
} | } | ||||
/* | /* | ||||
* Consider beginning a cluster. If at end of file, make | * Consider beginning a cluster. If at end of file, make | ||||
* cluster as large as possible, otherwise find size of | * cluster as large as possible, otherwise find size of | ||||
* existing cluster. | * existing cluster. | ||||
*/ | */ | ||||
if ((vp->v_type == VREG) && | if (vp->v_type == VREG && | ||||
((u_quad_t) bp->b_offset + lblocksize) != filesize && | (u_quad_t) bp->b_offset + lblocksize != filesize && | ||||
(bp->b_blkno == bp->b_lblkno) && | bp->b_blkno == bp->b_lblkno && | ||||
(VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, NULL) || | (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, | ||||
bp->b_blkno == -1)) { | NULL) != 0 || bp->b_blkno == -1)) { | ||||
bawrite(bp); | bawrite(bp); | ||||
vp->v_clen = 0; | vnc->v_clen = 0; | ||||
vp->v_lasta = bp->b_blkno; | vnc->v_lasta = bp->b_blkno; | ||||
vp->v_cstart = lbn + 1; | vnc->v_cstart = lbn + 1; | ||||
vp->v_lastw = lbn; | vnc->v_lastw = lbn; | ||||
return; | return; | ||||
} | } | ||||
vp->v_clen = maxclen; | vnc->v_clen = maxclen; | ||||
if (!async && maxclen == 0) { /* I/O not contiguous */ | if (!async && maxclen == 0) { /* I/O not contiguous */ | ||||
vp->v_cstart = lbn + 1; | vnc->v_cstart = lbn + 1; | ||||
bawrite(bp); | bawrite(bp); | ||||
} else { /* Wait for rest of cluster */ | } else { /* Wait for rest of cluster */ | ||||
vp->v_cstart = lbn; | vnc->v_cstart = lbn; | ||||
bdwrite(bp); | bdwrite(bp); | ||||
} | } | ||||
} else if (lbn == vp->v_cstart + vp->v_clen) { | } else if (lbn == vnc->v_cstart + vnc->v_clen) { | ||||
/* | /* | ||||
* At end of cluster, write it out if seqcount tells us we | * At end of cluster, write it out if seqcount tells us we | ||||
* are operating sequentially, otherwise let the buf or | * are operating sequentially, otherwise let the buf or | ||||
* update daemon handle it. | * update daemon handle it. | ||||
*/ | */ | ||||
bdwrite(bp); | bdwrite(bp); | ||||
if (seqcount > 1) { | if (seqcount > 1) { | ||||
cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, | cluster_wbuild_wb(vp, lblocksize, vnc->v_cstart, | ||||
vp->v_clen + 1, gbflags); | vnc->v_clen + 1, gbflags); | ||||
} | } | ||||
vp->v_clen = 0; | vnc->v_clen = 0; | ||||
vp->v_cstart = lbn + 1; | vnc->v_cstart = lbn + 1; | ||||
} else if (vm_page_count_severe()) { | } else if (vm_page_count_severe()) { | ||||
/* | /* | ||||
* We are low on memory, get it going NOW | * We are low on memory, get it going NOW | ||||
*/ | */ | ||||
bawrite(bp); | bawrite(bp); | ||||
} else { | } else { | ||||
/* | /* | ||||
* In the middle of a cluster, so just delay the I/O for now. | * In the middle of a cluster, so just delay the I/O for now. | ||||
*/ | */ | ||||
bdwrite(bp); | bdwrite(bp); | ||||
} | } | ||||
vp->v_lastw = lbn; | vnc->v_lastw = lbn; | ||||
vp->v_lasta = bp->b_blkno; | vnc->v_lasta = bp->b_blkno; | ||||
} | } | ||||
/* | /* | ||||
* This is an awful lot like cluster_rbuild...wish they could be combined. | * This is an awful lot like cluster_rbuild...wish they could be combined. | ||||
* The last lbn argument is the current block on which I/O is being | * The last lbn argument is the current block on which I/O is being | ||||
* performed. Check to see that it doesn't fall in the middle of | * performed. Check to see that it doesn't fall in the middle of | ||||
* the current block (if last_bp == NULL). | * the current block (if last_bp == NULL). | ||||
*/ | */ | ||||
▲ Show 20 Lines • Show All 230 Lines • ▼ Show 20 Lines | cluster_wbuild(struct vnode *vp, long size, daddr_t start_lbn, int len, | ||||
return totalwritten; | return totalwritten; | ||||
} | } | ||||
/* | /* | ||||
* Collect together all the buffers in a cluster. | * Collect together all the buffers in a cluster. | ||||
* Plus add one additional buffer. | * Plus add one additional buffer. | ||||
*/ | */ | ||||
static struct cluster_save * | static struct cluster_save * | ||||
cluster_collectbufs(struct vnode *vp, struct buf *last_bp, int gbflags) | cluster_collectbufs(struct vnode *vp, struct vn_clusterw *vnc, | ||||
struct buf *last_bp, int gbflags) | |||||
{ | { | ||||
struct cluster_save *buflist; | struct cluster_save *buflist; | ||||
struct buf *bp; | struct buf *bp; | ||||
daddr_t lbn; | daddr_t lbn; | ||||
int i, j, len, error; | int i, j, len, error; | ||||
len = vp->v_lastw - vp->v_cstart + 1; | len = vnc->v_lastw - vnc->v_cstart + 1; | ||||
buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), | buflist = malloc(sizeof(struct buf *) * (len + 1) + sizeof(*buflist), | ||||
M_SEGMENT, M_WAITOK); | M_SEGMENT, M_WAITOK); | ||||
buflist->bs_nchildren = 0; | buflist->bs_nchildren = 0; | ||||
buflist->bs_children = (struct buf **) (buflist + 1); | buflist->bs_children = (struct buf **) (buflist + 1); | ||||
for (lbn = vp->v_cstart, i = 0; i < len; lbn++, i++) { | for (lbn = vnc->v_cstart, i = 0; i < len; lbn++, i++) { | ||||
error = bread_gb(vp, lbn, last_bp->b_bcount, NOCRED, | error = bread_gb(vp, lbn, last_bp->b_bcount, NOCRED, | ||||
gbflags, &bp); | gbflags, &bp); | ||||
if (error != 0) { | if (error != 0) { | ||||
/* | /* | ||||
* If read fails, release collected buffers | * If read fails, release collected buffers | ||||
* and return failure. | * and return failure. | ||||
*/ | */ | ||||
for (j = 0; j < i; j++) | for (j = 0; j < i; j++) | ||||
brelse(buflist->bs_children[j]); | brelse(buflist->bs_children[j]); | ||||
free(buflist, M_SEGMENT); | free(buflist, M_SEGMENT); | ||||
return (NULL); | return (NULL); | ||||
} | } | ||||
buflist->bs_children[i] = bp; | buflist->bs_children[i] = bp; | ||||
if (bp->b_blkno == bp->b_lblkno) | if (bp->b_blkno == bp->b_lblkno) | ||||
VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, | VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, | ||||
NULL, NULL); | NULL, NULL); | ||||
} | } | ||||
buflist->bs_children[i] = bp = last_bp; | buflist->bs_children[i] = bp = last_bp; | ||||
if (bp->b_blkno == bp->b_lblkno) | if (bp->b_blkno == bp->b_lblkno) | ||||
VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); | VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL); | ||||
buflist->bs_nchildren = i + 1; | buflist->bs_nchildren = i + 1; | ||||
return (buflist); | return (buflist); | ||||
} | |||||
void | |||||
cluster_init_vn(struct vn_clusterw *vnc) | |||||
{ | |||||
vnc->v_lasta = 0; | |||||
vnc->v_clen = 0; | |||||
vnc->v_cstart = 0; | |||||
vnc->v_lastw = 0; | |||||
} | } |