Index: lib/libstand/cd9660.c =================================================================== --- lib/libstand/cd9660.c +++ lib/libstand/cd9660.c @@ -143,7 +143,7 @@ if (bcmp(sh->type, SUSP_CONTINUATION, 2) == 0) { shc = (ISO_RRIP_CONT *)sh; error = f->f_dev->dv_strategy(f->f_devdata, F_READ, - cdb2devb(isonum_733(shc->location)), + cdb2devb(isonum_733(shc->location)), 0, ISO_DEFAULT_BLOCK_SIZE, susp_buffer, &read); /* Bail if it fails. */ @@ -288,7 +288,7 @@ for (bno = 16;; bno++) { twiddle(1); rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, cdb2devb(bno), - ISO_DEFAULT_BLOCK_SIZE, buf, &read); + 0, ISO_DEFAULT_BLOCK_SIZE, buf, &read); if (rc) goto out; if (read != ISO_DEFAULT_BLOCK_SIZE) { @@ -322,7 +322,7 @@ twiddle(1); rc = f->f_dev->dv_strategy (f->f_devdata, F_READ, - cdb2devb(bno + boff), + cdb2devb(bno + boff), 0, ISO_DEFAULT_BLOCK_SIZE, buf, &read); if (rc) @@ -381,7 +381,7 @@ bno = isonum_733(rec.extent) + isonum_711(rec.ext_attr_length); twiddle(1); rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, cdb2devb(bno), - ISO_DEFAULT_BLOCK_SIZE, buf, &read); + 0, ISO_DEFAULT_BLOCK_SIZE, buf, &read); if (rc) goto out; if (read != ISO_DEFAULT_BLOCK_SIZE) { @@ -438,7 +438,8 @@ twiddle(16); rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, - cdb2devb(blkno), ISO_DEFAULT_BLOCK_SIZE, fp->f_buf, &read); + cdb2devb(blkno), 0, ISO_DEFAULT_BLOCK_SIZE, + fp->f_buf, &read); if (rc) return (rc); if (read != ISO_DEFAULT_BLOCK_SIZE) Index: lib/libstand/dosfs.c =================================================================== --- lib/libstand/dosfs.c +++ lib/libstand/dosfs.c @@ -788,7 +788,7 @@ int err; twiddle(1); - if ((err = (fd->f_dev->dv_strategy)(fd->f_devdata, F_READ, lsec, + if ((err = (fd->f_dev->dv_strategy)(fd->f_devdata, F_READ, lsec, 0, secbyt(nsec), buf, NULL))) return(err); return(0); Index: lib/libstand/ext2fs.c =================================================================== --- lib/libstand/ext2fs.c +++ lib/libstand/ext2fs.c @@ -355,7 +355,7 @@ 
fp->f_fs = fs; twiddle(1); error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, - EXT2_SBLOCK, EXT2_SBSIZE, (char *)fs, &buf_size); + EXT2_SBLOCK, 0, EXT2_SBSIZE, (char *)fs, &buf_size); if (error) goto out; @@ -397,7 +397,7 @@ fp->f_bg = malloc(len); twiddle(1); error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, - EXT2_SBLOCK + EXT2_SBSIZE / DEV_BSIZE, len, + EXT2_SBLOCK + EXT2_SBSIZE / DEV_BSIZE, 0, len, (char *)fp->f_bg, &buf_size); if (error) goto out; @@ -509,7 +509,7 @@ twiddle(1); error = (f->f_dev->dv_strategy)(f->f_devdata, - F_READ, fsb_to_db(fs, disk_block), + F_READ, fsb_to_db(fs, disk_block), 0, fs->fs_bsize, buf, &buf_size); if (error) goto out; @@ -570,7 +570,7 @@ buf = malloc(fs->fs_bsize); twiddle(1); error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, - ino_to_db(fs, fp->f_bg, inumber), fs->fs_bsize, buf, &rsize); + ino_to_db(fs, fp->f_bg, inumber), 0, fs->fs_bsize, buf, &rsize); if (error) goto out; if (rsize != fs->fs_bsize) { @@ -667,7 +667,7 @@ malloc(fs->fs_bsize); twiddle(1); error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, - fsb_to_db(fp->f_fs, ind_block_num), fs->fs_bsize, + fsb_to_db(fp->f_fs, ind_block_num), 0, fs->fs_bsize, fp->f_blk[level], &fp->f_blksize[level]); if (error) return (error); @@ -725,7 +725,7 @@ } else { twiddle(4); error = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, - fsb_to_db(fs, disk_block), block_size, + fsb_to_db(fs, disk_block), 0, block_size, fp->f_buf, &fp->f_buf_size); if (error) goto done; Index: lib/libstand/read.c =================================================================== --- lib/libstand/read.c +++ lib/libstand/read.c @@ -79,7 +79,7 @@ if (f->f_flags & F_RAW) { twiddle(4); errno = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, - btodb(f->f_offset), bcount, dest, &resid); + btodb(f->f_offset), 0, bcount, dest, &resid); if (errno) return (-1); f->f_offset += resid; Index: lib/libstand/stand.h =================================================================== --- lib/libstand/stand.h +++ 
lib/libstand/stand.h @@ -138,8 +138,8 @@ const char dv_name[8]; int dv_type; /* opaque type constant, arch-dependant */ int (*dv_init)(void); /* early probe call */ - int (*dv_strategy)(void *devdata, int rw, daddr_t blk, size_t size, - char *buf, size_t *rsize); + int (*dv_strategy)(void *devdata, int rw, daddr_t blk, + size_t offset, size_t size, char *buf, size_t *rsize); int (*dv_open)(struct open_file *f, ...); int (*dv_close)(struct open_file *f); int (*dv_ioctl)(struct open_file *f, u_long cmd, void *data); Index: lib/libstand/ufs.c =================================================================== --- lib/libstand/ufs.c +++ lib/libstand/ufs.c @@ -157,7 +157,7 @@ buf = malloc(fs->fs_bsize); twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, - fsbtodb(fs, ino_to_fsba(fs, inumber)), fs->fs_bsize, + fsbtodb(fs, ino_to_fsba(fs, inumber)), 0, fs->fs_bsize, buf, &rsize); if (rc) goto out; @@ -267,7 +267,7 @@ malloc(fs->fs_bsize); twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, - fsbtodb(fp->f_fs, ind_block_num), + fsbtodb(fp->f_fs, ind_block_num), 0, fs->fs_bsize, fp->f_blk[level], &fp->f_blksize[level]); @@ -348,7 +348,7 @@ twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, - fsbtodb(fs, disk_block), + fsbtodb(fs, disk_block), 0, block_size, fp->f_buf, &fp->f_buf_size); if (rc) return (rc); @@ -367,7 +367,7 @@ twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE, - fsbtodb(fs, disk_block), + fsbtodb(fs, disk_block), 0, block_size, fp->f_buf, &fp->f_buf_size); return (rc); } @@ -408,7 +408,7 @@ } else { twiddle(4); rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, - fsbtodb(fs, disk_block), + fsbtodb(fs, disk_block), 0, block_size, fp->f_buf, &fp->f_buf_size); if (rc) return (rc); @@ -521,7 +521,7 @@ */ for (i = 0; sblock_try[i] != -1; i++) { rc = (f->f_dev->dv_strategy)(f->f_devdata, F_READ, - sblock_try[i] / DEV_BSIZE, SBLOCKSIZE, + sblock_try[i] / DEV_BSIZE, 0, SBLOCKSIZE, (char *)fs, &buf_size); if (rc) goto 
out; @@ -651,7 +651,7 @@ twiddle(1); rc = (f->f_dev->dv_strategy)(f->f_devdata, - F_READ, fsbtodb(fs, disk_block), + F_READ, fsbtodb(fs, disk_block), 0, fs->fs_bsize, buf, &buf_size); if (rc) goto out; Index: lib/libstand/write.c =================================================================== --- lib/libstand/write.c +++ lib/libstand/write.c @@ -82,7 +82,7 @@ if (f->f_flags & F_RAW) { twiddle(4); errno = (f->f_dev->dv_strategy)(f->f_devdata, F_WRITE, - btodb(f->f_offset), bcount, dest, &resid); + btodb(f->f_offset), 0, bcount, dest, &resid); if (errno) return (-1); f->f_offset += resid; Index: sys/boot/common/bcache.c =================================================================== --- sys/boot/common/bcache.c +++ sys/boot/common/bcache.c @@ -25,10 +25,11 @@ */ #include +#include __FBSDID("$FreeBSD$"); /* - * Simple LRU block cache + * Simple hashed block cache */ #include @@ -35,7 +36,6 @@ #include #include -#include #include "bootstrap.h" @@ -42,34 +42,50 @@ /* #define BCACHE_DEBUG */ #ifdef BCACHE_DEBUG -#define BCACHE_TIMEOUT 10 # define DEBUG(fmt, args...) printf("%s: " fmt "\n" , __func__ , ## args) #else -#define BCACHE_TIMEOUT 2 # define DEBUG(fmt, args...) #endif - struct bcachectl { daddr_t bc_blkno; - time_t bc_stamp; int bc_count; }; -static struct bcachectl *bcache_ctl; -static caddr_t bcache_data; -static bitstr_t *bcache_miss; -static u_int bcache_nblks; -static u_int bcache_blksize; -static u_int bcache_hits, bcache_misses, bcache_ops, bcache_bypasses; -static u_int bcache_flushes; -static u_int bcache_bcount; +/* + * bcache per device node. cache is allocated on device first open and freed + * on last close, to save memory. The issue there is the size; biosdisk + * supports up to 31 (0x1f) devices. Classic setup would use single disk + * to boot from, but this has changed with zfs. 
+ */ +struct bcache { + struct bcachectl *bcache_ctl; + caddr_t bcache_data; + u_int bcache_nblks; +}; -static void bcache_invalidate(daddr_t blkno); -static void bcache_insert(caddr_t buf, daddr_t blkno); -static int bcache_lookup(caddr_t buf, daddr_t blkno); +static u_int bcache_total_nblks; /* set by bcache_init */ +static u_int bcache_blksize; /* set by bcache_init */ +static u_int bcache_numdev; /* set by bcache_add_dev */ +/* statistics */ +static u_int bcache_units; /* number of devices with cache */ +static u_int bcache_unit_nblks; /* nblocks per unit */ +static u_int bcache_hits; +static u_int bcache_misses; +static u_int bcache_ops; +static u_int bcache_bypasses; +static u_int bcache_bcount; +static u_int bcache_rablks; +#define BHASH(bc, blkno) (((u_int)(blkno)) & ((bc)->bcache_nblks - 1)) +#define BCACHE_LOOKUP(bc, blkno) \ + ((bc)->bcache_ctl[BHASH((bc), (blkno))].bc_blkno != (blkno)) +#define BCACHE_READAHEAD 256 + +static void bcache_invalidate(struct bcache *bc, daddr_t blkno); +static void bcache_insert(struct bcache *bc, daddr_t blkno); + /* * Initialise the cache for (nblks) of (bsize). */ @@ -76,59 +92,104 @@ int bcache_init(u_int nblks, size_t bsize) { - /* discard any old contents */ - if (bcache_data != NULL) { - free(bcache_data); - bcache_data = NULL; - free(bcache_ctl); - } - - /* Allocate control structures */ - bcache_nblks = nblks; + /* set up control data */ + bcache_total_nblks = nblks; bcache_blksize = bsize; - bcache_data = malloc(bcache_nblks * bcache_blksize); - bcache_ctl = (struct bcachectl *)malloc(bcache_nblks * sizeof(struct bcachectl)); - bcache_miss = bit_alloc((bcache_nblks + 1) / 2); - if ((bcache_data == NULL) || (bcache_ctl == NULL) || (bcache_miss == NULL)) { - if (bcache_miss) - free(bcache_miss); - if (bcache_ctl) - free(bcache_ctl); - if (bcache_data) - free(bcache_data); - bcache_data = NULL; - return(ENOMEM); - } return(0); } /* - * Flush the cache + * add number of devices to bcache. 
We have to divide cache space + * between the devices, so bcache_add_dev() can be used to set up the + * number. The issue is, we need to get the number before actual allocations. + * bcache_add_dev() is supposed to be called from device init() call, so the + * assumption is, devsw dv_init is called for plain devices first, and + * for zfs, last. */ void -bcache_flush(void) +bcache_add_dev(int devices) { - u_int i; + bcache_numdev += devices; +} - bcache_flushes++; +void * +bcache_allocate(void) +{ + int i, n; + struct bcache *bc = malloc(sizeof (struct bcache)); + int disks = bcache_numdev; + if (disks == 0) + disks = 1; /* safeguard */ + if (bc == NULL) { + errno = ENOMEM; + return (bc); + } + i = 0; + n = disks; + while (n != 1) { + n >>= 1; + i++; + } + if (disks > (1 << i)) + i++; + bc->bcache_nblks = bcache_total_nblks >> i; + bcache_unit_nblks = bc->bcache_nblks; + bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize); + if (bc->bcache_data == NULL) { + /* don't error out yet. 
fall back to 32 blocks and try again */ + bc->bcache_nblks = 32; + bc->bcache_data = malloc(bc->bcache_nblks * bcache_blksize); + } + + bc->bcache_ctl = malloc(bc->bcache_nblks * sizeof(struct bcachectl)); + + if ((bc->bcache_data == NULL) || (bc->bcache_ctl == NULL)) { + if (bc->bcache_ctl) + free(bc->bcache_ctl); + if (bc->bcache_data) + free(bc->bcache_data); + free(bc); + errno = ENOMEM; + return(NULL); + } + /* Flush the cache */ - for (i = 0; i < bcache_nblks; i++) { - bcache_ctl[i].bc_count = -1; - bcache_ctl[i].bc_blkno = -1; + for (i = 0; i < bc->bcache_nblks; i++) { + bc->bcache_ctl[i].bc_count = -1; + bc->bcache_ctl[i].bc_blkno = -1; } + bcache_units++; + return (bc); } +void +bcache_free(void *cache) +{ + struct bcache *bc = cache; + + if (bc == NULL) + return; + + if (bc->bcache_ctl) + free(bc->bcache_ctl); + if (bc->bcache_data) + free(bc->bcache_data); + free(bc); + bcache_units--; +} + /* * Handle a write request; write directly to the disk, and populate the * cache with the new values. */ static int -write_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size, - char *buf, size_t *rsize) +write_strategy(void *devdata, int rw, daddr_t blk, size_t offset, + size_t size, char *buf, size_t *rsize) { struct bcache_devdata *dd = (struct bcache_devdata *)devdata; + struct bcache *bc = dd->dv_cache; daddr_t i, nblk; int err; @@ -136,12 +197,13 @@ /* Invalidate the blocks being written */ for (i = 0; i < nblk; i++) { - bcache_invalidate(blk + i); + bcache_invalidate(bc, blk + i); } /* Write the blocks */ - err = dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize); + err = dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, rsize); +#if 0 /* Populate the block cache with the new data */ if (err == 0) { for (i = 0; i < nblk; i++) { @@ -148,6 +210,7 @@ bcache_insert(buf + (i * bcache_blksize),blk + i); } } +#endif return err; } @@ -158,61 +221,72 @@ * device I/O and then use the I/O results to populate the cache. 
*/ static int -read_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size, - char *buf, size_t *rsize) +read_strategy(void *devdata, int rw, daddr_t blk, size_t offset, + size_t size, char *buf, size_t *rsize) { struct bcache_devdata *dd = (struct bcache_devdata *)devdata; - int p_size, result; - daddr_t p_blk, i, j, nblk; + struct bcache *bc = dd->dv_cache; + int p_size, r_size, result, complete, ra; + int i, j, nblk; + daddr_t p_blk; caddr_t p_buf; + if (bc == NULL) { + errno = ENODEV; + return (-1); + } + nblk = size / bcache_blksize; + if ((nblk == 0 && size != 0) || offset != 0) + nblk++; result = 0; + complete = 1; - /* Satisfy any cache hits up front */ + /* Satisfy any cache hits up front, break on first miss */ for (i = 0; i < nblk; i++) { - if (bcache_lookup(buf + (bcache_blksize * i), blk + i)) { - bit_set(bcache_miss, i); /* cache miss */ - bcache_misses++; + if (BCACHE_LOOKUP(bc, blk + i)) { + bcache_misses += (nblk - i); + complete = 0; + break; } else { - bit_clear(bcache_miss, i); /* cache hit */ bcache_hits++; } } - /* Go back and fill in any misses XXX optimise */ - p_blk = -1; - p_buf = NULL; - p_size = 0; - for (i = 0; i < nblk; i++) { - if (bit_test(bcache_miss, i)) { - /* miss, add to pending transfer */ - if (p_blk == -1) { - p_blk = blk + i; - p_buf = buf + (bcache_blksize * i); - p_size = 1; - } else { - p_size++; - } - } else if (p_blk != -1) { - /* hit, complete pending transfer */ - result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, p_size * bcache_blksize, p_buf, NULL); - if (result != 0) - goto done; - for (j = 0; j < p_size; j++) - bcache_insert(p_buf + (j * bcache_blksize), p_blk + j); - p_blk = -1; - } + if (complete) { /* whole set was in cache, return it */ + bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, + buf, size); + goto done; + } + + /* + * Fill in any misses. From check we have i pointing to first missing + * block, read in all remaining blocks + readahead. 
+ * We have space at least for nblk - i before bcache wraps. + */ + p_blk = blk + i; + p_buf = bc->bcache_data + (bcache_blksize * BHASH(bc, p_blk)); + r_size = bc->bcache_nblks - BHASH(bc, p_blk); /* remaining blocks */ + + p_size = MIN(r_size, nblk - i); /* read at least those blocks */ + + ra = bc->bcache_nblks - (unsigned)BHASH(bc, p_blk + p_size); + if (ra != bc->bcache_nblks) { /* do we have RA space? */ + ra = MIN(BCACHE_READAHEAD, ra); + p_size += ra; } - if (p_blk != -1) { - /* pending transfer left */ - result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, p_size * bcache_blksize, p_buf, NULL); - if (result != 0) - goto done; - for (j = 0; j < p_size; j++) - bcache_insert(p_buf + (j * bcache_blksize), p_blk + j); - } - + + result = dd->dv_strategy(dd->dv_devdata, rw, p_blk, 0, + p_size * bcache_blksize, p_buf, NULL); + if (result != 0) + goto done; + for (j = 0; j < p_size; j++) + bcache_insert(bc, p_blk + j); + bcache_rablks += p_size; + + bcopy(bc->bcache_data + (bcache_blksize * BHASH(bc, blk)) + offset, buf, + size); + done: if ((result == 0) && (rsize != NULL)) *rsize = size; @@ -220,35 +294,76 @@ } /* - * Requests larger than 1/2 the cache size will be bypassed and go + * Requests larger than 1/2 cache size will be bypassed and go * directly to the disk. XXX tune this. 
*/ int -bcache_strategy(void *devdata, int unit, int rw, daddr_t blk, size_t size, - char *buf, size_t *rsize) +bcache_strategy(void *devdata, int rw, daddr_t blk, size_t offset, + size_t size, char *buf, size_t *rsize) { - static int bcache_unit = -1; struct bcache_devdata *dd = (struct bcache_devdata *)devdata; + struct bcache *bc = dd->dv_cache; + u_int bcache_nblks = 0; + int nblk, cblk, ret; + size_t csize, isize, total; bcache_ops++; - if(bcache_unit != unit) { - bcache_flush(); - bcache_unit = unit; - } + if (bc != NULL) + bcache_nblks = bc->bcache_nblks; /* bypass large requests, or when the cache is inactive */ - if ((bcache_data == NULL) || ((size * 2 / bcache_blksize) > bcache_nblks)) { + if (bc == NULL || + (offset == 0 && ((size * 2 / bcache_blksize) > bcache_nblks))) { DEBUG("bypass %d from %d", size / bcache_blksize, blk); bcache_bypasses++; - return(dd->dv_strategy(dd->dv_devdata, rw, blk, size, buf, rsize)); + return (dd->dv_strategy(dd->dv_devdata, rw, blk, offset, size, buf, + rsize)); } + /* normalize offset */ + while (offset >= bcache_blksize) { + blk++; + offset -= bcache_blksize; + } + switch (rw) { case F_READ: - return read_strategy(devdata, unit, rw, blk, size, buf, rsize); + nblk = size / bcache_blksize; + if (offset || (size != 0 && nblk == 0)) + nblk++; /* read at least one block */ + + ret = 0; + total = 0; + while(size) { + cblk = bcache_nblks - BHASH(bc, blk); /* # of blocks left */ + cblk = MIN(cblk, nblk); + + if (size <= bcache_blksize) + csize = size; + else { + csize = cblk * bcache_blksize; + if (offset) + csize -= (bcache_blksize - offset); + } + + ret = read_strategy(devdata, rw, blk, offset, + csize, buf+total, &isize); + if (ret != 0) + return (ret); + blk += (offset+isize) / bcache_blksize; + offset = 0; + total += isize; + size -= isize; + nblk = size / bcache_blksize; + } + + if (rsize) + *rsize = total; + + return (ret); case F_WRITE: - return write_strategy(devdata, unit, rw, blk, size, buf, rsize); + return 
write_strategy(devdata, rw, blk, offset, size, buf, rsize); } return -1; } @@ -255,95 +370,54 @@ /* - * Insert a block into the cache. Retire the oldest block to do so, if required. - * - * XXX the LRU algorithm will fail after 2^31 blocks have been transferred. + * Insert a block into the cache. */ static void -bcache_insert(caddr_t buf, daddr_t blkno) +bcache_insert(struct bcache *bc, daddr_t blkno) { - time_t now; - int cand, ocount; - u_int i; + u_int cand; - time(&now); - cand = 0; /* assume the first block */ - ocount = bcache_ctl[0].bc_count; + cand = BHASH(bc, blkno); - /* find the oldest block */ - for (i = 1; i < bcache_nblks; i++) { - if (bcache_ctl[i].bc_blkno == blkno) { - /* reuse old entry */ - cand = i; - break; - } - if (bcache_ctl[i].bc_count < ocount) { - ocount = bcache_ctl[i].bc_count; - cand = i; - } - } - - DEBUG("insert blk %d -> %d @ %d # %d", blkno, cand, now, bcache_bcount); - bcopy(buf, bcache_data + (bcache_blksize * cand), bcache_blksize); - bcache_ctl[cand].bc_blkno = blkno; - bcache_ctl[cand].bc_stamp = now; - bcache_ctl[cand].bc_count = bcache_bcount++; + DEBUG("insert blk %llu -> %u # %d", blkno, cand, bcache_bcount); + bc->bcache_ctl[cand].bc_blkno = blkno; + bc->bcache_ctl[cand].bc_count = bcache_bcount++; } /* - * Look for a block in the cache. Blocks more than BCACHE_TIMEOUT seconds old - * may be stale (removable media) and thus are discarded. Copy the block out - * if successful and return zero, or return nonzero on failure. - */ -static int -bcache_lookup(caddr_t buf, daddr_t blkno) -{ - time_t now; - u_int i; - - time(&now); - - for (i = 0; i < bcache_nblks; i++) - /* cache hit? */ - if ((bcache_ctl[i].bc_blkno == blkno) && ((bcache_ctl[i].bc_stamp + BCACHE_TIMEOUT) >= now)) { - bcopy(bcache_data + (bcache_blksize * i), buf, bcache_blksize); - DEBUG("hit blk %d <- %d (now %d then %d)", blkno, i, now, bcache_ctl[i].bc_stamp); - return(0); - } - return(ENOENT); -} - -/* * Invalidate a block from the cache. 
*/ static void -bcache_invalidate(daddr_t blkno) +bcache_invalidate(struct bcache *bc, daddr_t blkno) { u_int i; - for (i = 0; i < bcache_nblks; i++) { - if (bcache_ctl[i].bc_blkno == blkno) { - bcache_ctl[i].bc_count = -1; - bcache_ctl[i].bc_blkno = -1; - DEBUG("invalidate blk %d", blkno); - break; - } + i = BHASH(bc, blkno); + if (bc->bcache_ctl[i].bc_blkno == blkno) { + bc->bcache_ctl[i].bc_count = -1; + bc->bcache_ctl[i].bc_blkno = -1; + DEBUG("invalidate blk %llu", blkno); } } +#ifndef BOOT2 COMMAND_SET(bcachestat, "bcachestat", "get disk block cache stats", command_bcache); static int command_bcache(int argc, char *argv[]) { - u_int i; - - for (i = 0; i < bcache_nblks; i++) { - printf("%08jx %04x %04x|", (uintmax_t)bcache_ctl[i].bc_blkno, (unsigned int)bcache_ctl[i].bc_stamp & 0xffff, bcache_ctl[i].bc_count & 0xffff); - if (((i + 1) % 4) == 0) - printf("\n"); + if (argc != 1) { + command_errmsg = "wrong number of arguments"; + return(CMD_ERROR); } - printf("\n%d ops %d bypasses %d hits %d misses %d flushes\n", bcache_ops, bcache_bypasses, bcache_hits, bcache_misses, bcache_flushes); + + printf("\ncache blocks: %d\n", bcache_total_nblks); + printf("cache blocksz: %d\n", bcache_blksize); + printf("cache readahead: %d\n", bcache_rablks); + printf("unit cache blocks: %d\n", bcache_unit_nblks); + printf("cached units: %d\n", bcache_units); + printf("%d ops %d bypasses %d hits %d misses\n", bcache_ops, + bcache_bypasses, bcache_hits, bcache_misses); return(CMD_OK); } - +#endif Index: sys/boot/common/bootstrap.h =================================================================== --- sys/boot/common/bootstrap.h +++ sys/boot/common/bootstrap.h @@ -88,8 +88,10 @@ /* bcache.c */ int bcache_init(u_int nblks, size_t bsize); -void bcache_flush(void); -int bcache_strategy(void *devdata, int unit, int rw, daddr_t blk, +void bcache_add_dev(int); +void *bcache_allocate(void); +void bcache_free(void *); +int bcache_strategy(void *devdata, int rw, daddr_t blk, size_t offset, 
size_t size, char *buf, size_t *rsize); /* @@ -97,8 +99,10 @@ */ struct bcache_devdata { - int (*dv_strategy)(void *devdata, int rw, daddr_t blk, size_t size, char *buf, size_t *rsize); + int (*dv_strategy)(void *devdata, int rw, daddr_t blk, + size_t offset, size_t size, char *buf, size_t *rsize); void *dv_devdata; + void *dv_cache; }; /* Index: sys/boot/common/disk.c =================================================================== --- sys/boot/common/disk.c +++ sys/boot/common/disk.c @@ -178,7 +178,7 @@ dev = (struct disk_devdesc *)d; od = (struct open_disk *)dev->d_opendata; - return (dev->d_dev->dv_strategy(dev, F_READ, offset, + return (dev->d_dev->dv_strategy(dev, F_READ, offset, 0, blocks * od->sectorsize, (char *)buf, NULL)); } @@ -239,7 +239,7 @@ int ret; od = (struct open_disk *)dev->d_opendata; - ret = dev->d_dev->dv_strategy(dev, F_READ, dev->d_offset + offset, + ret = dev->d_dev->dv_strategy(dev, F_READ, dev->d_offset + offset, 0, blocks * od->sectorsize, buf, NULL); return (ret); @@ -252,7 +252,7 @@ int ret; od = (struct open_disk *)dev->d_opendata; - ret = dev->d_dev->dv_strategy(dev, F_WRITE, dev->d_offset + offset, + ret = dev->d_dev->dv_strategy(dev, F_WRITE, dev->d_offset + offset, 0, blocks * od->sectorsize, buf, NULL); return (ret); Index: sys/boot/common/md.c =================================================================== --- sys/boot/common/md.c +++ sys/boot/common/md.c @@ -60,7 +60,7 @@ /* devsw I/F */ static int md_init(void); -static int md_strategy(void *, int, daddr_t, size_t, char *, size_t *); +static int md_strategy(void *, int, daddr_t, size_t, size_t, char *, size_t *); static int md_open(struct open_file *, ...); static int md_close(struct open_file *); static void md_print(int); @@ -84,8 +84,8 @@ } static int -md_strategy(void *devdata, int rw, daddr_t blk, size_t size, char *buf, - size_t *rsize) +md_strategy(void *devdata, int rw, daddr_t blk, size_t offset, size_t size, + char *buf, size_t *rsize) { struct devdesc *dev = 
(struct devdesc *)devdata; size_t ofs; Index: sys/boot/efi/libefi/efipart.c =================================================================== --- sys/boot/efi/libefi/efipart.c +++ sys/boot/efi/libefi/efipart.c @@ -42,7 +42,8 @@ static EFI_GUID devpath_guid = DEVICE_PATH_PROTOCOL; static int efipart_init(void); -static int efipart_strategy(void *, int, daddr_t, size_t, char *, size_t *); +static int efipart_strategy(void *, int, daddr_t, size_t, size_t, char *, + size_t *); static int efipart_open(struct open_file *, ...); static int efipart_close(struct open_file *); static void efipart_print(int); @@ -256,8 +257,8 @@ } static int -efipart_strategy(void *devdata, int rw, daddr_t blk, size_t size, char *buf, - size_t *rsize) +efipart_strategy(void *devdata, int rw, daddr_t blk, size_t offset, + size_t size, char *buf, size_t *rsize) { struct devdesc *dev = (struct devdesc *)devdata; EFI_BLOCK_IO *blkio; Index: sys/boot/i386/libi386/bioscd.c =================================================================== --- sys/boot/i386/libi386/bioscd.c +++ sys/boot/i386/libi386/bioscd.c @@ -85,13 +85,19 @@ static struct bcinfo { int bc_unit; /* BIOS unit number */ struct specification_packet bc_sp; + int bc_open; /* reference counter */ + void *bc_bcache; /* buffer cache data */ } bcinfo [MAXBCDEV]; static int nbcinfo = 0; +#define BC(dev) (bcinfo[(dev)->d_unit]) + static int bc_read(int unit, daddr_t dblk, int blks, caddr_t dest); static int bc_init(void); static int bc_strategy(void *devdata, int flag, daddr_t dblk, - size_t size, char *buf, size_t *rsize); + size_t offset, size_t size, char *buf, size_t *rsize); +static int bc_realstrategy(void *devdata, int flag, daddr_t dblk, + size_t offset, size_t size, char *buf, size_t *rsize); static int bc_open(struct open_file *f, ...); static int bc_close(struct open_file *f); static void bc_print(int verbose); @@ -164,6 +170,7 @@ printf("BIOS CD is cd%d\n", nbcinfo); nbcinfo++; + bcache_add_dev(nbcinfo); /* register cd device 
in bcache */ return(0); } @@ -200,6 +207,9 @@ return(ENXIO); } + BC(dev).bc_open++; + if (BC(dev).bc_bcache == NULL) + BC(dev).bc_bcache = bcache_allocate(); return(0); } @@ -206,13 +216,35 @@ static int bc_close(struct open_file *f) { + struct i386_devdesc *dev; + dev = (struct i386_devdesc *)f->f_devdata; + BC(dev).bc_open--; + if (BC(dev).bc_open == 0) { + bcache_free(BC(dev).bc_bcache); + BC(dev).bc_bcache = NULL; + } return(0); } +static int +bc_strategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, + char *buf, size_t *rsize) +{ + struct bcache_devdata bcd; + struct i386_devdesc *dev; + + dev = (struct i386_devdesc *)devdata; + bcd.dv_strategy = bc_realstrategy; + bcd.dv_devdata = devdata; + bcd.dv_cache = BC(dev).bc_bcache; + + return (bcache_strategy(&bcd, rw, dblk, offset, size, buf, rsize)); +} + static int -bc_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, - size_t *rsize) +bc_realstrategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, + char *buf, size_t *rsize) { struct i386_devdesc *dev; int unit; Index: sys/boot/i386/libi386/biosdisk.c =================================================================== --- sys/boot/i386/libi386/biosdisk.c +++ sys/boot/i386/libi386/biosdisk.c @@ -86,6 +86,8 @@ int bd_type; /* BIOS 'drive type' (floppy only) */ uint16_t bd_sectorsize; /* Sector size */ uint64_t bd_sectors; /* Disk size */ + int bd_open; /* reference counter */ + void *bd_bcache; /* buffer cache data */ } bdinfo [MAXBDDEV]; static int nbdinfo = 0; @@ -98,10 +100,10 @@ static int bd_int13probe(struct bdinfo *bd); static int bd_init(void); -static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t size, - char *buf, size_t *rsize); -static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, +static int bd_strategy(void *devdata, int flag, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize); +static int bd_realstrategy(void *devdata, int flag, daddr_t dblk, size_t 
offset, + size_t size, char *buf, size_t *rsize); static int bd_open(struct open_file *f, ...); static int bd_close(struct open_file *f); static int bd_ioctl(struct open_file *f, u_long cmd, void *data); @@ -166,6 +168,8 @@ (nfd >= *(unsigned char *)PTOV(BIOS_NUMDRIVES))) break; #endif + bdinfo[nbdinfo].bd_open = 0; + bdinfo[nbdinfo].bd_bcache = NULL; bdinfo[nbdinfo].bd_unit = unit; bdinfo[nbdinfo].bd_flags = unit < 0x80 ? BD_FLOPPY: 0; if (!bd_int13probe(&bdinfo[nbdinfo])) @@ -179,6 +183,7 @@ nfd++; } } + bcache_add_dev(nbdinfo); return(0); } @@ -308,7 +313,9 @@ if (dev->d_unit < 0 || dev->d_unit >= nbdinfo) return (EIO); - + BD(dev).bd_open++; + if (BD(dev).bd_bcache == NULL) + BD(dev).bd_bcache = bcache_allocate(); return (disk_open(dev, BD(dev).bd_sectors * BD(dev).bd_sectorsize, BD(dev).bd_sectorsize, (BD(dev).bd_flags & BD_FLOPPY) ? DISK_F_NOCACHE: 0)); @@ -320,6 +327,11 @@ struct disk_devdesc *dev; dev = (struct disk_devdesc *)f->f_devdata; + BD(dev).bd_open--; + if (BD(dev).bd_open == 0) { + bcache_free(BD(dev).bd_bcache); + BD(dev).bd_bcache = NULL; + } return (disk_close(dev)); } @@ -343,8 +355,8 @@ } static int -bd_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, - size_t *rsize) +bd_strategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, + char *buf, size_t *rsize) { struct bcache_devdata bcd; struct disk_devdesc *dev; @@ -352,13 +364,14 @@ dev = (struct disk_devdesc *)devdata; bcd.dv_strategy = bd_realstrategy; bcd.dv_devdata = devdata; - return (bcache_strategy(&bcd, BD(dev).bd_unit, rw, dblk + dev->d_offset, - size, buf, rsize)); + bcd.dv_cache = BD(dev).bd_bcache; + return (bcache_strategy(&bcd, rw, dblk + dev->d_offset, offset, size, + buf, rsize)); } static int -bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, - size_t *rsize) +bd_realstrategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, + char *buf, size_t *rsize) { struct disk_devdesc *dev = (struct 
disk_devdesc *)devdata; int blks; Index: sys/boot/i386/libi386/biosmem.c =================================================================== --- sys/boot/i386/libi386/biosmem.c +++ sys/boot/i386/libi386/biosmem.c @@ -56,7 +56,7 @@ /* * The minimum amount of memory to reserve in bios_extmem for the heap. */ -#define HEAP_MIN (3 * 1024 * 1024) +#define HEAP_MIN (64 * 1024 * 1024) /* * Products in this list need quirks to detect Index: sys/boot/i386/libi386/pxe.c =================================================================== --- sys/boot/i386/libi386/pxe.c +++ sys/boot/i386/libi386/pxe.c @@ -72,7 +72,7 @@ static int pxe_init(void); static int pxe_strategy(void *devdata, int flag, daddr_t dblk, - size_t size, char *buf, size_t *rsize); + size_t offset, size_t size, char *buf, size_t *rsize); static int pxe_open(struct open_file *f, ...); static int pxe_close(struct open_file *f); static void pxe_print(int verbose); @@ -247,7 +247,7 @@ static int -pxe_strategy(void *devdata, int flag, daddr_t dblk, size_t size, +pxe_strategy(void *devdata, int flag, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize) { return (EIO); Index: sys/boot/i386/loader/main.c =================================================================== --- sys/boot/i386/loader/main.c +++ sys/boot/i386/loader/main.c @@ -137,9 +137,9 @@ cons_probe(); /* - * Initialise the block cache + * Initialise the block cache. Set the upper limit. */ - bcache_init(32, 512); /* 16k cache XXX tune this */ + bcache_init(32768, 512); /* * Special handling for PXE and CD booting. Index: sys/boot/zfs/zfs.c =================================================================== --- sys/boot/zfs/zfs.c +++ sys/boot/zfs/zfs.c @@ -564,7 +564,7 @@ } static int -zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t size, char *buf, size_t *rsize) +zfs_dev_strategy(void *devdata, int rw, daddr_t dblk, size_t offset, size_t size, char *buf, size_t *rsize) { return (ENOSYS);