Index: head/sys/fs/specfs/spec_vnops.c =================================================================== --- head/sys/fs/specfs/spec_vnops.c (revision 40659) +++ head/sys/fs/specfs/spec_vnops.c (revision 40660) @@ -1,937 +1,936 @@ /* * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 - * $Id: spec_vnops.c,v 1.73 1998/09/05 14:13:12 phk Exp $ + * $Id: spec_vnops.c,v 1.74 1998/09/12 20:21:54 phk Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int spec_advlock __P((struct vop_advlock_args *)); static int spec_badop __P((void)); static int spec_bmap __P((struct vop_bmap_args *)); static int spec_close __P((struct vop_close_args *)); static int spec_freeblks __P((struct vop_freeblks_args *)); static int spec_fsync __P((struct vop_fsync_args *)); static int spec_getattr __P((struct vop_getattr_args *)); static int spec_getpages __P((struct vop_getpages_args *)); static int spec_inactive __P((struct vop_inactive_args *)); static int spec_ioctl __P((struct vop_ioctl_args *)); static int spec_lookup __P((struct vop_lookup_args *)); static int spec_open __P((struct vop_open_args *)); static int spec_poll __P((struct vop_poll_args *)); static int spec_print __P((struct vop_print_args *)); static int spec_read __P((struct vop_read_args *)); static int spec_strategy __P((struct vop_strategy_args *)); static int spec_write __P((struct vop_write_args *)); struct vnode *speclisth[SPECHSZ]; vop_t **spec_vnodeop_p; static struct vnodeopv_entry_desc spec_vnodeop_entries[] = { { &vop_default_desc, (vop_t *) vop_defaultop }, { &vop_access_desc, (vop_t *) vop_ebadf 
}, { &vop_advlock_desc, (vop_t *) spec_advlock }, { &vop_bmap_desc, (vop_t *) spec_bmap }, { &vop_close_desc, (vop_t *) spec_close }, { &vop_create_desc, (vop_t *) spec_badop }, { &vop_freeblks_desc, (vop_t *) spec_freeblks }, { &vop_fsync_desc, (vop_t *) spec_fsync }, { &vop_getattr_desc, (vop_t *) spec_getattr }, { &vop_getpages_desc, (vop_t *) spec_getpages }, { &vop_inactive_desc, (vop_t *) spec_inactive }, { &vop_ioctl_desc, (vop_t *) spec_ioctl }, { &vop_lease_desc, (vop_t *) vop_null }, { &vop_link_desc, (vop_t *) spec_badop }, { &vop_lookup_desc, (vop_t *) spec_lookup }, { &vop_mkdir_desc, (vop_t *) spec_badop }, { &vop_mknod_desc, (vop_t *) spec_badop }, { &vop_open_desc, (vop_t *) spec_open }, { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, { &vop_poll_desc, (vop_t *) spec_poll }, { &vop_print_desc, (vop_t *) spec_print }, { &vop_read_desc, (vop_t *) spec_read }, { &vop_readdir_desc, (vop_t *) spec_badop }, { &vop_readlink_desc, (vop_t *) spec_badop }, { &vop_reallocblks_desc, (vop_t *) spec_badop }, { &vop_reclaim_desc, (vop_t *) vop_null }, { &vop_remove_desc, (vop_t *) spec_badop }, { &vop_rename_desc, (vop_t *) spec_badop }, { &vop_rmdir_desc, (vop_t *) spec_badop }, { &vop_setattr_desc, (vop_t *) vop_ebadf }, { &vop_strategy_desc, (vop_t *) spec_strategy }, { &vop_symlink_desc, (vop_t *) spec_badop }, { &vop_write_desc, (vop_t *) spec_write }, { NULL, NULL } }; /* * Descriptor pairing the spec_vnodeop_p vector pointer with * spec_vnodeop_entries; it is handed to VNODEOP_SET on the next * statement. */ static struct vnodeopv_desc spec_vnodeop_opv_desc = { &spec_vnodeop_p, spec_vnodeop_entries }; VNODEOP_SET(spec_vnodeop_opv_desc); /* * Generic entry point into the special-file operations vector. * * Dispatches through VOCALL on spec_vnodeop_p, using the vdesc_offset * of the operation descriptor carried in the argument block, and * returns the result of that call unchanged. */ int spec_vnoperate(ap) struct vop_generic_args /* { struct vnodeop_desc *a_desc; } */ *ap; { return (VOCALL(spec_vnodeop_p, ap->a_desc->vdesc_offset, ap)); } static void spec_getpages_iodone __P((struct buf *bp)); /* * Trivial lookup routine that always fails. 
*/ static int spec_lookup(ap) struct vop_lookup_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; } */ *ap; { /* Clear the result vnode and report ENOTDIR to the caller. */ *ap->a_vpp = NULL; return (ENOTDIR); } /* * Open a special file. * * Returns ENXIO when the vnode's filesystem is mounted MNT_NODEV, when * the major number is beyond nchrdev/nblkdev, or when the switch entry * or its d_open is NULL. Write opens by anyone but FSCRED are subject * to the securelevel checks below, which return EPERM (or the error * from vfs_mountedon). A block device that vfs_mountedon reports as * mounted cannot be opened at all. Otherwise the result of the * driver's d_open is returned; a vnode that is neither VCHR nor VBLK * falls through the switch and yields 0. */ /* ARGSUSED */ static int spec_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct proc *a_p; } */ *ap; { struct proc *p = ap->a_p; struct vnode *bvp, *vp = ap->a_vp; dev_t bdev, dev = (dev_t)vp->v_rdev; int maj = major(dev); int error; /* * Don't allow open if fs is mounted -nodev. */ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) return (ENXIO); switch (vp->v_type) { case VCHR: if ((u_int)maj >= nchrdev) return (ENXIO); if ( (cdevsw[maj] == NULL) || (cdevsw[maj]->d_open == NULL)) return ENXIO; if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { /* * When running in very secure mode, do not allow * opens for writing of any disk character devices. */ if (securelevel >= 2 && cdevsw[maj]->d_bmaj != -1 && (cdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * When running in secure mode, do not allow opens * for writing of /dev/mem, /dev/kmem, or character * devices whose corresponding block devices are * currently mounted. */ if (securelevel >= 1) { if ((bdev = chrtoblk(dev)) != NODEV && vfinddev(bdev, VBLK, &bvp) && bvp->v_usecount > 0 && (error = vfs_mountedon(bvp))) return (error); if (iskmemdev(dev)) return (EPERM); } } if ((cdevsw[maj]->d_flags & D_TYPEMASK) == D_TTY) vp->v_flag |= VISTTY; /* * The vnode is unlocked around the driver's open call and * locked again before the result is returned. */ VOP_UNLOCK(vp, 0, p); error = (*cdevsw[maj]->d_open)(dev, ap->a_mode, S_IFCHR, p); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if ((u_int)maj >= nblkdev) return (ENXIO); if ( (bdevsw[maj] == NULL) || (bdevsw[maj]->d_open == NULL)) return ENXIO; /* * When running in very secure mode, do not allow * opens for writing of any disk block devices. 
*/ if (securelevel >= 2 && ap->a_cred != FSCRED && (ap->a_mode & FWRITE) && (bdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); /* * Do not allow opens of block devices that are * currently mounted. */ error = vfs_mountedon(vp); if (error) return (error); return ((*bdevsw[maj]->d_open)(dev, ap->a_mode, S_IFBLK, p)); } return (0); } /* * Vnode op for read. * * A zero-length request returns 0 immediately. Character devices: the * vnode is unlocked, the driver's d_read is called, the vnode is locked * again and the driver's result is returned. Block devices: a negative * offset returns EINVAL; otherwise data is fetched with bread/breadn in * bsize-sized blocks and copied out with uiomove. bsize is * BLKDEV_IOSIZE unless the DIOCGPART ioctl reports an FS_BSDFFS * partition with non-zero p_frag and p_fsize, in which case their * product is used. Any other vnode type panics. */ /* ARGSUSED */ static int spec_read(ap) struct vop_read_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn, nextbn; long bsize, bscale; struct partinfo dpart; - int n, on, majordev; + int n, on; d_ioctl_t *ioctl; int error = 0; dev_t dev; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_READ) panic("spec_read mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_read proc"); #endif if (uio->uio_resid == 0) return (0); switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)]->d_read) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; dev = vp->v_rdev; - if ((majordev = major(dev)) < nblkdev && - (ioctl = bdevsw[majordev]->d_ioctl) != NULL && + if ((ioctl = bdevsw[major(dev)]->d_ioctl) != NULL && (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; bscale = btodb(bsize); do { bn = btodb(uio->uio_offset) & ~(bscale - 1); on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (vp->v_lastr + bscale == bn) { nextbn = bn + bscale; error = breadn(vp, bn, (int)bsize, &nextbn, (int *)&bsize, 1, NOCRED, &bp); } else error = bread(vp, bn, (int)bsize, NOCRED, &bp); vp->v_lastr = bn; n = min(n, 
bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); brelse(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("spec_read type"); } /* NOTREACHED */ } /* * Vnode op for write. * * Character devices: the vnode is unlocked, the driver's d_write is * called, the vnode is locked again and the driver's result is * returned. Block devices: a zero-length request returns 0 and a * negative offset returns EINVAL; otherwise the data is copied with * uiomove into bsize-sized buffers. A chunk covering a whole block * takes its buffer from getblk, a partial chunk reads the block first * with bread. A buffer filled up to its end is started with bawrite, * any other is left to bdwrite. bsize is chosen as in spec_read. Any * other vnode type panics. * NOTE(review): unlike spec_read, d_ioctl is called here without first * checking it for NULL -- confirm every block driver provides one. */ /* ARGSUSED */ static int spec_write(ap) struct vop_write_args /* { struct vnode *a_vp; struct uio *a_uio; int a_ioflag; struct ucred *a_cred; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct uio *uio = ap->a_uio; struct proc *p = uio->uio_procp; struct buf *bp; daddr_t bn; int bsize, blkmask; struct partinfo dpart; register int n, on; int error = 0; #ifdef DIAGNOSTIC if (uio->uio_rw != UIO_WRITE) panic("spec_write mode"); if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) panic("spec_write proc"); #endif switch (vp->v_type) { case VCHR: VOP_UNLOCK(vp, 0, p); error = (*cdevsw[major(vp->v_rdev)]->d_write) (vp->v_rdev, uio, ap->a_ioflag); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); return (error); case VBLK: if (uio->uio_resid == 0) return (0); if (uio->uio_offset < 0) return (EINVAL); bsize = BLKDEV_IOSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0) { if (dpart.part->p_fstype == FS_BSDFFS && dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) bsize = dpart.part->p_frag * dpart.part->p_fsize; } blkmask = btodb(bsize) - 1; do { bn = btodb(uio->uio_offset) & ~blkmask; on = uio->uio_offset % bsize; n = min((unsigned)(bsize - on), uio->uio_resid); if (n == bsize) bp = getblk(vp, bn, bsize, 0, 0); else error = bread(vp, bn, bsize, NOCRED, &bp); n = min(n, bsize - bp->b_resid); if (error) { brelse(bp); return (error); } error = uiomove((char *)bp->b_data + on, n, uio); if (n + on == bsize) bawrite(bp); else bdwrite(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); return (error); default: panic("spec_write type"); } /* NOTREACHED */ } /* * Device ioctl operation. 
*/ /* ARGSUSED */ /* * The command, data pointer, file flags and process are passed straight * to the d_ioctl entry of the character or block driver selected by the * vnode's device number, and the driver's result is returned. Any * other vnode type panics. */ static int spec_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; int a_command; caddr_t a_data; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { dev_t dev = ap->a_vp->v_rdev; switch (ap->a_vp->v_type) { case VCHR: return ((*cdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); case VBLK: return ((*bdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag, ap->a_p)); default: panic("spec_ioctl"); /* NOTREACHED */ } } /* * Device poll operation. * * Character devices are polled through the driver's d_poll entry and * its result is returned; every other vnode type is handed to * vop_defaultop. */ /* ARGSUSED */ static int spec_poll(ap) struct vop_poll_args /* { struct vnode *a_vp; int a_events; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register dev_t dev; switch (ap->a_vp->v_type) { case VCHR: dev = ap->a_vp->v_rdev; return (*cdevsw[major(dev)]->d_poll)(dev, ap->a_events, ap->a_p); default: return (vop_defaultop((struct vop_generic_args *)ap)); } } /* * Synch buffers associated with a block device. * * Character devices return 0 at once. For anything else every buffer * on the vnode's dirty list that is not B_BUSY is written out, and when * a_waitfor is MNT_WAIT the call also sleeps until v_numoutput has * dropped to zero. Always returns 0. */ /* ARGSUSED */ static int spec_fsync(ap) struct vop_fsync_args /* { struct vnode *a_vp; struct ucred *a_cred; int a_waitfor; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct buf *bp; struct buf *nbp; int s; if (vp->v_type == VCHR) return (0); /* * Flush all dirty buffers associated with a block device. 
*/ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; if ((bp->b_flags & B_BUSY)) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("spec_fsync: not dirty"); if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) { vfs_bio_awrite(bp); splx(s); } else { bremfree(bp); bp->b_flags |= B_BUSY; splx(s); bawrite(bp); } goto loop; } if (ap->a_waitfor == MNT_WAIT) { while (vp->v_numoutput) { vp->v_flag |= VBWAIT; (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0); } #ifdef DIAGNOSTIC if (vp->v_dirtyblkhd.lh_first) { vprint("spec_fsync: dirty", vp); splx(s); goto loop; } #endif } splx(s); return (0); } /* * Inactive operation: nothing is torn down here, the vnode is simply * unlocked and 0 is returned. */ static int spec_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct proc *a_p; } */ *ap; { VOP_UNLOCK(ap->a_vp, 0, ap->a_p); return (0); } /* * Just call the device strategy routine. * * For a write (B_READ clear) on a buffer whose b_dep list is not empty, * the bioops.io_start hook is called first when one is installed. The * buffer then goes to the d_strategy entry of the block driver named by * b_dev. Always returns 0. */ static int spec_strategy(ap) struct vop_strategy_args /* { struct buf *a_bp; } */ *ap; { struct buf *bp; bp = ap->a_bp; if (((bp->b_flags & B_READ) == 0) && (LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start) (*bioops.io_start)(bp); (*bdevsw[major(bp->b_dev)]->d_strategy)(bp); return (0); } /* * Pass a free-blocks request to the block driver. * * Returns 0 without doing anything unless the driver's d_flags include * D_CANFREE. Otherwise a buffer obtained from geteblk is marked * B_FREEBUF | B_BUSY, filled in with the device, a_addr (as block * number and, via dbtob, as byte offset) and a_length, and handed to * the driver's d_strategy. Always returns 0. * NOTE(review): nothing in this function releases the buffer after the * strategy call; presumably the driver or the completion path does -- * confirm. */ static int spec_freeblks(ap) struct vop_freeblks_args /* { struct vnode *a_vp; daddr_t a_addr; daddr_t a_length; } */ *ap; { struct cdevsw *bsw; struct buf *bp; bsw = bdevsw[major(ap->a_vp->v_rdev)]; if ((bsw->d_flags & D_CANFREE) == 0) return (0); bp = geteblk(ap->a_length); bp->b_flags |= B_FREEBUF | B_BUSY; bp->b_dev = ap->a_vp->v_rdev; bp->b_blkno = ap->a_addr; bp->b_offset = dbtob(ap->a_addr); bp->b_bcount = ap->a_length; (*bsw->d_strategy)(bp); return (0); } /* * This is a noop, simply returning what one has been given. 
*/ static int spec_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; daddr_t a_bn; struct vnode **a_vpp; daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { /* * Every output pointer is optional and only the non-NULL ones are * filled in: the vnode and block number are echoed back and both * run lengths are reported as 0. */ if (ap->a_vpp != NULL) *ap->a_vpp = ap->a_vp; if (ap->a_bnp != NULL) *ap->a_bnp = ap->a_bn; if (ap->a_runp != NULL) *ap->a_runp = 0; if (ap->a_runb != NULL) *ap->a_runb = 0; return (0); } /* * Device close routine. * * The driver's d_close is called only when vcount() is no greater than * one or when VXLOCK is set on the vnode; otherwise 0 is returned * without calling the driver. For block devices vinvalbuf(V_SAVE) is * run on every close, before that check, and its error is returned as * is. Any vnode type other than VCHR or VBLK panics. * NOTE(review): the local p is initialised from ap->a_p, but the body * uses ap->a_p directly throughout. */ /* ARGSUSED */ static int spec_close(ap) struct vop_close_args /* { struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; struct proc *p = ap->a_p; dev_t dev = vp->v_rdev; d_close_t *devclose; int mode, error; switch (vp->v_type) { case VCHR: /* * Hack: a tty device that is a controlling terminal * has a reference from the session structure. * We cannot easily tell that a character device is * a controlling terminal, unless it is the closing * process' controlling terminal. In that case, * if the reference count is 2 (this last descriptor * plus the session), release the reference from the session. */ if (vcount(vp) == 2 && ap->a_p && (vp->v_flag & VXLOCK) == 0 && vp == ap->a_p->p_session->s_ttyvp) { vrele(vp); ap->a_p->p_session->s_ttyvp = NULL; } /* * If the vnode is locked, then we are in the midst * of forcibly closing the device, otherwise we only * close on last reference. */ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) return (0); devclose = cdevsw[major(dev)]->d_close; mode = S_IFCHR; break; case VBLK: /* * On last close of a block device (that isn't mounted) * we must invalidate any in core blocks, so that * we can, for instance, change floppy disks. */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); VOP_UNLOCK(vp, 0, ap->a_p); if (error) return (error); /* * We do not want to really close the device if it * is still in use unless we are trying to close it * forcibly. 
Since every use (buffer, vnode, swap, cmap) * holds a reference to the vnode, and because we mark * any other vnodes that alias this device, when the * sum of the reference counts on all the aliased * vnodes descends to one, we are on last close. */ if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0) return (0); devclose = bdevsw[major(dev)]->d_close; mode = S_IFBLK; break; default: panic("spec_close: not special"); } return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); } /* * Print out the contents of a special device vnode. */ static int spec_print(ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), minor(ap->a_vp->v_rdev)); return (0); } /* * Special device advisory byte-level locks. * * No lock is ever granted: a request carrying F_FLOCK in a_flags gets * EOPNOTSUPP, any other request gets EINVAL. */ /* ARGSUSED */ static int spec_advlock(ap) struct vop_advlock_args /* { struct vnode *a_vp; caddr_t a_id; int a_op; struct flock *a_fl; int a_flags; } */ *ap; { return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); } /* * Special device bad operation. * * Entered in the operations table for the vnode operations that must * never be applied to a device node; it panics and does not return. */ static int spec_badop() { panic("spec_badop called"); /* NOTREACHED */ } /* * Completion callback that spec_getpages installs in b_iodone: mark the * buffer B_DONE and wake up whoever is sleeping on the buffer address. */ static void spec_getpages_iodone(bp) struct buf *bp; { bp->b_flags |= B_DONE; wakeup(bp); } /* * Read the pages listed in ap->a_m from the device behind a_vp. * * The pages are mapped into the kva of a buffer taken from getpbuf, a * single read is issued through VOP_STRATEGY, and the caller sleeps * until spec_getpages_iodone has marked the buffer done. Each page's * valid and dirty state is then set from the number of bytes actually * read, and every page except a_reqpage is woken up or freed. Returns * VM_PAGER_OK when the requested page ended up with valid data, and * VM_PAGER_ERROR otherwise or when the computed offset is out of range. */ static int spec_getpages(ap) struct vop_getpages_args *ap; { vm_offset_t kva; int error; int i, pcount, size, s; daddr_t blkno; struct buf *bp; vm_page_t m; vm_ooffset_t offset; int toff, nextoff, nread; struct vnode *vp = ap->a_vp; int blksiz; int gotreqpage; error = 0; pcount = round_page(ap->a_count) / PAGE_SIZE; /* * Calculate the offset of the transfer. */ offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset; /* XXX sanity check before we go into details. */ /* XXX limits should be defined elsewhere. */ #define DADDR_T_BIT 32 #define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1) if (offset < 0 || offset > OFFSET_MAX) { /* XXX still no %q in kernel. 
*/ printf("spec_getpages: preposterous offset 0x%x%08x\n", (u_int)((u_quad_t)offset >> 32), (u_int)(offset & 0xffffffff)); return (VM_PAGER_ERROR); } blkno = btodb(offset); /* * Round up physical size for real devices, use the * fundamental blocksize of the fs if possible. */ if (vp && vp->v_mount) { if (vp->v_type != VBLK) { vprint("Non VBLK", vp); } blksiz = vp->v_mount->mnt_stat.f_bsize; if (blksiz < DEV_BSIZE) { blksiz = DEV_BSIZE; } } else blksiz = DEV_BSIZE; size = (ap->a_count + blksiz - 1) & ~(blksiz - 1); bp = getpbuf(); kva = (vm_offset_t)bp->b_data; /* * Map the pages to be read into the kva. */ pmap_qenter(kva, ap->a_m, pcount); /* Build a minimal buffer header. */ bp->b_flags = B_BUSY | B_READ | B_CALL; bp->b_iodone = spec_getpages_iodone; /* B_PHYS is not set, but it is nice to fill this in. */ bp->b_proc = curproc; bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; if (bp->b_rcred != NOCRED) crhold(bp->b_rcred); if (bp->b_wcred != NOCRED) crhold(bp->b_wcred); bp->b_blkno = blkno; bp->b_lblkno = blkno; pbgetvp(ap->a_vp, bp); bp->b_bcount = size; bp->b_bufsize = size; bp->b_resid = 0; cnt.v_vnodein++; cnt.v_vnodepgsin += pcount; /* Do the input. */ VOP_STRATEGY(bp->b_vp, bp); s = splbio(); /* We definitely need to be at splbio here. 
*/ while ((bp->b_flags & B_DONE) == 0) tsleep(bp, PVM, "spread", 0); splx(s); if ((bp->b_flags & B_ERROR) != 0) { if (bp->b_error) error = bp->b_error; else error = EIO; } nread = size - bp->b_resid; if (nread < ap->a_count) { bzero((caddr_t)kva + nread, ap->a_count - nread); } pmap_qremove(kva, pcount); gotreqpage = 0; for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) { nextoff = toff + PAGE_SIZE; m = ap->a_m[i]; m->flags &= ~PG_ZERO; if (nextoff <= nread) { m->valid = VM_PAGE_BITS_ALL; m->dirty = 0; } else if (toff < nread) { int nvalid = ((nread + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1); vm_page_set_validclean(m, 0, nvalid); } else { m->valid = 0; m->dirty = 0; } if (i != ap->a_reqpage) { /* * Just in case someone was asking for this page we * now tell them that it is ok to use. */ if (!error || (m->valid == VM_PAGE_BITS_ALL)) { if (m->valid) { if (m->flags & PG_WANTED) { vm_page_activate(m); } else { vm_page_deactivate(m); } vm_page_wakeup(m); } else { vm_page_free(m); } } else { vm_page_free(m); } } else if (m->valid) { gotreqpage = 1; } } if (!gotreqpage) { m = ap->a_m[ap->a_reqpage]; #ifndef MAX_PERF printf( "spec_getpages: I/O read failure: (error code=%d)\n", error); printf( " size: %d, resid: %ld, a_count: %d, valid: 0x%x\n", size, bp->b_resid, ap->a_count, m->valid); printf( " nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n", nread, ap->a_reqpage, (u_long)m->pindex, pcount); #endif /* * Free the buffer header back to the swap buffer pool. */ relpbuf(bp); return VM_PAGER_ERROR; } /* * Free the buffer header back to the swap buffer pool. 
*/ relpbuf(bp); return VM_PAGER_OK; } /* * Get the attributes of a device vnode. * * *a_vap is zeroed, va_blocksize is set (BLKDEV_IOSIZE for VBLK, * MAXBSIZE for VCHR) and, when the DIOCGPART ioctl succeeds, va_bytes * and va_size are both set to the p_size of the d_partitions entry * indexed by the device's minor number, converted with dbtob. Always * returns 0. * NOTE(review): the ioctl goes through bdevsw[] for every vnode type, * VCHR included, and neither the table entry nor its d_ioctl is checked * for NULL -- confirm this is intended. */ /* ARGSUSED */ static int spec_getattr(ap) struct vop_getattr_args /* { struct vnode *a_vp; struct vattr *a_vap; struct ucred *a_cred; struct proc *a_p; } */ *ap; { register struct vnode *vp = ap->a_vp; register struct vattr *vap = ap->a_vap; struct partinfo dpart; bzero(vap, sizeof (*vap)); if (vp->v_type == VBLK) vap->va_blocksize = BLKDEV_IOSIZE; else if (vp->v_type == VCHR) vap->va_blocksize = MAXBSIZE; if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, (caddr_t)&dpart, FREAD, ap->a_p) == 0) { vap->va_bytes = dbtob(dpart.disklab->d_partitions [minor(vp->v_rdev)].p_size); vap->va_size = vap->va_bytes; } return (0); } Index: head/sys/gnu/ext2fs/ext2_bmap.c =================================================================== --- head/sys/gnu/ext2fs/ext2_bmap.c (revision 40659) +++ head/sys/gnu/ext2fs/ext2_bmap.c (revision 40660) @@ -1,355 +1,353 @@ /* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 - * $Id: ufs_bmap.c,v 1.21 1998/07/04 20:45:39 julian Exp $ + * $Id: ufs_bmap.c,v 1.22 1998/08/18 03:54:39 bde Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Bmap converts the logical block number of a file to its physical block * number on the disk. The conversion is done by using the logical block * number to index into the array of block pointers described by the dinode. * * When a_vpp is non-NULL it receives the inode's device vnode. When a_bnp * is NULL no translation is requested and 0 is returned; otherwise the * work is delegated to ufs_bmaparray and its result is returned. */ int ufs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; ufs_daddr_t a_bn; struct vnode **a_vpp; ufs_daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { /* * Check for underlying vnode requests and ensure that logical * to physical mapping is requested. 
*/ if (ap->a_vpp != NULL) *ap->a_vpp = VTOI(ap->a_vp)->i_devvp; if (ap->a_bnp == NULL) return (0); return (ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL, ap->a_runp, ap->a_runb)); } /* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative * address of the first data block to which they point. Double indirect blocks * are addressed by one less than the address of the first indirect block to * which they point. Triple indirect blocks are addressed by one less than * the address of the first double indirect block to which they point. * * ufs_bmaparray does the bmap conversion, and if requested returns the * array of logical blocks which must be traversed to get to a block. * Each entry contains the offset into that block that gets you to the * next block and the disk address of the block (if it is assigned). * * bnp receives the disk address for bn, or -1 when none is assigned. * ap and nump, when supplied, receive the indirection path computed by * ufs_getlbns; they must be both NULL or both non-NULL (checked under * DIAGNOSTIC only). runp and runb, when non-NULL, are zeroed and then * count the sequential blocks found after and before bn, each limited by * maxrun. Returns 0, or the error from ufs_getlbns or biowait. * NOTE(review): the block that computes maxrun declares its own devvp, * which shadows the function-level variable of the same name. */ int ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) struct vnode *vp; ufs_daddr_t bn; ufs_daddr_t *bnp; struct indir *ap; int *nump; int *runp; int *runb; { register struct inode *ip; struct buf *bp; struct ufsmount *ump; struct mount *mp; struct vnode *devvp; struct indir a[NIADDR+1], *xap; ufs_daddr_t daddr; long metalbn; int error, maxrun = 0, num; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOUFS(mp); #ifdef DIAGNOSTIC if (ap != NULL && nump == NULL || ap == NULL && nump != NULL) panic("ufs_bmaparray: invalid arguments"); #endif if (runp) { *runp = 0; } if (runb) { *runb = 0; } maxrun = 0; if (runp || runb || (vp->v_maxio == 0)) { struct vnode *devvp; int blksize; blksize = mp->mnt_stat.f_iosize; /* * XXX * If MAXPHYS is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. 
*/ devvp = ip->i_devvp; - if (devvp && devvp->v_type == VBLK && - (devvp->v_rdev != NODEV) && - (major(devvp->v_rdev) < nblkdev)) { + if (devvp != NULL && devvp->v_type == VBLK) { if (bdevsw[major(devvp->v_rdev)]->d_maxio > MAXPHYS) { maxrun = MAXPHYS; vp->v_maxio = MAXPHYS; } else { maxrun = bdevsw[major(devvp->v_rdev)]->d_maxio; vp->v_maxio = bdevsw[major(devvp->v_rdev)]->d_maxio; } maxrun = maxrun / blksize; maxrun -= 1; } if (maxrun <= 0) { vp->v_maxio = DFLTPHYS; maxrun = DFLTPHYS / blksize; maxrun -= 1; } } xap = ap == NULL ? a : ap; if (!nump) nump = # error = ufs_getlbns(vp, bn, xap, nump); if (error) return (error); num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, ip->i_db[bn]); if (*bnp == 0) *bnp = -1; else if (runp) { daddr_t bnb = bn; for (++bn; bn < NDADDR && *runp < maxrun && is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); ++bn, ++*runp); bn = bnb; if (runb && (bn > 0)) { for (--bn; (bn >= 0) && (*runb < maxrun) && is_sequential(ump, ip->i_db[bn], ip->i_db[bn+1]); --bn, ++*runb); } } return (0); } /* Get disk address out of indirect block array */ daddr = ip->i_ib[xap->in_off]; devvp = VFSTOUFS(vp->v_mount)->um_devvp; for (bp = NULL, ++xap; --num; ++xap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = xap->in_lbn; if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. 
*/ if (bp) bqrelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); if ((bp->b_flags & B_CACHE) == 0) { #ifdef DIAGNOSTIC if (!daddr) panic("ufs_bmaparray: indirect block not in cache"); #endif bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); VOP_STRATEGY(bp->b_vp, bp); curproc->p_stats->p_ru.ru_inblock++; /* XXX */ error = biowait(bp); if (error) { brelse(bp); return (error); } } daddr = ((ufs_daddr_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) { for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((ufs_daddr_t *)bp->b_data)[bn - 1], ((ufs_daddr_t *)bp->b_data)[bn]); ++bn, ++*runp); bn = xap->in_off; /* * NOTE(review): this backward scan indexes b_data as daddr_t * while the forward scan above uses ufs_daddr_t, and it stops * at bn > 0 where the direct-block scan uses bn >= 0 -- * confirm both differences are intended. */ if (runb && bn) { for(--bn; bn > 0 && *runb < maxrun && is_sequential(ump, ((daddr_t *)bp->b_data)[bn], ((daddr_t *)bp->b_data)[bn+1]); --bn, ++*runb); } } } if (bp) bqrelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; return (0); } /* * Create an array of logical block number/offset pairs which represent the * path of indirect blocks required to access a data block. The first "pair" * contains the logical block number of the appropriate single, double or * triple indirect block and the offset into the inode indirect block array. * Note, the logical block number of the inode single/double/triple indirect * block appears twice in the array, once with the offset into the i_ib and * once with the offset into the page itself. * * The pairs are written to ap[]; ufs_bmaparray passes an array of * NIADDR+1 entries. When nump is non-NULL, *nump is set to the number * of pairs written, which is 0 for a direct block (bn < NDADDR). A * negative bn is treated as a request for a meta-data block. Returns 0, * or EFBIG when bn cannot be reached with NIADDR levels of indirection. */ int ufs_getlbns(vp, bn, ap, nump) struct vnode *vp; ufs_daddr_t bn; struct indir *ap; int *nump; { long blockcnt, metalbn, realbn; struct ufsmount *ump; int i, numlevels, off; int64_t qblockcnt; ump = VFSTOUFS(vp->v_mount); if (nump) *nump = 0; numlevels = 0; realbn = bn; if ((long)bn < 0) bn = -(long)bn; /* The first NDADDR blocks are direct blocks. */ if (bn < NDADDR) return (0); /* * Determine the number of levels of indirection. 
After this loop * is done, blockcnt indicates the number of data blocks possible * at the previous level of indirection, and NIADDR - i is the number * of levels of indirection needed to locate the requested block. */ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { if (i == 0) return (EFBIG); /* * Use int64_t's here to avoid overflow for triple indirect * blocks when longs have 32 bits and the block size is more * than 4K. */ qblockcnt = (int64_t)blockcnt * MNINDIR(ump); if (bn < qblockcnt) break; blockcnt = qblockcnt; } /* Calculate the address of the first meta-block. */ if (realbn >= 0) metalbn = -(realbn - bn + NIADDR - i); else metalbn = -(-realbn - bn + NIADDR - i); /* * At each iteration, off is the offset into the bap array which is * an array of disk addresses at the current level of indirection. * The logical block number and the offset in that block are stored * into the argument array. */ ap->in_lbn = metalbn; ap->in_off = off = NIADDR - i; ap->in_exists = 0; ap++; for (++numlevels; i <= NIADDR; i++) { /* If searching for a meta-data block, quit when found. */ if (metalbn == realbn) break; off = (bn / blockcnt) % MNINDIR(ump); ++numlevels; ap->in_lbn = metalbn; ap->in_off = off; ap->in_exists = 0; ++ap; metalbn -= -1 + off * blockcnt; blockcnt /= MNINDIR(ump); } if (nump) *nump = numlevels; return (0); } Index: head/sys/gnu/fs/ext2fs/ext2_bmap.c =================================================================== --- head/sys/gnu/fs/ext2fs/ext2_bmap.c (revision 40659) +++ head/sys/gnu/fs/ext2fs/ext2_bmap.c (revision 40660) @@ -1,355 +1,353 @@ /* * Copyright (c) 1989, 1991, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. 
and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*
 * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95
- * $Id: ufs_bmap.c,v 1.21 1998/07/04 20:45:39 julian Exp $
+ * $Id: ufs_bmap.c,v 1.22 1998/08/18 03:54:39 bde Exp $
 */

/*
 * NOTE(review): the header names that followed each #include appear to have
 * been lost when this diff was extracted -- restore them from the repository.
 */
#include #include #include #include #include #include #include #include #include #include #include #include #include

/*
 * Bmap converts the logical block number of a file to its physical block
 * number on the disk. The conversion is done by using the logical block
 * number to index into the array of block pointers described by the dinode.
 *
 * If a_vpp is non-NULL it receives the inode's i_devvp.  If a_bnp is NULL
 * no translation is performed and 0 is returned; otherwise the return value
 * is whatever ufs_bmaparray() returns.
 */
int
ufs_bmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		ufs_daddr_t a_bn;
		struct vnode **a_vpp;
		ufs_daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{
	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_vpp != NULL)
		*ap->a_vpp = VTOI(ap->a_vp)->i_devvp;
	if (ap->a_bnp == NULL)
		return (0);

	/* The indir array and its count are not wanted here (both NULL). */
	return (ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL,
	    ap->a_runp, ap->a_runb));
}

/*
 * Indirect blocks are now on the vnode for the file. They are given negative
 * logical block numbers. Indirect blocks are addressed by the negative
 * address of the first data block to which they point. Double indirect blocks
 * are addressed by one less than the address of the first indirect block to
 * which they point. Triple indirect blocks are addressed by one less than
 * the address of the first double indirect block to which they point.
 *
 * ufs_bmaparray does the bmap conversion, and if requested returns the
 * array of logical blocks which must be traversed to get to a block.
 * Each entry contains the offset into that block that gets you to the
 * next block and the disk address of the block (if it is assigned).
*/ int ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) struct vnode *vp; ufs_daddr_t bn; ufs_daddr_t *bnp; struct indir *ap; int *nump; int *runp; int *runb; { register struct inode *ip; struct buf *bp; struct ufsmount *ump; struct mount *mp; struct vnode *devvp; struct indir a[NIADDR+1], *xap; ufs_daddr_t daddr; long metalbn; int error, maxrun = 0, num; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOUFS(mp); #ifdef DIAGNOSTIC if (ap != NULL && nump == NULL || ap == NULL && nump != NULL) panic("ufs_bmaparray: invalid arguments"); #endif if (runp) { *runp = 0; } if (runb) { *runb = 0; } maxrun = 0; if (runp || runb || (vp->v_maxio == 0)) { struct vnode *devvp; int blksize; blksize = mp->mnt_stat.f_iosize; /* * XXX * If MAXPHYS is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. */ devvp = ip->i_devvp; - if (devvp && devvp->v_type == VBLK && - (devvp->v_rdev != NODEV) && - (major(devvp->v_rdev) < nblkdev)) { + if (devvp != NULL && devvp->v_type == VBLK) { if (bdevsw[major(devvp->v_rdev)]->d_maxio > MAXPHYS) { maxrun = MAXPHYS; vp->v_maxio = MAXPHYS; } else { maxrun = bdevsw[major(devvp->v_rdev)]->d_maxio; vp->v_maxio = bdevsw[major(devvp->v_rdev)]->d_maxio; } maxrun = maxrun / blksize; maxrun -= 1; } if (maxrun <= 0) { vp->v_maxio = DFLTPHYS; maxrun = DFLTPHYS / blksize; maxrun -= 1; } } xap = ap == NULL ? 
a : ap; if (!nump) nump = # error = ufs_getlbns(vp, bn, xap, nump); if (error) return (error); num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, ip->i_db[bn]); if (*bnp == 0) *bnp = -1; else if (runp) { daddr_t bnb = bn; for (++bn; bn < NDADDR && *runp < maxrun && is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); ++bn, ++*runp); bn = bnb; if (runb && (bn > 0)) { for (--bn; (bn >= 0) && (*runb < maxrun) && is_sequential(ump, ip->i_db[bn], ip->i_db[bn+1]); --bn, ++*runb); } } return (0); } /* Get disk address out of indirect block array */ daddr = ip->i_ib[xap->in_off]; devvp = VFSTOUFS(vp->v_mount)->um_devvp; for (bp = NULL, ++xap; --num; ++xap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = xap->in_lbn; if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. */ if (bp) bqrelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); if ((bp->b_flags & B_CACHE) == 0) { #ifdef DIAGNOSTIC if (!daddr) panic("ufs_bmaparray: indirect block not in cache"); #endif bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); VOP_STRATEGY(bp->b_vp, bp); curproc->p_stats->p_ru.ru_inblock++; /* XXX */ error = biowait(bp); if (error) { brelse(bp); return (error); } } daddr = ((ufs_daddr_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) { for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((ufs_daddr_t *)bp->b_data)[bn - 1], ((ufs_daddr_t *)bp->b_data)[bn]); ++bn, ++*runp); bn = xap->in_off; if (runb && bn) { for(--bn; bn > 0 && *runb < maxrun && is_sequential(ump, ((daddr_t *)bp->b_data)[bn], ((daddr_t *)bp->b_data)[bn+1]); --bn, ++*runb); } } } if (bp) bqrelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? 
-1 : daddr; return (0); } /* * Create an array of logical block number/offset pairs which represent the * path of indirect blocks required to access a data block. The first "pair" * contains the logical block number of the appropriate single, double or * triple indirect block and the offset into the inode indirect block array. * Note, the logical block number of the inode single/double/triple indirect * block appears twice in the array, once with the offset into the i_ib and * once with the offset into the page itself. */ int ufs_getlbns(vp, bn, ap, nump) struct vnode *vp; ufs_daddr_t bn; struct indir *ap; int *nump; { long blockcnt, metalbn, realbn; struct ufsmount *ump; int i, numlevels, off; int64_t qblockcnt; ump = VFSTOUFS(vp->v_mount); if (nump) *nump = 0; numlevels = 0; realbn = bn; if ((long)bn < 0) bn = -(long)bn; /* The first NDADDR blocks are direct blocks. */ if (bn < NDADDR) return (0); /* * Determine the number of levels of indirection. After this loop * is done, blockcnt indicates the number of data blocks possible * at the previous level of indirection, and NIADDR - i is the number * of levels of indirection needed to locate the requested block. */ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { if (i == 0) return (EFBIG); /* * Use int64_t's here to avoid overflow for triple indirect * blocks when longs have 32 bits and the block size is more * than 4K. */ qblockcnt = (int64_t)blockcnt * MNINDIR(ump); if (bn < qblockcnt) break; blockcnt = qblockcnt; } /* Calculate the address of the first meta-block. */ if (realbn >= 0) metalbn = -(realbn - bn + NIADDR - i); else metalbn = -(-realbn - bn + NIADDR - i); /* * At each iteration, off is the offset into the bap array which is * an array of disk addresses at the current level of indirection. * The logical block number and the offset in that block are stored * into the argument array. 
*/ ap->in_lbn = metalbn; ap->in_off = off = NIADDR - i; ap->in_exists = 0; ap++; for (++numlevels; i <= NIADDR; i++) { /* If searching for a meta-data block, quit when found. */ if (metalbn == realbn) break; off = (bn / blockcnt) % MNINDIR(ump); ++numlevels; ap->in_lbn = metalbn; ap->in_off = off; ap->in_exists = 0; ++ap; metalbn -= -1 + off * blockcnt; blockcnt /= MNINDIR(ump); } if (nump) *nump = numlevels; return (0); } Index: head/sys/miscfs/specfs/spec_vnops.c =================================================================== --- head/sys/miscfs/specfs/spec_vnops.c (revision 40659) +++ head/sys/miscfs/specfs/spec_vnops.c (revision 40660) @@ -1,937 +1,936 @@ /* * Copyright (c) 1989, 1993, 1995 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95
- * $Id: spec_vnops.c,v 1.73 1998/09/05 14:13:12 phk Exp $
+ * $Id: spec_vnops.c,v 1.74 1998/09/12 20:21:54 phk Exp $
 */

/*
 * NOTE(review): the header names that followed each #include appear to have
 * been lost when this diff was extracted -- restore them from the repository.
 */
#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include

/* Prototypes for the vnode operations implemented in this file. */
static int	spec_advlock __P((struct vop_advlock_args *));
static int	spec_badop __P((void));
static int	spec_bmap __P((struct vop_bmap_args *));
static int	spec_close __P((struct vop_close_args *));
static int	spec_freeblks __P((struct vop_freeblks_args *));
static int	spec_fsync __P((struct vop_fsync_args *));
static int	spec_getattr __P((struct vop_getattr_args *));
static int	spec_getpages __P((struct vop_getpages_args *));
static int	spec_inactive __P((struct vop_inactive_args *));
static int	spec_ioctl __P((struct vop_ioctl_args *));
static int	spec_lookup __P((struct vop_lookup_args *));
static int	spec_open __P((struct vop_open_args *));
static int	spec_poll __P((struct vop_poll_args *));
static int	spec_print __P((struct vop_print_args *));
static int	spec_read __P((struct vop_read_args *));
static int	spec_strategy __P((struct vop_strategy_args *));
static int	spec_write __P((struct vop_write_args *));

struct vnode *speclisth[SPECHSZ];
vop_t **spec_vnodeop_p;

/*
 * Operation table for special-device vnodes: each entry pairs a vnode
 * operation descriptor with the routine that implements it here.
 * Operations that are not expected on a device are routed to spec_badop.
 */
static struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) vop_defaultop },
	{ &vop_access_desc,		(vop_t *) vop_ebadf },
	{ &vop_advlock_desc,		(vop_t *) spec_advlock },
	{ &vop_bmap_desc,		(vop_t *) spec_bmap },
	{ &vop_close_desc,		(vop_t *) spec_close },
	{ &vop_create_desc,		(vop_t *) spec_badop },
	{ &vop_freeblks_desc,		(vop_t *) spec_freeblks },
	{ &vop_fsync_desc,		(vop_t *) spec_fsync },
	{ &vop_getattr_desc,		(vop_t *) spec_getattr },
	{ &vop_getpages_desc,		(vop_t *) spec_getpages },
	{ &vop_inactive_desc,		(vop_t *) spec_inactive },
	{ &vop_ioctl_desc,		(vop_t *) spec_ioctl },
	{ &vop_lease_desc,		(vop_t *) vop_null },
	{ &vop_link_desc,		(vop_t *) spec_badop },
	{ &vop_lookup_desc,		(vop_t *) spec_lookup },
	{ &vop_mkdir_desc,		(vop_t *) spec_badop },
	{ &vop_mknod_desc,		(vop_t *) spec_badop },
	{ &vop_open_desc,		(vop_t *) spec_open },
	{ &vop_pathconf_desc,		(vop_t *) vop_stdpathconf },
	{ &vop_poll_desc,		(vop_t *) spec_poll },
	{ &vop_print_desc,		(vop_t *) spec_print },
	{ &vop_read_desc,		(vop_t *) spec_read },
	{ &vop_readdir_desc,		(vop_t *) spec_badop },
	{ &vop_readlink_desc,		(vop_t *) spec_badop },
	{ &vop_reallocblks_desc,	(vop_t *) spec_badop },
	{ &vop_reclaim_desc,		(vop_t *) vop_null },
	{ &vop_remove_desc,		(vop_t *) spec_badop },
	{ &vop_rename_desc,		(vop_t *) spec_badop },
	{ &vop_rmdir_desc,		(vop_t *) spec_badop },
	{ &vop_setattr_desc,		(vop_t *) vop_ebadf },
	{ &vop_strategy_desc,		(vop_t *) spec_strategy },
	{ &vop_symlink_desc,		(vop_t *) spec_badop },
	{ &vop_write_desc,		(vop_t *) spec_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc spec_vnodeop_opv_desc =
	{ &spec_vnodeop_p, spec_vnodeop_entries };

VNODEOP_SET(spec_vnodeop_opv_desc);

/*
 * Dispatch a generic vnode operation through the spec operation vector,
 * using the offset stored in the operation's descriptor.
 */
int
spec_vnoperate(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
	} */ *ap;
{
	return (VOCALL(spec_vnodeop_p, ap->a_desc->vdesc_offset, ap));
}

static void spec_getpages_iodone __P((struct buf *bp));

/*
 * Trivial lookup routine that always fails.
*/
/* Stores NULL in *a_vpp and returns ENOTDIR. */
static int
spec_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * Open a special file.
 *
 * Returns ENXIO for a -nodev mount, an out-of-range major number or a
 * driver without an open routine; EPERM when securelevel forbids the open;
 * otherwise the value returned by vfs_mountedon() or the driver's d_open.
 * Vnode types other than VCHR and VBLK return 0.
 */
/* ARGSUSED */
static int
spec_open(ap)
	struct vop_open_args /* {
		struct vnode *a_vp;
		int  a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct proc *p = ap->a_p;
	struct vnode *bvp, *vp = ap->a_vp;
	dev_t bdev, dev = (dev_t)vp->v_rdev;
	int maj = major(dev);
	int error;

	/*
	 * Don't allow open if fs is mounted -nodev.
	 */
	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
		return (ENXIO);

	switch (vp->v_type) {

	case VCHR:
		if ((u_int)maj >= nchrdev)
			return (ENXIO);
		if ( (cdevsw[maj] == NULL) || (cdevsw[maj]->d_open == NULL))
			return ENXIO;
		if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
			/*
			 * When running in very secure mode, do not allow
			 * opens for writing of any disk character devices.
			 */
			if (securelevel >= 2
			    && cdevsw[maj]->d_bmaj != -1
			    && (cdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK)
				return (EPERM);
			/*
			 * When running in secure mode, do not allow opens
			 * for writing of /dev/mem, /dev/kmem, or character
			 * devices whose corresponding block devices are
			 * currently mounted.
			 */
			if (securelevel >= 1) {
				if ((bdev = chrtoblk(dev)) != NODEV &&
				    vfinddev(bdev, VBLK, &bvp) &&
				    bvp->v_usecount > 0 &&
				    (error = vfs_mountedon(bvp)))
					return (error);
				if (iskmemdev(dev))
					return (EPERM);
			}
		}
		if ((cdevsw[maj]->d_flags & D_TYPEMASK) == D_TTY)
			vp->v_flag |= VISTTY;
		/* The vnode is unlocked across the driver's open call. */
		VOP_UNLOCK(vp, 0, p);
		error = (*cdevsw[maj]->d_open)(dev, ap->a_mode, S_IFCHR, p);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		return (error);

	case VBLK:
		if ((u_int)maj >= nblkdev)
			return (ENXIO);
		if ( (bdevsw[maj] == NULL) || (bdevsw[maj]->d_open == NULL))
			return ENXIO;
		/*
		 * When running in very secure mode, do not allow
		 * opens for writing of any disk block devices.
		 */
		if (securelevel >= 2 && ap->a_cred != FSCRED &&
		    (ap->a_mode & FWRITE) &&
		    (bdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK)
			return (EPERM);

		/*
		 * Do not allow opens of block devices that are
		 * currently mounted.
		 */
		error = vfs_mountedon(vp);
		if (error)
			return (error);
		return ((*bdevsw[maj]->d_open)(dev, ap->a_mode, S_IFBLK, p));
	}
	return (0);
}

/*
 * Vnode op for read
 *
 * Character devices are handed to the driver's d_read with the vnode
 * unlocked.  Block devices are read through the buffer cache in bsize
 * chunks.  Returns 0 or an errno value.
 */
/* ARGSUSED */
static int
spec_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn, nextbn;
	long bsize, bscale;
	struct partinfo dpart;
-	int n, on, majordev;
+	int n, on;
	d_ioctl_t *ioctl;
	int error = 0;
	dev_t dev;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("spec_read mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_read proc");
#endif
	if (uio->uio_resid == 0)
		return (0);

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp, 0, p);
		error = (*cdevsw[major(vp->v_rdev)]->d_read)
			(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		return (error);

	case VBLK:
		if (uio->uio_offset < 0)
			return (EINVAL);
		/*
		 * Default to BLKDEV_IOSIZE; use frag * fsize instead when
		 * the partition reports an FS_BSDFFS filesystem.
		 */
		bsize = BLKDEV_IOSIZE;
		dev = vp->v_rdev;
-		if ((majordev = major(dev)) < nblkdev &&
-		    (ioctl = bdevsw[majordev]->d_ioctl) != NULL &&
+		if ((ioctl = bdevsw[major(dev)]->d_ioctl) != NULL &&
		    (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
		    dpart.part->p_fstype == FS_BSDFFS &&
		    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
			bsize = dpart.part->p_frag * dpart.part->p_fsize;
		bscale = btodb(bsize);
		do {
			bn = btodb(uio->uio_offset) & ~(bscale - 1);
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			/* Read ahead one block when this follows the last read. */
			if (vp->v_lastr + bscale == bn) {
				nextbn = bn + bscale;
				error = breadn(vp, bn, (int)bsize, &nextbn,
					(int *)&bsize, 1, NOCRED, &bp);
			} else
				error = bread(vp, bn, (int)bsize, NOCRED, &bp);
			vp->v_lastr = bn;
			n = min(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			brelse(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_read type");
	}
	/* NOTREACHED */
}

/*
 * Vnode op for write
 *
 * Character devices are handed to the driver's d_write with the vnode
 * unlocked.  Block devices are written through the buffer cache in bsize
 * chunks.  Returns 0 or an errno value.
 */
/* ARGSUSED */
static int
spec_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct uio *uio = ap->a_uio;
	struct proc *p = uio->uio_procp;
	struct buf *bp;
	daddr_t bn;
	int bsize, blkmask;
	struct partinfo dpart;
	register int n, on;
	int error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("spec_write mode");
	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
		panic("spec_write proc");
#endif

	switch (vp->v_type) {

	case VCHR:
		VOP_UNLOCK(vp, 0, p);
		error = (*cdevsw[major(vp->v_rdev)]->d_write)
			(vp->v_rdev, uio, ap->a_ioflag);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		return (error);

	case VBLK:
		if (uio->uio_resid == 0)
			return (0);
		if (uio->uio_offset < 0)
			return (EINVAL);
		bsize = BLKDEV_IOSIZE;
		if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART,
		    (caddr_t)&dpart, FREAD, p) == 0) {
			if (dpart.part->p_fstype == FS_BSDFFS &&
			    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
				bsize = dpart.part->p_frag *
				    dpart.part->p_fsize;
		}
		blkmask = btodb(bsize) - 1;
		do {
			bn = btodb(uio->uio_offset) & ~blkmask;
			on = uio->uio_offset % bsize;
			n = min((unsigned)(bsize - on), uio->uio_resid);
			/* A whole-block write does not read the old contents. */
			if (n == bsize)
				bp = getblk(vp, bn, bsize, 0, 0);
			else
				error = bread(vp, bn, bsize, NOCRED, &bp);
			n = min(n, bsize - bp->b_resid);
			if (error) {
				brelse(bp);
				return (error);
			}
			error = uiomove((char *)bp->b_data + on, n, uio);
			/* Start the write now if the block was filled to its end. */
			if (n + on == bsize)
				bawrite(bp);
			else
				bdwrite(bp);
		} while (error == 0 && uio->uio_resid > 0 && n != 0);
		return (error);

	default:
		panic("spec_write type");
	}
	/* NOTREACHED */
}

/*
 * Device ioctl operation.
*/
/* Forwards the request to the driver's d_ioctl; panics on other vnode types. */
/* ARGSUSED */
static int
spec_ioctl(ap)
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		int  a_command;
		caddr_t  a_data;
		int  a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	dev_t dev = ap->a_vp->v_rdev;

	switch (ap->a_vp->v_type) {

	case VCHR:
		return ((*cdevsw[major(dev)]->d_ioctl)(dev, ap->a_command,
		    ap->a_data, ap->a_fflag, ap->a_p));

	case VBLK:
		return ((*bdevsw[major(dev)]->d_ioctl)(dev, ap->a_command,
		    ap->a_data, ap->a_fflag, ap->a_p));

	default:
		panic("spec_ioctl");
		/* NOTREACHED */
	}
}

/*
 * Poll a special file: character devices use the driver's d_poll, any
 * other vnode type is passed to vop_defaultop().
 */
/* ARGSUSED */
static int
spec_poll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register dev_t dev;

	switch (ap->a_vp->v_type) {

	case VCHR:
		dev = ap->a_vp->v_rdev;
		return (*cdevsw[major(dev)]->d_poll)(dev, ap->a_events, ap->a_p);
	default:
		return (vop_defaultop((struct vop_generic_args *)ap));

	}
}
/*
 * Synch buffers associated with a block device
 *
 * Returns 0 immediately for character devices.  With a_waitfor equal to
 * MNT_WAIT it also sleeps until vp->v_numoutput reaches zero.
 */
/* ARGSUSED */
static int
spec_fsync(ap)
	struct vop_fsync_args /* {
		struct vnode *a_vp;
		struct ucred *a_cred;
		int  a_waitfor;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct buf *bp;
	struct buf *nbp;
	int s;

	if (vp->v_type == VCHR)
		return (0);
	/*
	 * Flush all dirty buffers associated with a block device.
	 */
loop:
	s = splbio();
	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
		nbp = bp->b_vnbufs.le_next;
		if ((bp->b_flags & B_BUSY))
			continue;
		if ((bp->b_flags & B_DELWRI) == 0)
			panic("spec_fsync: not dirty");
		if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
			vfs_bio_awrite(bp);
			splx(s);
		} else {
			bremfree(bp);
			bp->b_flags |= B_BUSY;
			splx(s);
			bawrite(bp);
		}
		/* splbio was dropped above, so rescan the list from its head. */
		goto loop;
	}
	if (ap->a_waitfor == MNT_WAIT) {
		while (vp->v_numoutput) {
			vp->v_flag |= VBWAIT;
			(void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0);
		}
#ifdef DIAGNOSTIC
		if (vp->v_dirtyblkhd.lh_first) {
			vprint("spec_fsync: dirty", vp);
			splx(s);
			goto loop;
		}
#endif
	}
	splx(s);
	return (0);
}

/* Unlocks the vnode and returns 0. */
static int
spec_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap;
{

	VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
	return (0);
}

/*
 * Just call the device strategy routine
 *
 * For a write whose buffer has entries on b_dep, bioops.io_start (when set)
 * is called first.  Always returns 0.
 */
static int
spec_strategy(ap)
	struct vop_strategy_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	struct buf *bp;

	bp = ap->a_bp;
	if (((bp->b_flags & B_READ) == 0) &&
		(LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start)
		(*bioops.io_start)(bp);
	(*bdevsw[major(bp->b_dev)]->d_strategy)(bp);
	return (0);
}

/*
 * Pass a B_FREEBUF request covering a_addr/a_length to the driver's
 * strategy routine.  Does nothing when the driver lacks D_CANFREE.
 * Always returns 0.
 */
static int
spec_freeblks(ap)
	struct vop_freeblks_args /* {
		struct vnode *a_vp;
		daddr_t a_addr;
		daddr_t a_length;
	} */ *ap;
{
	struct cdevsw *bsw;
	struct buf *bp;

	bsw = bdevsw[major(ap->a_vp->v_rdev)];
	if ((bsw->d_flags & D_CANFREE) == 0)
		return (0);
	bp = geteblk(ap->a_length);
	bp->b_flags |= B_FREEBUF | B_BUSY;
	bp->b_dev = ap->a_vp->v_rdev;
	bp->b_blkno = ap->a_addr;
	bp->b_offset = dbtob(ap->a_addr);
	bp->b_bcount = ap->a_length;
	(*bsw->d_strategy)(bp);
	return (0);
}

/*
 * This is a noop, simply returning what one has been given.
*/
/* The vnode and block number are returned unchanged; both runs are 0. */
static int
spec_bmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t  a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap;
{

	if (ap->a_vpp != NULL)
		*ap->a_vpp = ap->a_vp;
	if (ap->a_bnp != NULL)
		*ap->a_bnp = ap->a_bn;
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	return (0);
}

/*
 * Device close routine
 *
 * Returns 0 without calling the driver while other references remain (and
 * the vnode is not VXLOCKed); otherwise returns the driver's d_close result,
 * or the vinvalbuf() error for a block device.
 */
/* ARGSUSED */
static int
spec_close(ap)
	struct vop_close_args /* {
		struct vnode *a_vp;
		int  a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	struct proc *p = ap->a_p;
	dev_t dev = vp->v_rdev;
	d_close_t *devclose;
	int mode, error;

	switch (vp->v_type) {

	case VCHR:
		/*
		 * Hack: a tty device that is a controlling terminal
		 * has a reference from the session structure.
		 * We cannot easily tell that a character device is
		 * a controlling terminal, unless it is the closing
		 * process' controlling terminal. In that case,
		 * if the reference count is 2 (this last descriptor
		 * plus the session), release the reference from the session.
		 */
		if (vcount(vp) == 2 && ap->a_p &&
		    (vp->v_flag & VXLOCK) == 0 &&
		    vp == ap->a_p->p_session->s_ttyvp) {
			vrele(vp);
			ap->a_p->p_session->s_ttyvp = NULL;
		}
		/*
		 * If the vnode is locked, then we are in the midst
		 * of forcibly closing the device, otherwise we only
		 * close on last reference.
		 */
		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
			return (0);
		devclose = cdevsw[major(dev)]->d_close;
		mode = S_IFCHR;
		break;

	case VBLK:
		/*
		 * On last close of a block device (that isn't mounted)
		 * we must invalidate any in core blocks, so that
		 * we can, for instance, change floppy disks.
		 */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
		error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
		VOP_UNLOCK(vp, 0, ap->a_p);
		if (error)
			return (error);

		/*
		 * We do not want to really close the device if it
		 * is still in use unless we are trying to close it
		 * forcibly. Since every use (buffer, vnode, swap, cmap)
		 * holds a reference to the vnode, and because we mark
		 * any other vnodes that alias this device, when the
		 * sum of the reference counts on all the aliased
		 * vnodes descends to one, we are on last close.
		 */
		if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0)
			return (0);

		devclose = bdevsw[major(dev)]->d_close;
		mode = S_IFBLK;
		break;

	default:
		panic("spec_close: not special");
	}

	return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
}

/*
 * Print out the contents of a special device vnode.
 */
static int
spec_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev),
		minor(ap->a_vp->v_rdev));
	return (0);
}

/*
 * Special device advisory byte-level locks.
 *
 * Never grants a lock: returns EOPNOTSUPP when F_FLOCK is set in a_flags
 * and EINVAL otherwise.
 */
/* ARGSUSED */
static int
spec_advlock(ap)
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t  a_id;
		int  a_op;
		struct flock *a_fl;
		int  a_flags;
	} */ *ap;
{

	return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL);
}

/*
 * Special device bad operation
 */
static int
spec_badop()
{

	panic("spec_badop called");
	/* NOTREACHED */
}

/* I/O completion callback: marks the buffer B_DONE and wakes its sleeper. */
static void
spec_getpages_iodone(bp)
	struct buf *bp;
{

	bp->b_flags |= B_DONE;
	wakeup(bp);
}

/*
 * Read the pages in ap->a_m from the device through a pbuf.
 * Returns VM_PAGER_OK when the requested page (a_reqpage) ends up with
 * valid data and VM_PAGER_ERROR otherwise.
 */
static int
spec_getpages(ap)
	struct vop_getpages_args *ap;
{
	vm_offset_t kva;
	int error;
	int i, pcount, size, s;
	daddr_t blkno;
	struct buf *bp;
	vm_page_t m;
	vm_ooffset_t offset;
	int toff, nextoff, nread;
	struct vnode *vp = ap->a_vp;
	int blksiz;
	int gotreqpage;

	error = 0;
	pcount = round_page(ap->a_count) / PAGE_SIZE;

	/*
	 * Calculate the offset of the transfer.
	 */
	offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;

	/* XXX sanity check before we go into details. */
	/* XXX limits should be defined elsewhere. */
#define	DADDR_T_BIT	32
#define	OFFSET_MAX	((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1)
	if (offset < 0 || offset > OFFSET_MAX) {
		/* XXX still no %q in kernel. */
		printf("spec_getpages: preposterous offset 0x%x%08x\n",
		       (u_int)((u_quad_t)offset >> 32),
		       (u_int)(offset & 0xffffffff));
		return (VM_PAGER_ERROR);
	}

	blkno = btodb(offset);

	/*
	 * Round up physical size for real devices, use the
	 * fundamental blocksize of the fs if possible.
	 */
	if (vp && vp->v_mount) {
		if (vp->v_type != VBLK) {
			vprint("Non VBLK", vp);
		}
		blksiz = vp->v_mount->mnt_stat.f_bsize;
		if (blksiz < DEV_BSIZE) {
			blksiz = DEV_BSIZE;
		}
	}
	else
		blksiz = DEV_BSIZE;

	size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);

	bp = getpbuf();
	kva = (vm_offset_t)bp->b_data;

	/*
	 * Map the pages to be read into the kva.
	 */
	pmap_qenter(kva, ap->a_m, pcount);

	/* Build a minimal buffer header. */
	bp->b_flags = B_BUSY | B_READ | B_CALL;
	bp->b_iodone = spec_getpages_iodone;

	/* B_PHYS is not set, but it is nice to fill this in. */
	bp->b_proc = curproc;
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_blkno = blkno;
	bp->b_lblkno = blkno;
	pbgetvp(ap->a_vp, bp);
	bp->b_bcount = size;
	bp->b_bufsize = size;
	bp->b_resid = 0;

	cnt.v_vnodein++;
	cnt.v_vnodepgsin += pcount;

	/* Do the input. */
	VOP_STRATEGY(bp->b_vp, bp);

	s = splbio();

	/* We definitely need to be at splbio here. */
	while ((bp->b_flags & B_DONE) == 0)
		tsleep(bp, PVM, "spread", 0);

	splx(s);

	if ((bp->b_flags & B_ERROR) != 0) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
	}

	/* Zero the part of the request the device did not fill. */
	nread = size - bp->b_resid;

	if (nread < ap->a_count) {
		bzero((caddr_t)kva + nread,
			ap->a_count - nread);
	}
	pmap_qremove(kva, pcount);


	/* Mark each page fully valid, partly valid or invalid per nread. */
	gotreqpage = 0;
	for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
		nextoff = toff + PAGE_SIZE;
		m = ap->a_m[i];

		m->flags &= ~PG_ZERO;

		if (nextoff <= nread) {
			m->valid = VM_PAGE_BITS_ALL;
			m->dirty = 0;
		} else if (toff < nread) {
			int nvalid = ((nread + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1);
			vm_page_set_validclean(m, 0, nvalid);
		} else {
			m->valid = 0;
			m->dirty = 0;
		}

		if (i != ap->a_reqpage) {
			/*
			 * Just in case someone was asking for this page we
			 * now tell them that it is ok to use.
			 */
			if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
				if (m->valid) {
					if (m->flags & PG_WANTED) {
						vm_page_activate(m);
					} else {
						vm_page_deactivate(m);
					}
					vm_page_wakeup(m);
				} else {
					vm_page_free(m);
				}
			} else {
				vm_page_free(m);
			}
		} else if (m->valid) {
			gotreqpage = 1;
		}
	}
	if (!gotreqpage) {
		m = ap->a_m[ap->a_reqpage];
#ifndef MAX_PERF
		printf(
	    "spec_getpages: I/O read failure: (error code=%d)\n",
		    error);
		printf(
	    " size: %d, resid: %ld, a_count: %d, valid: 0x%x\n",
		    size, bp->b_resid, ap->a_count, m->valid);
		printf(
	    " nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
		    nread, ap->a_reqpage, (u_long)m->pindex, pcount);
#endif
		/*
		 * Free the buffer header back to the swap buffer pool.
		 */
		relpbuf(bp);
		return VM_PAGER_ERROR;
	}
	/*
	 * Free the buffer header back to the swap buffer pool.
	 */
	relpbuf(bp);
	return VM_PAGER_OK;
}

/*
 * Fill in a zeroed vattr: va_blocksize by vnode type, plus va_bytes and
 * va_size when the DIOCGPART ioctl succeeds.  Always returns 0.
 * NOTE(review): the ioctl goes through bdevsw[] even when the vnode is
 * VCHR -- confirm this is intended.
 */
/* ARGSUSED */
static int
spec_getattr(ap)
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct vattr *vap = ap->a_vap;
	struct partinfo dpart;

	bzero(vap, sizeof (*vap));

	if (vp->v_type == VBLK)
		vap->va_blocksize = BLKDEV_IOSIZE;
	else if (vp->v_type == VCHR)
		vap->va_blocksize = MAXBSIZE;

	if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART,
	    (caddr_t)&dpart, FREAD, ap->a_p) == 0) {
		vap->va_bytes = dbtob(dpart.disklab->d_partitions
				      [minor(vp->v_rdev)].p_size);
		vap->va_size = vap->va_bytes;
	}
	return (0);
}
Index: head/sys/ufs/ufs/ufs_bmap.c
===================================================================
--- head/sys/ufs/ufs/ufs_bmap.c (revision 40659)
+++ head/sys/ufs/ufs/ufs_bmap.c (revision 40660)
@@ -1,355 +1,353 @@
/*
 * Copyright (c) 1989, 1991, 1993
 * The Regents of the University of California. All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 * 3.
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 - * $Id: ufs_bmap.c,v 1.21 1998/07/04 20:45:39 julian Exp $ + * $Id: ufs_bmap.c,v 1.22 1998/08/18 03:54:39 bde Exp $ */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Bmap converts a the logical block number of a file to its physical block * number on the disk. The conversion is done by using the logical block * number to index into the array of block pointers described by the dinode. */ int ufs_bmap(ap) struct vop_bmap_args /* { struct vnode *a_vp; ufs_daddr_t a_bn; struct vnode **a_vpp; ufs_daddr_t *a_bnp; int *a_runp; int *a_runb; } */ *ap; { /* * Check for underlying vnode requests and ensure that logical * to physical mapping is requested. 
*/ if (ap->a_vpp != NULL) *ap->a_vpp = VTOI(ap->a_vp)->i_devvp; if (ap->a_bnp == NULL) return (0); return (ufs_bmaparray(ap->a_vp, ap->a_bn, ap->a_bnp, NULL, NULL, ap->a_runp, ap->a_runb)); } /* * Indirect blocks are now on the vnode for the file. They are given negative * logical block numbers. Indirect blocks are addressed by the negative * address of the first data block to which they point. Double indirect blocks * are addressed by one less than the address of the first indirect block to * which they point. Triple indirect blocks are addressed by one less than * the address of the first double indirect block to which they point. * * ufs_bmaparray does the bmap conversion, and if requested returns the * array of logical blocks which must be traversed to get to a block. * Each entry contains the offset into that block that gets you to the * next block and the disk address of the block (if it is assigned). */ int ufs_bmaparray(vp, bn, bnp, ap, nump, runp, runb) struct vnode *vp; ufs_daddr_t bn; ufs_daddr_t *bnp; struct indir *ap; int *nump; int *runp; int *runb; { register struct inode *ip; struct buf *bp; struct ufsmount *ump; struct mount *mp; struct vnode *devvp; struct indir a[NIADDR+1], *xap; ufs_daddr_t daddr; long metalbn; int error, maxrun = 0, num; ip = VTOI(vp); mp = vp->v_mount; ump = VFSTOUFS(mp); #ifdef DIAGNOSTIC if (ap != NULL && nump == NULL || ap == NULL && nump != NULL) panic("ufs_bmaparray: invalid arguments"); #endif if (runp) { *runp = 0; } if (runb) { *runb = 0; } maxrun = 0; if (runp || runb || (vp->v_maxio == 0)) { struct vnode *devvp; int blksize; blksize = mp->mnt_stat.f_iosize; /* * XXX * If MAXPHYS is the largest transfer the disks can handle, * we probably want maxrun to be 1 block less so that we * don't create a block larger than the device can handle. 
*/ devvp = ip->i_devvp; - if (devvp && devvp->v_type == VBLK && - (devvp->v_rdev != NODEV) && - (major(devvp->v_rdev) < nblkdev)) { + if (devvp != NULL && devvp->v_type == VBLK) { if (bdevsw[major(devvp->v_rdev)]->d_maxio > MAXPHYS) { maxrun = MAXPHYS; vp->v_maxio = MAXPHYS; } else { maxrun = bdevsw[major(devvp->v_rdev)]->d_maxio; vp->v_maxio = bdevsw[major(devvp->v_rdev)]->d_maxio; } maxrun = maxrun / blksize; maxrun -= 1; } if (maxrun <= 0) { vp->v_maxio = DFLTPHYS; maxrun = DFLTPHYS / blksize; maxrun -= 1; } } xap = ap == NULL ? a : ap; if (!nump) nump = # error = ufs_getlbns(vp, bn, xap, nump); if (error) return (error); num = *nump; if (num == 0) { *bnp = blkptrtodb(ump, ip->i_db[bn]); if (*bnp == 0) *bnp = -1; else if (runp) { daddr_t bnb = bn; for (++bn; bn < NDADDR && *runp < maxrun && is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); ++bn, ++*runp); bn = bnb; if (runb && (bn > 0)) { for (--bn; (bn >= 0) && (*runb < maxrun) && is_sequential(ump, ip->i_db[bn], ip->i_db[bn+1]); --bn, ++*runb); } } return (0); } /* Get disk address out of indirect block array */ daddr = ip->i_ib[xap->in_off]; devvp = VFSTOUFS(vp->v_mount)->um_devvp; for (bp = NULL, ++xap; --num; ++xap) { /* * Exit the loop if there is no disk address assigned yet and * the indirect block isn't in the cache, or if we were * looking for an indirect block and we've found it. */ metalbn = xap->in_lbn; if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn) break; /* * If we get here, we've either got the block in the cache * or we have a disk address for it, go fetch it. 
*/ if (bp) bqrelse(bp); xap->in_exists = 1; bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0); if ((bp->b_flags & B_CACHE) == 0) { #ifdef DIAGNOSTIC if (!daddr) panic("ufs_bmaparray: indirect block not in cache"); #endif bp->b_blkno = blkptrtodb(ump, daddr); bp->b_flags |= B_READ; vfs_busy_pages(bp, 0); VOP_STRATEGY(bp->b_vp, bp); curproc->p_stats->p_ru.ru_inblock++; /* XXX */ error = biowait(bp); if (error) { brelse(bp); return (error); } } daddr = ((ufs_daddr_t *)bp->b_data)[xap->in_off]; if (num == 1 && daddr && runp) { for (bn = xap->in_off + 1; bn < MNINDIR(ump) && *runp < maxrun && is_sequential(ump, ((ufs_daddr_t *)bp->b_data)[bn - 1], ((ufs_daddr_t *)bp->b_data)[bn]); ++bn, ++*runp); bn = xap->in_off; if (runb && bn) { for(--bn; bn > 0 && *runb < maxrun && is_sequential(ump, ((daddr_t *)bp->b_data)[bn], ((daddr_t *)bp->b_data)[bn+1]); --bn, ++*runb); } } } if (bp) bqrelse(bp); daddr = blkptrtodb(ump, daddr); *bnp = daddr == 0 ? -1 : daddr; return (0); } /* * Create an array of logical block number/offset pairs which represent the * path of indirect blocks required to access a data block. The first "pair" * contains the logical block number of the appropriate single, double or * triple indirect block and the offset into the inode indirect block array. * Note, the logical block number of the inode single/double/triple indirect * block appears twice in the array, once with the offset into the i_ib and * once with the offset into the page itself. */ int ufs_getlbns(vp, bn, ap, nump) struct vnode *vp; ufs_daddr_t bn; struct indir *ap; int *nump; { long blockcnt, metalbn, realbn; struct ufsmount *ump; int i, numlevels, off; int64_t qblockcnt; ump = VFSTOUFS(vp->v_mount); if (nump) *nump = 0; numlevels = 0; realbn = bn; if ((long)bn < 0) bn = -(long)bn; /* The first NDADDR blocks are direct blocks. */ if (bn < NDADDR) return (0); /* * Determine the number of levels of indirection. 
After this loop * is done, blockcnt indicates the number of data blocks possible * at the previous level of indirection, and NIADDR - i is the number * of levels of indirection needed to locate the requested block. */ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { if (i == 0) return (EFBIG); /* * Use int64_t's here to avoid overflow for triple indirect * blocks when longs have 32 bits and the block size is more * than 4K. */ qblockcnt = (int64_t)blockcnt * MNINDIR(ump); if (bn < qblockcnt) break; blockcnt = qblockcnt; } /* Calculate the address of the first meta-block. */ if (realbn >= 0) metalbn = -(realbn - bn + NIADDR - i); else metalbn = -(-realbn - bn + NIADDR - i); /* * At each iteration, off is the offset into the bap array which is * an array of disk addresses at the current level of indirection. * The logical block number and the offset in that block are stored * into the argument array. */ ap->in_lbn = metalbn; ap->in_off = off = NIADDR - i; ap->in_exists = 0; ap++; for (++numlevels; i <= NIADDR; i++) { /* If searching for a meta-data block, quit when found. */ if (metalbn == realbn) break; off = (bn / blockcnt) % MNINDIR(ump); ++numlevels; ap->in_lbn = metalbn; ap->in_off = off; ap->in_exists = 0; ++ap; metalbn -= -1 + off * blockcnt; blockcnt /= MNINDIR(ump); } if (nump) *nump = numlevels; return (0); }