diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c index 86838df837f2..5687522c4c65 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c @@ -1,1611 +1,1611 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* Portions Copyright 2007 Jeremy Teo */ #ifdef _KERNEL #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #endif /* _KERNEL */ #include #include #include #include #include #include #include "zfs_prop.h" /* Used by fstat(1). */ SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD, 0, sizeof(znode_t), "sizeof(znode_t)"); /* * Define ZNODE_STATS to turn on statistic gathering. By default, it is only * turned on when DEBUG is also defined. */ #ifdef DEBUG #define ZNODE_STATS #endif /* DEBUG */ #ifdef ZNODE_STATS #define ZNODE_STAT_ADD(stat) ((stat)++) #else #define ZNODE_STAT_ADD(stat) /* nothing */ #endif /* ZNODE_STATS */ #define POINTER_IS_VALID(p) (!((uintptr_t)(p) & 0x3)) #define POINTER_INVALIDATE(pp) (*(pp) = (void *)((uintptr_t)(*(pp)) | 0x1)) /* * Functions needed for userland (ie: libzpool) are not put under * #ifdef_KERNEL; the rest of the functions have dependencies * (such as VFS logic) that will not compile easily in userland. */ #ifdef _KERNEL static kmem_cache_t *znode_cache = NULL; /*ARGSUSED*/ static void znode_evict_error(dmu_buf_t *dbuf, void *user_ptr) { #if 1 /* XXXPJD: From OpenSolaris. */ /* * We should never drop all dbuf refs without first clearing * the eviction callback. */ panic("evicting znode %p\n", user_ptr); #else /* XXXPJD */ znode_t *zp = user_ptr; vnode_t *vp; mutex_enter(&zp->z_lock); zp->z_dbuf = NULL; vp = ZTOV(zp); if (vp == NULL) { mutex_exit(&zp->z_lock); zfs_znode_free(zp); } else if (vp->v_count == 0) { ZTOV(zp) = NULL; vhold(vp); mutex_exit(&zp->z_lock); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curthread); vrecycle(vp, curthread); VOP_UNLOCK(vp, 0); vdrop(vp); zfs_znode_free(zp); } else { mutex_exit(&zp->z_lock); } #endif } extern struct vop_vector zfs_vnodeops; extern struct vop_vector zfs_fifoops; /* * XXX: We cannot use this function as a cache constructor, because * there is one global cache for all file systems and we need * to pass vfsp here, which is not possible, because argument * 'cdrarg' is defined at kmem_cache_create() time. 
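 *
 * Instead, the cache is created with a NULL constructor (see
 * zfs_znode_init() below) and zfs_znode_cache_constructor() is called
 * by hand, with the right vfsp, from zfs_znode_alloc() and
 * zfs_create_fs().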
*/ static int zfs_znode_cache_constructor(void *buf, void *arg, int kmflags) { znode_t *zp = buf; vnode_t *vp; vfs_t *vfsp = arg; int error; POINTER_INVALIDATE(&zp->z_zfsvfs); ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); ASSERT(vfsp != NULL); error = getnewvnode("zfs", vfsp, &zfs_vnodeops, &vp); if (error != 0 && (kmflags & KM_NOSLEEP)) return (-1); ASSERT(error == 0); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); zp->z_vnode = vp; vp->v_data = (caddr_t)zp; VN_LOCK_AREC(vp); VN_LOCK_ASHARE(vp); list_link_init(&zp->z_link_node); mutex_init(&zp->z_lock, NULL, MUTEX_DEFAULT, NULL); rw_init(&zp->z_map_lock, NULL, RW_DEFAULT, NULL); rw_init(&zp->z_parent_lock, NULL, RW_DEFAULT, NULL); rw_init(&zp->z_name_lock, NULL, RW_DEFAULT, NULL); mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL); mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL); avl_create(&zp->z_range_avl, zfs_range_compare, sizeof (rl_t), offsetof(rl_t, r_node)); zp->z_dbuf = NULL; zp->z_dirlocks = NULL; return (0); } /*ARGSUSED*/ static void zfs_znode_cache_destructor(void *buf, void *arg) { znode_t *zp = buf; ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); ASSERT(ZTOV(zp) == NULL); vn_free(ZTOV(zp)); ASSERT(!list_link_active(&zp->z_link_node)); mutex_destroy(&zp->z_lock); rw_destroy(&zp->z_map_lock); rw_destroy(&zp->z_parent_lock); rw_destroy(&zp->z_name_lock); mutex_destroy(&zp->z_acl_lock); avl_destroy(&zp->z_range_avl); mutex_destroy(&zp->z_range_lock); ASSERT(zp->z_dbuf == NULL); ASSERT(zp->z_dirlocks == NULL); } #ifdef ZNODE_STATS static struct { uint64_t zms_zfsvfs_invalid; uint64_t zms_zfsvfs_unmounted; uint64_t zms_zfsvfs_recheck_invalid; uint64_t zms_obj_held; uint64_t zms_vnode_locked; uint64_t zms_not_only_dnlc; } znode_move_stats; #endif /* ZNODE_STATS */ #if defined(sun) static void zfs_znode_move_impl(znode_t *ozp, znode_t *nzp) { vnode_t *vp; /* Copy fields. */ nzp->z_zfsvfs = ozp->z_zfsvfs; /* Swap vnodes. */ vp = nzp->z_vnode; nzp->z_vnode = ozp->z_vnode; ozp->z_vnode = vp; /* let destructor free the overwritten vnode */ ZTOV(ozp)->v_data = ozp; ZTOV(nzp)->v_data = nzp; nzp->z_id = ozp->z_id; ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */ ASSERT(avl_numnodes(&ozp->z_range_avl) == 0); nzp->z_unlinked = ozp->z_unlinked; nzp->z_atime_dirty = ozp->z_atime_dirty; nzp->z_zn_prefetch = ozp->z_zn_prefetch; nzp->z_blksz = ozp->z_blksz; nzp->z_seq = ozp->z_seq; nzp->z_mapcnt = ozp->z_mapcnt; nzp->z_last_itx = ozp->z_last_itx; nzp->z_gen = ozp->z_gen; nzp->z_sync_cnt = ozp->z_sync_cnt; nzp->z_phys = ozp->z_phys; nzp->z_dbuf = ozp->z_dbuf; /* Update back pointers. */ (void) dmu_buf_update_user(nzp->z_dbuf, ozp, nzp, &nzp->z_phys, znode_evict_error); /* * Invalidate the original znode by clearing fields that provide a * pointer back to the znode. Set the low bit of the vfs pointer to * ensure that zfs_znode_move() recognizes the znode as invalid in any * subsequent callback. */ ozp->z_dbuf = NULL; POINTER_INVALIDATE(&ozp->z_zfsvfs); } /* * Wrapper function for ZFS_ENTER that returns 0 if successful and otherwise * returns a non-zero error code. */ static int zfs_enter(zfsvfs_t *zfsvfs) { ZFS_ENTER(zfsvfs); return (0); } /*ARGSUSED*/ static kmem_cbrc_t zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg) { znode_t *ozp = buf, *nzp = newbuf; zfsvfs_t *zfsvfs; vnode_t *vp; /* * The znode is on the file system's list of known znodes if the vfs * pointer is valid. 
We set the low bit of the vfs pointer when freeing * the znode to invalidate it, and the memory patterns written by kmem * (baddcafe and deadbeef) set at least one of the two low bits. A newly * created znode sets the vfs pointer last of all to indicate that the * znode is known and in a valid state to be moved by this function. */ zfsvfs = ozp->z_zfsvfs; if (!POINTER_IS_VALID(zfsvfs)) { ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid); return (KMEM_CBRC_DONT_KNOW); } /* * Ensure that the filesystem is not unmounted during the move. */ if (zfs_enter(zfsvfs) != 0) { /* ZFS_ENTER */ ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted); return (KMEM_CBRC_DONT_KNOW); } mutex_enter(&zfsvfs->z_znodes_lock); /* * Recheck the vfs pointer in case the znode was removed just before * acquiring the lock. */ if (zfsvfs != ozp->z_zfsvfs) { mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck_invalid); return (KMEM_CBRC_DONT_KNOW); } /* * At this point we know that as long as we hold z_znodes_lock, the * znode cannot be freed and fields within the znode can be safely * accessed. Now, prevent a race with zfs_zget(). */ if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) { mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); ZNODE_STAT_ADD(znode_move_stats.zms_obj_held); return (KMEM_CBRC_LATER); } vp = ZTOV(ozp); if (mutex_tryenter(&vp->v_lock) == 0) { ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked); return (KMEM_CBRC_LATER); } /* Only move znodes that are referenced _only_ by the DNLC. */ if (vp->v_count != 1 || !vn_in_dnlc(vp)) { mutex_exit(&vp->v_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc); return (KMEM_CBRC_LATER); } /* * The znode is known and in a valid state to move. We're holding the * locks needed to execute the critical section. */ zfs_znode_move_impl(ozp, nzp); mutex_exit(&vp->v_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id); list_link_replace(&ozp->z_link_node, &nzp->z_link_node); mutex_exit(&zfsvfs->z_znodes_lock); ZFS_EXIT(zfsvfs); return (KMEM_CBRC_YES); } #endif /* sun */ void zfs_znode_init(void) { /* * Initialize zcache */ ASSERT(znode_cache == NULL); znode_cache = kmem_cache_create("zfs_znode_cache", sizeof (znode_t), 0, /* zfs_znode_cache_constructor */ NULL, zfs_znode_cache_destructor, NULL, NULL, NULL, 0); #if defined(sun) kmem_cache_set_move(znode_cache, zfs_znode_move); #endif } void zfs_znode_fini(void) { /* * Cleanup zcache */ if (znode_cache) kmem_cache_destroy(znode_cache); znode_cache = NULL; } /* * zfs_init_fs - Initialize the zfsvfs struct and the file system * incore "master" object. Verify version compatibility. 
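 *
 * If the on-disk ZPL version is newer than the ZPL_VERSION this code
 * understands, zfs_init_fs() refuses the mount with ENOTSUP rather
 * than risk misreading the newer on-disk format.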
*/ int zfs_init_fs(zfsvfs_t *zfsvfs, znode_t **zpp) { objset_t *os = zfsvfs->z_os; int i, error; uint64_t fsid_guid; uint64_t zval; *zpp = NULL; error = zfs_get_zplprop(os, ZFS_PROP_VERSION, &zfsvfs->z_version); if (error) { return (error); } else if (zfsvfs->z_version > ZPL_VERSION) { (void) printf("Mismatched versions: File system " "is version %llu on-disk format, which is " "incompatible with this software version %lld!", (u_longlong_t)zfsvfs->z_version, ZPL_VERSION); return (ENOTSUP); } if ((error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &zval)) != 0) return (error); zfsvfs->z_norm = (int)zval; if ((error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &zval)) != 0) return (error); zfsvfs->z_utf8 = (zval != 0); if ((error = zfs_get_zplprop(os, ZFS_PROP_CASE, &zval)) != 0) return (error); zfsvfs->z_case = (uint_t)zval; /* * Fold case on file systems that are always or sometimes case * insensitive. */ if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE || zfsvfs->z_case == ZFS_CASE_MIXED) zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; /* * The fsid is 64 bits, composed of an 8-bit fs type, which * separates our fsid from any other filesystem types, and a * 56-bit objset unique ID. The objset unique ID is unique to * all objsets open on this system, provided by unique_create(). * The 8-bit fs type must be put in the low bits of fsid[1] * because that's where other Solaris filesystems put it. */ fsid_guid = dmu_objset_fsid_guid(os); ASSERT((fsid_guid & ~((1ULL<<56)-1)) == 0); zfsvfs->z_vfs->vfs_fsid.val[0] = fsid_guid; zfsvfs->z_vfs->vfs_fsid.val[1] = ((fsid_guid>>32) << 8) | zfsvfs->z_vfs->mnt_vfc->vfc_typenum & 0xFF; error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_ROOT_OBJ, 8, 1, &zfsvfs->z_root); if (error) return (error); ASSERT(zfsvfs->z_root != 0); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_UNLINKED_SET, 8, 1, &zfsvfs->z_unlinkedobj); if (error) return (error); /* * Initialize zget mutex's */ for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); error = zfs_zget(zfsvfs, zfsvfs->z_root, zpp); if (error) { /* * On error, we destroy the mutexes here since it's not * possible for the caller to determine if the mutexes were * initialized properly. */ for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_destroy(&zfsvfs->z_hold_mtx[i]); return (error); } ASSERT3U((*zpp)->z_id, ==, zfsvfs->z_root); error = zap_lookup(os, MASTER_NODE_OBJ, ZFS_FUID_TABLES, 8, 1, &zfsvfs->z_fuid_obj); if (error == ENOENT) error = 0; return (0); } /* * define a couple of values we need available * for both 64 and 32 bit environments. */ #ifndef NBITSMINOR64 #define NBITSMINOR64 32 #endif #ifndef MAXMAJ64 #define MAXMAJ64 0xffffffffUL #endif #ifndef MAXMIN64 #define MAXMIN64 0xffffffffUL #endif /* * Create special expldev for ZFS private use. * Can't use standard expldev since it doesn't do * what we want. The standard expldev() takes a * dev32_t in LP64 and expands it to a long dev_t. * We need an interface that takes a dev32_t in ILP32 * and expands it to a long dev_t. */ static uint64_t zfs_expldev(dev_t dev) { - return (((uint64_t)umajor(dev) << NBITSMINOR64) | uminor(dev)); + return (((uint64_t)major(dev) << NBITSMINOR64) | minor(dev)); } /* * Special cmpldev for ZFS private use. * Can't use standard cmpldev since it takes * a long dev_t and compresses it to dev32_t in * LP64. We need to do a compaction of a long dev_t * to a dev32_t in ILP32. 
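 *
 * The 64-bit encoding produced by zfs_expldev() above is simply the
 * major number in the upper 32 bits and the minor in the lower 32
 * (NBITSMINOR64 == 32); e.g. major 13, minor 7 is stored as
 * 0x0000000d00000007.  zfs_cmpldev() below recovers the two halves
 * with (dev >> NBITSMINOR64) and (dev & MAXMIN64).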
*/ dev_t zfs_cmpldev(uint64_t dev) { return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64))); } static void zfs_znode_dmu_init(zfsvfs_t *zfsvfs, znode_t *zp, dmu_buf_t *db) { znode_t *nzp; ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs)); ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id))); mutex_enter(&zp->z_lock); ASSERT(zp->z_dbuf == NULL); zp->z_dbuf = db; nzp = dmu_buf_set_user_ie(db, zp, &zp->z_phys, znode_evict_error); /* * there should be no * concurrent zgets on this object. */ if (nzp != NULL) panic("existing znode %p for dbuf %p", (void *)nzp, (void *)db); /* * Slap on VROOT if we are the root znode */ if (zp->z_id == zfsvfs->z_root) ZTOV(zp)->v_flag |= VROOT; mutex_exit(&zp->z_lock); vn_exists(ZTOV(zp)); } void zfs_znode_dmu_fini(znode_t *zp) { dmu_buf_t *db = zp->z_dbuf; ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) || zp->z_unlinked || RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock)); ASSERT(zp->z_dbuf != NULL); zp->z_dbuf = NULL; VERIFY(zp == dmu_buf_update_user(db, zp, NULL, NULL, NULL)); dmu_buf_rele(db, NULL); } /* * Construct a new znode/vnode and intialize. * * This does not do a call to dmu_set_user() that is * up to the caller to do, in case you don't want to * return the znode */ static znode_t * zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz) { znode_t *zp; vnode_t *vp; zp = kmem_cache_alloc(znode_cache, KM_SLEEP); zfs_znode_cache_constructor(zp, zfsvfs->z_parent->z_vfs, 0); ASSERT(zp->z_dirlocks == NULL); ASSERT(zp->z_dbuf == NULL); ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs)); /* * Defer setting z_zfsvfs until the znode is ready to be a candidate for * the zfs_znode_move() callback. */ zp->z_phys = NULL; zp->z_unlinked = 0; zp->z_atime_dirty = 0; zp->z_mapcnt = 0; zp->z_last_itx = 0; zp->z_id = db->db_object; zp->z_blksz = blksz; zp->z_seq = 0x7A4653; zp->z_sync_cnt = 0; vp = ZTOV(zp); #ifdef TODO vn_reinit(vp); #endif zfs_znode_dmu_init(zfsvfs, zp, db); zp->z_gen = zp->z_phys->zp_gen; #if 0 if (vp == NULL) return (zp); #endif vp->v_type = IFTOVT((mode_t)zp->z_phys->zp_mode); switch (vp->v_type) { case VDIR: zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */ break; case VFIFO: vp->v_op = &zfs_fifoops; break; } mutex_enter(&zfsvfs->z_znodes_lock); list_insert_tail(&zfsvfs->z_all_znodes, zp); membar_producer(); /* * Everything else must be valid before assigning z_zfsvfs makes the * znode eligible for zfs_znode_move(). */ zp->z_zfsvfs = zfsvfs; mutex_exit(&zfsvfs->z_znodes_lock); VFS_HOLD(zfsvfs->z_vfs); return (zp); } /* * Create a new DMU object to hold a zfs znode. * * IN: dzp - parent directory for new znode * vap - file attributes for new znode * tx - dmu transaction id for zap operations * cr - credentials of caller * flag - flags: * IS_ROOT_NODE - new object will be root * IS_XATTR - new object is an attribute * IS_REPLAY - intent log replay * bonuslen - length of bonus buffer * setaclp - File/Dir initial ACL * fuidp - Tracks fuid allocation. 
* * OUT: zpp - allocated znode * */ void zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr, uint_t flag, znode_t **zpp, int bonuslen, zfs_acl_t *setaclp, zfs_fuid_info_t **fuidp) { dmu_buf_t *db; znode_phys_t *pzp; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; timestruc_t now; uint64_t gen, obj; int err; ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE)); if (zfsvfs->z_assign >= TXG_INITIAL) { /* ZIL replay */ obj = vap->va_nodeid; flag |= IS_REPLAY; now = vap->va_ctime; /* see zfs_replay_create() */ gen = vap->va_nblocks; /* ditto */ } else { obj = 0; gethrestime(&now); gen = dmu_tx_get_txg(tx); } /* * Create a new DMU object. */ /* * There's currently no mechanism for pre-reading the blocks that will * be needed to allocate a new object, so we accept the small chance * that there will be an i/o error and we will fail one of the * assertions below. */ if (vap->va_type == VDIR) { if (flag & IS_REPLAY) { err = zap_create_claim_norm(zfsvfs->z_os, obj, zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); ASSERT3U(err, ==, 0); } else { obj = zap_create_norm(zfsvfs->z_os, zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS, DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); } } else { if (flag & IS_REPLAY) { err = dmu_object_claim(zfsvfs->z_os, obj, DMU_OT_PLAIN_FILE_CONTENTS, 0, DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); ASSERT3U(err, ==, 0); } else { obj = dmu_object_alloc(zfsvfs->z_os, DMU_OT_PLAIN_FILE_CONTENTS, 0, DMU_OT_ZNODE, sizeof (znode_phys_t) + bonuslen, tx); } } VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, obj, NULL, &db)); dmu_buf_will_dirty(db, tx); /* * Initialize the znode physical data to zero. */ ASSERT(db->db_size >= sizeof (znode_phys_t)); bzero(db->db_data, db->db_size); pzp = db->db_data; /* * If this is the root, fix up the half-initialized parent pointer * to reference the just-allocated physical data area. */ if (flag & IS_ROOT_NODE) { dzp->z_dbuf = db; dzp->z_phys = pzp; dzp->z_id = obj; } /* * If parent is an xattr, so am I. */ if (dzp->z_phys->zp_flags & ZFS_XATTR) flag |= IS_XATTR; if (vap->va_type == VBLK || vap->va_type == VCHR) { pzp->zp_rdev = zfs_expldev(vap->va_rdev); } if (zfsvfs->z_use_fuids) pzp->zp_flags = ZFS_ARCHIVE | ZFS_AV_MODIFIED; if (vap->va_type == VDIR) { pzp->zp_size = 2; /* contents ("." and "..") */ pzp->zp_links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1; } pzp->zp_parent = dzp->z_id; if (flag & IS_XATTR) pzp->zp_flags |= ZFS_XATTR; pzp->zp_gen = gen; ZFS_TIME_ENCODE(&now, pzp->zp_crtime); ZFS_TIME_ENCODE(&now, pzp->zp_ctime); if (vap->va_mask & AT_ATIME) { ZFS_TIME_ENCODE(&vap->va_atime, pzp->zp_atime); } else { ZFS_TIME_ENCODE(&now, pzp->zp_atime); } if (vap->va_mask & AT_MTIME) { ZFS_TIME_ENCODE(&vap->va_mtime, pzp->zp_mtime); } else { ZFS_TIME_ENCODE(&now, pzp->zp_mtime); } pzp->zp_mode = MAKEIMODE(vap->va_type, vap->va_mode); if (!(flag & IS_ROOT_NODE)) { ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); *zpp = zfs_znode_alloc(zfsvfs, db, 0); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); } else { /* * If we are creating the root node, the "parent" we * passed in is the znode for the root.
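 * No new znode is allocated in that case: dzp itself, whose z_dbuf,
 * z_phys and z_id were fixed up above, is handed back as *zpp.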
*/ *zpp = dzp; } zfs_perm_init(*zpp, dzp, flag, vap, tx, cr, setaclp, fuidp); if (!(flag & IS_ROOT_NODE)) { vnode_t *vp; vp = ZTOV(*zpp); vp->v_vflag |= VV_FORCEINSMQ; err = insmntque(vp, zfsvfs->z_vfs); vp->v_vflag &= ~VV_FORCEINSMQ; KASSERT(err == 0, ("insmntque() failed: error %d", err)); } } void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap) { xoptattr_t *xoap; xoap = xva_getxoptattr(xvap); ASSERT(xoap); if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { ZFS_TIME_ENCODE(&xoap->xoa_createtime, zp->z_phys->zp_crtime); XVA_SET_RTN(xvap, XAT_CREATETIME); } if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly); XVA_SET_RTN(xvap, XAT_READONLY); } if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden); XVA_SET_RTN(xvap, XAT_HIDDEN); } if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system); XVA_SET_RTN(xvap, XAT_SYSTEM); } if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive); XVA_SET_RTN(xvap, XAT_ARCHIVE); } if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable); XVA_SET_RTN(xvap, XAT_IMMUTABLE); } if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink); XVA_SET_RTN(xvap, XAT_NOUNLINK); } if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly); XVA_SET_RTN(xvap, XAT_APPENDONLY); } if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump); XVA_SET_RTN(xvap, XAT_NODUMP); } if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque); XVA_SET_RTN(xvap, XAT_OPAQUE); } if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, xoap->xoa_av_quarantined); XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); } if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified); XVA_SET_RTN(xvap, XAT_AV_MODIFIED); } if (XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { (void) memcpy(zp->z_phys + 1, xoap->xoa_av_scanstamp, sizeof (xoap->xoa_av_scanstamp)); zp->z_phys->zp_flags |= ZFS_BONUS_SCANSTAMP; XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); } } int zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) { dmu_object_info_t doi; dmu_buf_t *db; znode_t *zp; vnode_t *vp; int err, first = 1; *zpp = NULL; again: ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db); if (err) { ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (err); } dmu_object_info_from_db(db, &doi); if (doi.doi_bonus_type != DMU_OT_ZNODE || doi.doi_bonus_size < sizeof (znode_phys_t)) { dmu_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (EINVAL); } zp = dmu_buf_get_user(db); if (zp != NULL) { mutex_enter(&zp->z_lock); /* * Since we do immediate eviction of the z_dbuf, we * should never find a dbuf with a znode that doesn't * know about the dbuf. */ ASSERT3P(zp->z_dbuf, ==, db); ASSERT3U(zp->z_id, ==, obj_num); if (zp->z_unlinked) { err = ENOENT; } else { if (ZTOV(zp) != NULL) VN_HOLD(ZTOV(zp)); else { if (first) { ZFS_LOG(1, "dying znode detected (zp=%p)", zp); first = 0; } /* * znode is dying so we can't reuse it, we must * wait until destruction is completed. 
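 * We drop every hold taken above, sleep for a tick ("zcollide") and
 * restart the lookup from scratch.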
*/ dmu_buf_rele(db, NULL); mutex_exit(&zp->z_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); tsleep(zp, 0, "zcollide", 1); goto again; } *zpp = zp; err = 0; } dmu_buf_rele(db, NULL); mutex_exit(&zp->z_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (err); } /* * Not found create new znode/vnode */ zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size); vp = ZTOV(zp); vp->v_vflag |= VV_FORCEINSMQ; err = insmntque(vp, zfsvfs->z_vfs); vp->v_vflag &= ~VV_FORCEINSMQ; KASSERT(err == 0, ("insmntque() failed: error %d", err)); VOP_UNLOCK(vp, 0); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); *zpp = zp; return (0); } int zfs_rezget(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; dmu_object_info_t doi; dmu_buf_t *db; uint64_t obj_num = zp->z_id; int err; ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); err = dmu_bonus_hold(zfsvfs->z_os, obj_num, NULL, &db); if (err) { ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (err); } dmu_object_info_from_db(db, &doi); if (doi.doi_bonus_type != DMU_OT_ZNODE || doi.doi_bonus_size < sizeof (znode_phys_t)) { dmu_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (EINVAL); } if (((znode_phys_t *)db->db_data)->zp_gen != zp->z_gen) { dmu_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (EIO); } zfs_znode_dmu_init(zfsvfs, zp, db); zp->z_unlinked = (zp->z_phys->zp_links == 0); zp->z_blksz = doi.doi_data_block_size; ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (0); } void zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zfsvfs->z_os; uint64_t obj = zp->z_id; uint64_t acl_obj = zp->z_phys->zp_acl.z_acl_extern_obj; ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); if (acl_obj) VERIFY(0 == dmu_object_free(os, acl_obj, tx)); VERIFY(0 == dmu_object_free(os, obj, tx)); zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); zfs_znode_free(zp); } void zfs_zinactive(znode_t *zp) { vnode_t *vp = ZTOV(zp); zfsvfs_t *zfsvfs = zp->z_zfsvfs; uint64_t z_id = zp->z_id; ASSERT(zp->z_dbuf && zp->z_phys); /* * Don't allow a zfs_zget() while were trying to release this znode */ ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); mutex_enter(&zp->z_lock); VI_LOCK(vp); if (vp->v_count > 0) { /* * If the hold count is greater than zero, somebody has * obtained a new reference on this znode while we were * processing it here, so we are done. */ VI_UNLOCK(vp); mutex_exit(&zp->z_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); return; } VI_UNLOCK(vp); /* * If this was the last reference to a file with no links, * remove the file from the file system. 
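 * The vnode is recycled first; zfs_rmnode() then reclaims the
 * object's on-disk state, after z_lock and the object hold have
 * been dropped.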
*/ if (zp->z_unlinked) { mutex_exit(&zp->z_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); ASSERT(vp->v_count == 0); vrecycle(vp, curthread); zfs_rmnode(zp); return; } mutex_exit(&zp->z_lock); ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); } void zfs_znode_free(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; ASSERT(ZTOV(zp) == NULL); mutex_enter(&zfsvfs->z_znodes_lock); POINTER_INVALIDATE(&zp->z_zfsvfs); list_remove(&zfsvfs->z_all_znodes, zp); mutex_exit(&zfsvfs->z_znodes_lock); kmem_cache_free(znode_cache, zp); VFS_RELE(zfsvfs->z_vfs); } void zfs_time_stamper_locked(znode_t *zp, uint_t flag, dmu_tx_t *tx) { timestruc_t now; ASSERT(MUTEX_HELD(&zp->z_lock)); gethrestime(&now); if (tx) { dmu_buf_will_dirty(zp->z_dbuf, tx); zp->z_atime_dirty = 0; zp->z_seq++; } else { zp->z_atime_dirty = 1; } if (flag & AT_ATIME) ZFS_TIME_ENCODE(&now, zp->z_phys->zp_atime); if (flag & AT_MTIME) { ZFS_TIME_ENCODE(&now, zp->z_phys->zp_mtime); if (zp->z_zfsvfs->z_use_fuids) zp->z_phys->zp_flags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED); } if (flag & AT_CTIME) { ZFS_TIME_ENCODE(&now, zp->z_phys->zp_ctime); if (zp->z_zfsvfs->z_use_fuids) zp->z_phys->zp_flags |= ZFS_ARCHIVE; } } /* * Update the requested znode timestamps with the current time. * If we are in a transaction, then go ahead and mark the znode * dirty in the transaction so the timestamps will go to disk. * Otherwise, we will get pushed next time the znode is updated * in a transaction, or when this znode eventually goes inactive. * * Why is this OK? * 1 - Only the ACCESS time is ever updated outside of a transaction. * 2 - Multiple consecutive updates will be collapsed into a single * znode update by the transaction grouping semantics of the DMU. */ void zfs_time_stamper(znode_t *zp, uint_t flag, dmu_tx_t *tx) { mutex_enter(&zp->z_lock); zfs_time_stamper_locked(zp, flag, tx); mutex_exit(&zp->z_lock); } /* * Grow the block size for a file. * * IN: zp - znode of file to free data in. * size - requested block size * tx - open transaction. * * NOTE: this function assumes that the znode is write locked. */ void zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) { int error; u_longlong_t dummy; if (size <= zp->z_blksz) return; /* * If the file size is already greater than the current blocksize, * we will not grow. If there is more than one block in a file, * the blocksize cannot change. */ if (zp->z_blksz && zp->z_phys->zp_size > zp->z_blksz) return; error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, size, 0, tx); if (error == ENOTSUP) return; ASSERT3U(error, ==, 0); /* What blocksize did we actually get? */ dmu_object_size_from_db(zp->z_dbuf, &zp->z_blksz, &dummy); } /* * Increase the file length * * IN: zp - znode of file to free data in. * end - new end-of-file * * RETURN: 0 if success * error code if failure */ static int zfs_extend(znode_t *zp, uint64_t end) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; dmu_tx_t *tx; rl_t *rl; uint64_t newblksz; int error; /* * We will change zp_size, lock the whole file. */ rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (end <= zp->z_phys->zp_size) { zfs_range_unlock(rl); return (0); } top: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_bonus(tx, zp->z_id); if (end > zp->z_blksz && (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { /* * We are growing the file past the current block size. 
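 * If the current block size already exceeds z_max_blksz (presumably
 * left over from an earlier, larger limit), it is not a power of 2
 * (see the ASSERT below), and the growth is capped at
 * SPA_MAXBLOCKSIZE instead of z_max_blksz.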
*/ if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { ASSERT(!ISP2(zp->z_blksz)); newblksz = MIN(end, SPA_MAXBLOCKSIZE); } else { newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); } dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); } else { newblksz = 0; } error = dmu_tx_assign(tx, zfsvfs->z_assign); if (error) { if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); zfs_range_unlock(rl); return (error); } dmu_buf_will_dirty(zp->z_dbuf, tx); if (newblksz) zfs_grow_blocksize(zp, newblksz, tx); zp->z_phys->zp_size = end; zfs_range_unlock(rl); dmu_tx_commit(tx); rw_enter(&zp->z_map_lock, RW_WRITER); error = vinvalbuf(ZTOV(zp), V_SAVE, 0, 0); ASSERT(error == 0); vnode_pager_setsize(ZTOV(zp), end); rw_exit(&zp->z_map_lock); return (0); } /* * Free space in a file. * * IN: zp - znode of file to free data in. * off - start of section to free. * len - length of section to free. * * RETURN: 0 if success * error code if failure */ static int zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; rl_t *rl; int error; /* * Lock the range being freed. */ rl = zfs_range_lock(zp, off, len, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (off >= zp->z_phys->zp_size) { zfs_range_unlock(rl); return (0); } if (off + len > zp->z_phys->zp_size) len = zp->z_phys->zp_size - off; error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); if (error == 0) { /* * In FreeBSD we cannot free block in the middle of a file, * but only at the end of a file. */ rw_enter(&zp->z_map_lock, RW_WRITER); error = vinvalbuf(ZTOV(zp), V_SAVE, 0, 0); ASSERT(error == 0); vnode_pager_setsize(ZTOV(zp), off); rw_exit(&zp->z_map_lock); } zfs_range_unlock(rl); return (error); } /* * Truncate a file * * IN: zp - znode of file to free data in. * end - new end-of-file. * * RETURN: 0 if success * error code if failure */ static int zfs_trunc(znode_t *zp, uint64_t end) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; vnode_t *vp = ZTOV(zp); dmu_tx_t *tx; rl_t *rl; int error; /* * We will change zp_size, lock the whole file. */ rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (end >= zp->z_phys->zp_size) { zfs_range_unlock(rl); return (0); } error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1); if (error) { zfs_range_unlock(rl); return (error); } top: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_bonus(tx, zp->z_id); error = dmu_tx_assign(tx, zfsvfs->z_assign); if (error) { if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); zfs_range_unlock(rl); return (error); } dmu_buf_will_dirty(zp->z_dbuf, tx); zp->z_phys->zp_size = end; dmu_tx_commit(tx); zfs_range_unlock(rl); /* * Clear any mapped pages in the truncated region. This has to * happen outside of the transaction to avoid the possibility of * a deadlock with someone trying to push a page that we are * about to invalidate. */ rw_enter(&zp->z_map_lock, RW_WRITER); #if 0 error = vtruncbuf(vp, curthread->td_ucred, curthread, end, PAGE_SIZE); #else error = vinvalbuf(vp, V_SAVE, 0, 0); ASSERT(error == 0); vnode_pager_setsize(vp, end); #endif rw_exit(&zp->z_map_lock); return (0); } /* * Free space in a file * * IN: zp - znode of file to free data in. * off - start of range * len - end of range (0 => EOF) * flag - current file open mode flags. 
* log - TRUE if this action should be logged * * RETURN: 0 if success * error code if failure */ int zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) { vnode_t *vp = ZTOV(zp); dmu_tx_t *tx; zfsvfs_t *zfsvfs = zp->z_zfsvfs; zilog_t *zilog = zfsvfs->z_log; int error; if (off > zp->z_phys->zp_size) { error = zfs_extend(zp, off+len); if (error == 0 && log) goto log; else return (error); } if (len == 0) { error = zfs_trunc(zp, off); } else { if ((error = zfs_free_range(zp, off, len)) == 0 && off + len > zp->z_phys->zp_size) error = zfs_extend(zp, off+len); } if (error || !log) return (error); log: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_bonus(tx, zp->z_id); error = dmu_tx_assign(tx, zfsvfs->z_assign); if (error) { if (error == ERESTART && zfsvfs->z_assign == TXG_NOWAIT) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto log; } dmu_tx_abort(tx); return (error); } zfs_time_stamper(zp, CONTENT_MODIFIED, tx); zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); dmu_tx_commit(tx); return (0); } void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) { zfsvfs_t zfsvfs; uint64_t moid, doid, version; uint64_t sense = ZFS_CASE_SENSITIVE; uint64_t norm = 0; nvpair_t *elem; int error; znode_t *rootzp = NULL; vnode_t *vp; vattr_t vattr; znode_t *zp; /* * First attempt to create master node. */ /* * In an empty objset, there are no blocks to read and thus * there can be no i/o errors (which we assert below). */ moid = MASTER_NODE_OBJ; error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, DMU_OT_NONE, 0, tx); ASSERT(error == 0); /* * Set starting attributes. */ if (spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID) version = ZPL_VERSION; else version = ZPL_VERSION_FUID - 1; error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); elem = NULL; while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { /* For the moment we expect all zpl props to be uint64_ts */ uint64_t val; char *name; ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); VERIFY(nvpair_value_uint64(elem, &val) == 0); name = nvpair_name(elem); if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { version = val; error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); } else { error = zap_update(os, moid, name, 8, 1, &val, tx); } ASSERT(error == 0); if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) norm = val; else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) sense = val; } ASSERT(version != 0); /* * Create a delete queue. */ doid = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &doid, tx); ASSERT(error == 0); /* * Create root znode. Create minimal znode/vnode/zfsvfs * to allow zfs_mknode to work. */ VATTR_NULL(&vattr); vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; vattr.va_type = VDIR; vattr.va_mode = S_IFDIR|0755; vattr.va_uid = crgetuid(cr); vattr.va_gid = crgetgid(cr); rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); zfs_znode_cache_constructor(rootzp, &zfsvfs, 0); rootzp->z_unlinked = 0; rootzp->z_atime_dirty = 0; vp = ZTOV(rootzp); vp->v_type = VDIR; bzero(&zfsvfs, sizeof (zfsvfs_t)); zfsvfs.z_os = os; zfsvfs.z_assign = TXG_NOWAIT; zfsvfs.z_parent = &zfsvfs; zfsvfs.z_version = version; zfsvfs.z_use_fuids = USE_FUIDS(version, os); zfsvfs.z_norm = norm; /* * Fold case on file systems that are always or sometimes case * insensitive. 
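 * (This mirrors the identical case-folding fixup in zfs_init_fs()
 * above.)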
*/ if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) zfsvfs.z_norm |= U8_TEXTPREP_TOUPPER; mutex_init(&zfsvfs.z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs.z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); rootzp->z_zfsvfs = &zfsvfs; zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, 0, NULL, NULL); ASSERT3P(zp, ==, rootzp); error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); ASSERT(error == 0); POINTER_INVALIDATE(&rootzp->z_zfsvfs); VI_LOCK(vp); ZTOV(rootzp)->v_data = NULL; ZTOV(rootzp)->v_count = 0; ZTOV(rootzp)->v_holdcnt = 0; ZTOV(rootzp) = NULL; VOP_UNLOCK(vp, 0); vdestroy(vp); dmu_buf_rele(rootzp->z_dbuf, NULL); rootzp->z_dbuf = NULL; mutex_destroy(&zfsvfs.z_znodes_lock); kmem_cache_free(znode_cache, rootzp); } #endif /* _KERNEL */ /* * Given an object number, return its parent object number and whether * or not the object is an extended attribute directory. */ static int zfs_obj_to_pobj(objset_t *osp, uint64_t obj, uint64_t *pobjp, int *is_xattrdir) { dmu_buf_t *db; dmu_object_info_t doi; znode_phys_t *zp; int error; if ((error = dmu_bonus_hold(osp, obj, FTAG, &db)) != 0) return (error); dmu_object_info_from_db(db, &doi); if (doi.doi_bonus_type != DMU_OT_ZNODE || doi.doi_bonus_size < sizeof (znode_phys_t)) { dmu_buf_rele(db, FTAG); return (EINVAL); } zp = db->db_data; *pobjp = zp->zp_parent; *is_xattrdir = ((zp->zp_flags & ZFS_XATTR) != 0) && S_ISDIR(zp->zp_mode); dmu_buf_rele(db, FTAG); return (0); } int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) { char *path = buf + len - 1; int error; *path = '\0'; for (;;) { uint64_t pobj; char component[MAXNAMELEN + 2]; size_t complen; int is_xattrdir; if ((error = zfs_obj_to_pobj(osp, obj, &pobj, &is_xattrdir)) != 0) break; if (pobj == obj) { if (path[0] != '/') *--path = '/'; break; } component[0] = '/'; if (is_xattrdir) { (void) sprintf(component + 1, ""); } else { error = zap_value_search(osp, pobj, obj, ZFS_DIRENT_OBJ(-1ULL), component + 1); if (error != 0) break; } complen = strlen(component); path -= complen; ASSERT(path >= buf); bcopy(component, path, complen); obj = pobj; } if (error == 0) (void) memmove(buf, path, buf + len - path); return (error); } diff --git a/sys/compat/linux/linux_stats.c b/sys/compat/linux/linux_stats.c index c5f10af82c5f..5f1ce5329517 100644 --- a/sys/compat/linux/linux_stats.c +++ b/sys/compat/linux/linux_stats.c @@ -1,609 +1,609 @@ /*- * Copyright (c) 1994-1995 Søren Schmidt * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_mac.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_LINUX32 #include #include #else #include #include #endif #include #include #include /* * XXX: This was removed from newstat_copyout(), and almost identical * XXX: code was in stat64_copyout(). findcdev() needs to be replaced * XXX: with something that does lookup and locking properly. * XXX: When somebody fixes this: please try to avoid duplicating it. */ #if 0 static void disk_foo(struct somestat *tbuf) { struct cdevsw *cdevsw; struct cdev *dev; /* Lie about disk drives which are character devices * in FreeBSD but block devices under Linux. */ if (S_ISCHR(tbuf.st_mode) && (dev = findcdev(buf->st_rdev)) != NULL) { cdevsw = dev_refthread(dev); if (cdevsw != NULL) { if (cdevsw->d_flags & D_DISK) { tbuf.st_mode &= ~S_IFMT; tbuf.st_mode |= S_IFBLK; /* XXX this may not be quite right */ /* Map major number to 0 */ - tbuf.st_dev = uminor(buf->st_dev) & 0xf; + tbuf.st_dev = minor(buf->st_dev) & 0xf; tbuf.st_rdev = buf->st_rdev & 0xff; } dev_relthread(dev); } } } #endif static void translate_fd_major_minor(struct thread *td, int fd, struct stat *buf) { struct file *fp; int major, minor; if ((!S_ISCHR(buf->st_mode) && !S_ISBLK(buf->st_mode)) || fget(td, fd, &fp) != 0) return; if (fp->f_vnode != NULL && fp->f_vnode->v_un.vu_cdev != NULL && linux_driver_get_major_minor(fp->f_vnode->v_un.vu_cdev->si_name, &major, &minor) == 0) { buf->st_rdev = (major << 8 | minor); } else if (fp->f_type == DTYPE_PTS) { struct tty *tp = fp->f_data; /* Convert the numbers for the slave device. 
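 * The resulting st_rdev uses the classic 16-bit Linux packing,
 * (major << 8) | minor, matching the st_dev encoding built in
 * newstat_copyout() and stat64_copyout() below.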
*/ if (linux_driver_get_major_minor(tp->t_dev->si_name, &major, &minor) == 0) { buf->st_rdev = (major << 8 | minor); } } fdrop(fp, td); } static void translate_path_major_minor_at(struct thread *td, char *path, struct stat *buf, int dfd) { struct proc *p = td->td_proc; struct filedesc *fdp = p->p_fd; int fd; int temp; if (!S_ISCHR(buf->st_mode) && !S_ISBLK(buf->st_mode)) return; temp = td->td_retval[0]; if (kern_openat(td, dfd, path, UIO_SYSSPACE, O_RDONLY, 0) != 0) return; fd = td->td_retval[0]; td->td_retval[0] = temp; translate_fd_major_minor(td, fd, buf); fdclose(fdp, fdp->fd_ofiles[fd], fd, td); } static inline void translate_path_major_minor(struct thread *td, char *path, struct stat *buf) { translate_path_major_minor_at(td, path, buf, AT_FDCWD); } static int newstat_copyout(struct stat *buf, void *ubuf) { struct l_newstat tbuf; bzero(&tbuf, sizeof(tbuf)); - tbuf.st_dev = uminor(buf->st_dev) | (umajor(buf->st_dev) << 8); + tbuf.st_dev = minor(buf->st_dev) | (major(buf->st_dev) << 8); tbuf.st_ino = buf->st_ino; tbuf.st_mode = buf->st_mode; tbuf.st_nlink = buf->st_nlink; tbuf.st_uid = buf->st_uid; tbuf.st_gid = buf->st_gid; tbuf.st_rdev = buf->st_rdev; tbuf.st_size = buf->st_size; tbuf.st_atime = buf->st_atime; tbuf.st_mtime = buf->st_mtime; tbuf.st_ctime = buf->st_ctime; tbuf.st_blksize = buf->st_blksize; tbuf.st_blocks = buf->st_blocks; return (copyout(&tbuf, ubuf, sizeof(tbuf))); } int linux_newstat(struct thread *td, struct linux_newstat_args *args) { struct stat buf; char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(newstat)) printf(ARGS(newstat, "%s, *"), path); #endif error = kern_stat(td, path, UIO_SYSSPACE, &buf); if (!error) translate_path_major_minor(td, path, &buf); LFREEPATH(path); if (error) return (error); return (newstat_copyout(&buf, args->buf)); } int linux_newlstat(struct thread *td, struct linux_newlstat_args *args) { struct stat sb; char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(newlstat)) printf(ARGS(newlstat, "%s, *"), path); #endif error = kern_lstat(td, path, UIO_SYSSPACE, &sb); if (!error) translate_path_major_minor(td, path, &sb); LFREEPATH(path); if (error) return (error); return (newstat_copyout(&sb, args->buf)); } int linux_newfstat(struct thread *td, struct linux_newfstat_args *args) { struct stat buf; int error; #ifdef DEBUG if (ldebug(newfstat)) printf(ARGS(newfstat, "%d, *"), args->fd); #endif error = kern_fstat(td, args->fd, &buf); translate_fd_major_minor(td, args->fd, &buf); if (!error) error = newstat_copyout(&buf, args->buf); return (error); } static int stat_copyout(struct stat *buf, void *ubuf) { struct l_stat lbuf; bzero(&lbuf, sizeof(lbuf)); lbuf.st_dev = buf->st_dev; lbuf.st_ino = buf->st_ino; lbuf.st_mode = buf->st_mode; lbuf.st_nlink = buf->st_nlink; lbuf.st_uid = buf->st_uid; lbuf.st_gid = buf->st_gid; lbuf.st_rdev = buf->st_rdev; if (buf->st_size < (quad_t)1 << 32) lbuf.st_size = buf->st_size; else lbuf.st_size = -2; lbuf.st_atime = buf->st_atime; lbuf.st_mtime = buf->st_mtime; lbuf.st_ctime = buf->st_ctime; lbuf.st_blksize = buf->st_blksize; lbuf.st_blocks = buf->st_blocks; lbuf.st_flags = buf->st_flags; lbuf.st_gen = buf->st_gen; return (copyout(&lbuf, ubuf, sizeof(lbuf))); } int linux_stat(struct thread *td, struct linux_stat_args *args) { struct stat buf; char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(stat)) printf(ARGS(stat, "%s, *"), path); #endif error = kern_stat(td, path, UIO_SYSSPACE, &buf); if (error) { 
LFREEPATH(path); return (error); } translate_path_major_minor(td, path, &buf); LFREEPATH(path); return(stat_copyout(&buf, args->up)); } int linux_lstat(struct thread *td, struct linux_lstat_args *args) { struct stat buf; char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(lstat)) printf(ARGS(lstat, "%s, *"), path); #endif error = kern_lstat(td, path, UIO_SYSSPACE, &buf); if (error) { LFREEPATH(path); return (error); } translate_path_major_minor(td, path, &buf); LFREEPATH(path); return(stat_copyout(&buf, args->up)); } /* XXX - All fields of type l_int are defined as l_long on i386 */ struct l_statfs { l_int f_type; l_int f_bsize; l_int f_blocks; l_int f_bfree; l_int f_bavail; l_int f_files; l_int f_ffree; l_fsid_t f_fsid; l_int f_namelen; l_int f_spare[6]; }; #define LINUX_CODA_SUPER_MAGIC 0x73757245L #define LINUX_EXT2_SUPER_MAGIC 0xEF53L #define LINUX_HPFS_SUPER_MAGIC 0xf995e849L #define LINUX_ISOFS_SUPER_MAGIC 0x9660L #define LINUX_MSDOS_SUPER_MAGIC 0x4d44L #define LINUX_NCP_SUPER_MAGIC 0x564cL #define LINUX_NFS_SUPER_MAGIC 0x6969L #define LINUX_NTFS_SUPER_MAGIC 0x5346544EL #define LINUX_PROC_SUPER_MAGIC 0x9fa0L #define LINUX_UFS_SUPER_MAGIC 0x00011954L /* XXX - UFS_MAGIC in Linux */ #define LINUX_DEVFS_SUPER_MAGIC 0x1373L static long bsd_to_linux_ftype(const char *fstypename) { int i; static struct {const char *bsd_name; long linux_type;} b2l_tbl[] = { {"ufs", LINUX_UFS_SUPER_MAGIC}, {"cd9660", LINUX_ISOFS_SUPER_MAGIC}, {"nfs", LINUX_NFS_SUPER_MAGIC}, {"ext2fs", LINUX_EXT2_SUPER_MAGIC}, {"procfs", LINUX_PROC_SUPER_MAGIC}, {"msdosfs", LINUX_MSDOS_SUPER_MAGIC}, {"ntfs", LINUX_NTFS_SUPER_MAGIC}, {"nwfs", LINUX_NCP_SUPER_MAGIC}, {"hpfs", LINUX_HPFS_SUPER_MAGIC}, {"coda", LINUX_CODA_SUPER_MAGIC}, {"devfs", LINUX_DEVFS_SUPER_MAGIC}, {NULL, 0L}}; for (i = 0; b2l_tbl[i].bsd_name != NULL; i++) if (strcmp(b2l_tbl[i].bsd_name, fstypename) == 0) return (b2l_tbl[i].linux_type); return (0L); } static void bsd_to_linux_statfs(struct statfs *bsd_statfs, struct l_statfs *linux_statfs) { linux_statfs->f_type = bsd_to_linux_ftype(bsd_statfs->f_fstypename); linux_statfs->f_bsize = bsd_statfs->f_bsize; linux_statfs->f_blocks = bsd_statfs->f_blocks; linux_statfs->f_bfree = bsd_statfs->f_bfree; linux_statfs->f_bavail = bsd_statfs->f_bavail; linux_statfs->f_ffree = bsd_statfs->f_ffree; linux_statfs->f_files = bsd_statfs->f_files; linux_statfs->f_fsid.val[0] = bsd_statfs->f_fsid.val[0]; linux_statfs->f_fsid.val[1] = bsd_statfs->f_fsid.val[1]; linux_statfs->f_namelen = MAXNAMLEN; } int linux_statfs(struct thread *td, struct linux_statfs_args *args) { struct l_statfs linux_statfs; struct statfs bsd_statfs; char *path; int error; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(statfs)) printf(ARGS(statfs, "%s, *"), path); #endif error = kern_statfs(td, path, UIO_SYSSPACE, &bsd_statfs); LFREEPATH(path); if (error) return (error); bsd_to_linux_statfs(&bsd_statfs, &linux_statfs); return copyout(&linux_statfs, args->buf, sizeof(linux_statfs)); } static void bsd_to_linux_statfs64(struct statfs *bsd_statfs, struct l_statfs64 *linux_statfs) { linux_statfs->f_type = bsd_to_linux_ftype(bsd_statfs->f_fstypename); linux_statfs->f_bsize = bsd_statfs->f_bsize; linux_statfs->f_blocks = bsd_statfs->f_blocks; linux_statfs->f_bfree = bsd_statfs->f_bfree; linux_statfs->f_bavail = bsd_statfs->f_bavail; linux_statfs->f_ffree = bsd_statfs->f_ffree; linux_statfs->f_files = bsd_statfs->f_files; linux_statfs->f_fsid.val[0] = bsd_statfs->f_fsid.val[0]; linux_statfs->f_fsid.val[1] = 
bsd_statfs->f_fsid.val[1]; linux_statfs->f_namelen = MAXNAMLEN; } int linux_statfs64(struct thread *td, struct linux_statfs64_args *args) { struct l_statfs64 linux_statfs; struct statfs bsd_statfs; char *path; int error; if (args->bufsize != sizeof(struct l_statfs64)) return EINVAL; LCONVPATHEXIST(td, args->path, &path); #ifdef DEBUG if (ldebug(statfs64)) printf(ARGS(statfs64, "%s, *"), path); #endif error = kern_statfs(td, path, UIO_SYSSPACE, &bsd_statfs); LFREEPATH(path); if (error) return (error); bsd_to_linux_statfs64(&bsd_statfs, &linux_statfs); return copyout(&linux_statfs, args->buf, sizeof(linux_statfs)); } int linux_fstatfs(struct thread *td, struct linux_fstatfs_args *args) { struct l_statfs linux_statfs; struct statfs bsd_statfs; int error; #ifdef DEBUG if (ldebug(fstatfs)) printf(ARGS(fstatfs, "%d, *"), args->fd); #endif error = kern_fstatfs(td, args->fd, &bsd_statfs); if (error) return error; bsd_to_linux_statfs(&bsd_statfs, &linux_statfs); return copyout(&linux_statfs, args->buf, sizeof(linux_statfs)); } struct l_ustat { l_daddr_t f_tfree; l_ino_t f_tinode; char f_fname[6]; char f_fpack[6]; }; int linux_ustat(struct thread *td, struct linux_ustat_args *args) { #ifdef DEBUG if (ldebug(ustat)) printf(ARGS(ustat, "%d, *"), args->dev); #endif return (EOPNOTSUPP); } #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) static int stat64_copyout(struct stat *buf, void *ubuf) { struct l_stat64 lbuf; bzero(&lbuf, sizeof(lbuf)); - lbuf.st_dev = uminor(buf->st_dev) | (umajor(buf->st_dev) << 8); + lbuf.st_dev = minor(buf->st_dev) | (major(buf->st_dev) << 8); lbuf.st_ino = buf->st_ino; lbuf.st_mode = buf->st_mode; lbuf.st_nlink = buf->st_nlink; lbuf.st_uid = buf->st_uid; lbuf.st_gid = buf->st_gid; lbuf.st_rdev = buf->st_rdev; lbuf.st_size = buf->st_size; lbuf.st_atime = buf->st_atime; lbuf.st_mtime = buf->st_mtime; lbuf.st_ctime = buf->st_ctime; lbuf.st_blksize = buf->st_blksize; lbuf.st_blocks = buf->st_blocks; /* * The __st_ino field makes all the difference. In the Linux kernel * it is conditionally compiled based on STAT64_HAS_BROKEN_ST_INO, * but without the assignment to __st_ino the runtime linker refuses * to mmap(2) any shared libraries. 
I guess it's broken alright :-) */ lbuf.__st_ino = buf->st_ino; return (copyout(&lbuf, ubuf, sizeof(lbuf))); } int linux_stat64(struct thread *td, struct linux_stat64_args *args) { struct stat buf; char *filename; int error; LCONVPATHEXIST(td, args->filename, &filename); #ifdef DEBUG if (ldebug(stat64)) printf(ARGS(stat64, "%s, *"), filename); #endif error = kern_stat(td, filename, UIO_SYSSPACE, &buf); if (!error) translate_path_major_minor(td, filename, &buf); LFREEPATH(filename); if (error) return (error); return (stat64_copyout(&buf, args->statbuf)); } int linux_lstat64(struct thread *td, struct linux_lstat64_args *args) { struct stat sb; char *filename; int error; LCONVPATHEXIST(td, args->filename, &filename); #ifdef DEBUG if (ldebug(lstat64)) printf(ARGS(lstat64, "%s, *"), args->filename); #endif error = kern_lstat(td, filename, UIO_SYSSPACE, &sb); if (!error) translate_path_major_minor(td, filename, &sb); LFREEPATH(filename); if (error) return (error); return (stat64_copyout(&sb, args->statbuf)); } int linux_fstat64(struct thread *td, struct linux_fstat64_args *args) { struct stat buf; int error; #ifdef DEBUG if (ldebug(fstat64)) printf(ARGS(fstat64, "%d, *"), args->fd); #endif error = kern_fstat(td, args->fd, &buf); translate_fd_major_minor(td, args->fd, &buf); if (!error) error = stat64_copyout(&buf, args->statbuf); return (error); } int linux_fstatat64(struct thread *td, struct linux_fstatat64_args *args) { char *path; int error, dfd, flag; struct stat buf; if (args->flag & ~LINUX_AT_SYMLINK_NOFOLLOW) return (EINVAL); flag = (args->flag & LINUX_AT_SYMLINK_NOFOLLOW) ? AT_SYMLINK_NOFOLLOW : 0; dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); #ifdef DEBUG if (ldebug(fstatat64)) printf(ARGS(fstatat64, "%i, %s, %i"), args->dfd, path, args->flag); #endif error = kern_statat(td, flag, dfd, path, UIO_SYSSPACE, &buf); translate_path_major_minor_at(td, args->pathname, &buf, dfd); if (!error) error = stat64_copyout(&buf, args->statbuf); LFREEPATH(path); return (error); } #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ diff --git a/sys/compat/svr4/svr4_types.h b/sys/compat/svr4/svr4_types.h index af801d874322..e4f51b5528ab 100644 --- a/sys/compat/svr4/svr4_types.h +++ b/sys/compat/svr4/svr4_types.h @@ -1,81 +1,81 @@ /*- * Copyright (c) 1998 Mark Newton * Copyright (c) 1994 Christos Zoulas * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SVR4_TYPES_H_ #define _SVR4_TYPES_H_ typedef u_quad_t svr4_ino64_t; typedef quad_t svr4_off64_t; typedef quad_t svr4_blkcnt64_t; typedef u_quad_t svr4_fsblkcnt64_t; typedef long svr4_off_t; typedef u_long svr4_dev_t; typedef u_long svr4_ino_t; typedef u_long svr4_mode_t; typedef u_long svr4_nlink_t; typedef long svr4_uid_t; typedef long svr4_gid_t; typedef long svr4_daddr_t; typedef long svr4_pid_t; typedef long svr4_time_t; typedef long svr4_blkcnt_t; typedef u_long svr4_fsblkcnt_t; typedef char *svr4_caddr_t; typedef u_int svr4_size_t; typedef short svr4_o_dev_t; typedef short svr4_o_pid_t; typedef u_short svr4_o_ino_t; typedef u_short svr4_o_mode_t; typedef short svr4_o_nlink_t; typedef u_short svr4_o_uid_t; typedef u_short svr4_o_gid_t; typedef long svr4_clock_t; typedef int svr4_key_t; typedef struct timespec svr4_timestruc_t; #define svr4_omajor(x) ((int32_t)((((x) & 0x7f00) >> 8))) #define svr4_ominor(x) ((int32_t)((((x) & 0x00ff) >> 0))) #define svr4_omakedev(x,y) ((svr4_o_dev_t)((((x) << 8) & 0x7f00) | \ (((y) << 0) & 0x00ff))) #define svr4_to_bsd_odev_t(d) makedev(svr4_omajor(d), svr4_ominor(d)) -#define bsd_to_svr4_odev_t(d) svr4_omakedev(umajor(d), uminor(d)) +#define bsd_to_svr4_odev_t(d) svr4_omakedev(major(d), minor(d)) #define svr4_major(x) ((int32_t)((((x) & 0xfffc0000) >> 18))) #define svr4_minor(x) ((int32_t)((((x) & 0x0003ffff) >> 0))) #define svr4_makedev(x,y) ((svr4_dev_t)((((x) << 18) & 0xfffc0000) | \ (((y) << 0) & 0x0003ffff))) #define svr4_to_bsd_dev_t(d) makedev(svr4_major(d), svr4_minor(d)) -#define bsd_to_svr4_dev_t(d) svr4_makedev(umajor(d), uminor(d)) +#define bsd_to_svr4_dev_t(d) svr4_makedev(major(d), minor(d)) #endif /* !_SVR4_TYPES_H_ */ diff --git a/sys/dev/xen/blkback/blkback.c b/sys/dev/xen/blkback/blkback.c index a418c6d9ca55..259f2f6c0418 100644 --- a/sys/dev/xen/blkback/blkback.c +++ b/sys/dev/xen/blkback/blkback.c @@ -1,1349 +1,1349 @@ /* * Copyright (c) 2006, Cisco Systems, Inc. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if XEN_BLKBACK_DEBUG #define DPRINTF(fmt, args...) \ printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #else #define DPRINTF(fmt, args...) ((void)0) #endif #define WPRINTF(fmt, args...) \ printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args) #define BLKBACK_INVALID_HANDLE (~0) struct ring_ref { vm_offset_t va; grant_handle_t handle; uint64_t bus_addr; }; typedef struct blkback_info { /* Schedule lists */ STAILQ_ENTRY(blkback_info) next_req; int on_req_sched_list; struct xenbus_device *xdev; XenbusState frontend_state; domid_t domid; int state; int ring_connected; struct ring_ref rr; blkif_back_ring_t ring; evtchn_port_t evtchn; int irq; void *irq_cookie; int ref_cnt; int handle; char *mode; char *type; char *dev_name; struct vnode *vn; struct cdev *cdev; struct cdevsw *csw; u_int sector_size; int sector_size_shift; off_t media_size; u_int media_num_sectors; int major; int minor; int read_only; struct mtx blk_ring_lock; device_t ndev; /* Stats */ int st_rd_req; int st_wr_req; int st_oo_req; int st_err_req; } blkif_t; /* * These are rather arbitrary. They are fairly large because adjacent requests * pulled from a communication ring are quite likely to end up being part of * the same scatter/gather request at the disc. * * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW ** * * This will increase the chances of being able to write whole tracks. * 64 should be enough to keep us competitive with Linux. */ static int blkif_reqs = 64; TUNABLE_INT("xen.vbd.blkif_reqs", &blkif_reqs); static int mmap_pages; /* * Each outstanding request that we've passed to the lower device layers has a * 'pending_req' allocated to it. Each buffer_head that completes decrements * the pendcnt towards zero. When it hits zero, the specified domain has a * response queued for it, with the saved 'id' passed back. 
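 * One pending_req thus fans out into at most
 * BLKIF_MAX_SEGMENTS_PER_REQUEST pages of I/O, and the saved 'id'
 * is echoed back to the frontend by make_response() once pendcnt
 * reaches zero.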
*/ typedef struct pending_req { blkif_t *blkif; uint64_t id; int nr_pages; int pendcnt; unsigned short operation; int status; STAILQ_ENTRY(pending_req) free_list; } pending_req_t; static pending_req_t *pending_reqs; static STAILQ_HEAD(pending_reqs_list, pending_req) pending_free = STAILQ_HEAD_INITIALIZER(pending_free); static struct mtx pending_free_lock; static STAILQ_HEAD(blkback_req_sched_list, blkback_info) req_sched_list = STAILQ_HEAD_INITIALIZER(req_sched_list); static struct mtx req_sched_list_lock; static unsigned long mmap_vstart; static unsigned long *pending_vaddrs; static grant_handle_t *pending_grant_handles; static struct task blk_req_task; /* Protos */ static void disconnect_ring(blkif_t *blkif); static int vbd_add_dev(struct xenbus_device *xdev); static inline int vaddr_pagenr(pending_req_t *req, int seg) { return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg; } static inline unsigned long vaddr(pending_req_t *req, int seg) { return pending_vaddrs[vaddr_pagenr(req, seg)]; } #define pending_handle(_req, _seg) \ (pending_grant_handles[vaddr_pagenr(_req, _seg)]) static unsigned long alloc_empty_page_range(unsigned long nr_pages) { void *pages; int i = 0, j = 0; multicall_entry_t mcl[17]; unsigned long mfn_list[16]; struct xen_memory_reservation reservation = { .extent_start = mfn_list, .nr_extents = 0, .address_bits = 0, .extent_order = 0, .domid = DOMID_SELF }; pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT); if (pages == NULL) return 0; memset(mcl, 0, sizeof(mcl)); while (i < nr_pages) { unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE); mcl[j].op = __HYPERVISOR_update_va_mapping; mcl[j].args[0] = va; mfn_list[j++] = vtomach(va) >> PAGE_SHIFT; xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY; if (j == 16 || i == nr_pages) { mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL; reservation.nr_extents = j; mcl[j].op = __HYPERVISOR_memory_op; mcl[j].args[0] = XENMEM_decrease_reservation; mcl[j].args[1] = (unsigned long)&reservation; (void)HYPERVISOR_multicall(mcl, j+1); mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0; j = 0; } } return (unsigned long)pages; } static pending_req_t * alloc_req(void) { pending_req_t *req; mtx_lock(&pending_free_lock); if ((req = STAILQ_FIRST(&pending_free))) { STAILQ_REMOVE(&pending_free, req, pending_req, free_list); STAILQ_NEXT(req, free_list) = NULL; } mtx_unlock(&pending_free_lock); return req; } static void free_req(pending_req_t *req) { int was_empty; mtx_lock(&pending_free_lock); was_empty = STAILQ_EMPTY(&pending_free); STAILQ_INSERT_TAIL(&pending_free, req, free_list); mtx_unlock(&pending_free_lock); if (was_empty) taskqueue_enqueue(taskqueue_swi, &blk_req_task); } static void fast_flush_area(pending_req_t *req) { struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int i, invcount = 0; grant_handle_t handle; int ret; for (i = 0; i < req->nr_pages; i++) { handle = pending_handle(req, i); if (handle == BLKBACK_INVALID_HANDLE) continue; unmap[invcount].host_addr = vaddr(req, i); unmap[invcount].dev_bus_addr = 0; unmap[invcount].handle = handle; pending_handle(req, i) = BLKBACK_INVALID_HANDLE; invcount++; } ret = HYPERVISOR_grant_table_op( GNTTABOP_unmap_grant_ref, unmap, invcount); PANIC_IF(ret); } static void blkif_get(blkif_t *blkif) { atomic_add_int(&blkif->ref_cnt, 1); } static void blkif_put(blkif_t *blkif) { if (atomic_fetchadd_int(&blkif->ref_cnt, -1) == 1) { DPRINTF("Removing %x\n", (unsigned int)blkif); disconnect_ring(blkif); if (blkif->mode) 
free(blkif->mode, M_DEVBUF); if (blkif->type) free(blkif->type, M_DEVBUF); if (blkif->dev_name) free(blkif->dev_name, M_DEVBUF); free(blkif, M_DEVBUF); } } static int blkif_create(struct xenbus_device *xdev, long handle, char *mode, char *type, char *params) { blkif_t *blkif; blkif = (blkif_t *)malloc(sizeof(*blkif), M_DEVBUF, M_NOWAIT | M_ZERO); if (!blkif) return ENOMEM; DPRINTF("Created %x\n", (unsigned int)blkif); blkif->ref_cnt = 1; blkif->domid = xdev->otherend_id; blkif->handle = handle; blkif->mode = mode; blkif->type = type; blkif->dev_name = params; blkif->xdev = xdev; xdev->data = blkif; mtx_init(&blkif->blk_ring_lock, "blk_ring_lock", "blkback ring lock", MTX_DEF); if (strcmp(mode, "w")) blkif->read_only = 1; return 0; } static void add_to_req_schedule_list_tail(blkif_t *blkif) { if (!blkif->on_req_sched_list) { mtx_lock(&req_sched_list_lock); if (!blkif->on_req_sched_list && (blkif->state == XenbusStateConnected)) { blkif_get(blkif); STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req); blkif->on_req_sched_list = 1; taskqueue_enqueue(taskqueue_swi, &blk_req_task); } mtx_unlock(&req_sched_list_lock); } } /* This routine does not call blkif_get(), does not schedule the blk_req_task to run, and assumes that the state is connected */ static void add_to_req_schedule_list_tail2(blkif_t *blkif) { mtx_lock(&req_sched_list_lock); if (!blkif->on_req_sched_list) { STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req); blkif->on_req_sched_list = 1; } mtx_unlock(&req_sched_list_lock); } /* Removes blkif from front of list and does not call blkif_put() (caller must) */ static blkif_t * remove_from_req_schedule_list(void) { blkif_t *blkif; mtx_lock(&req_sched_list_lock); if ((blkif = STAILQ_FIRST(&req_sched_list))) { STAILQ_REMOVE(&req_sched_list, blkif, blkback_info, next_req); STAILQ_NEXT(blkif, next_req) = NULL; blkif->on_req_sched_list = 0; } mtx_unlock(&req_sched_list_lock); return blkif; } static void make_response(blkif_t *blkif, uint64_t id, unsigned short op, int st) { blkif_response_t *resp; blkif_back_ring_t *blk_ring = &blkif->ring; int more_to_do = 0; int notify; mtx_lock(&blkif->blk_ring_lock); /* Place on the response ring for the relevant domain. */ resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt); resp->id = id; resp->operation = op; resp->status = st; blk_ring->rsp_prod_pvt++; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify); if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) { /* * Tail check for pending requests. Allows frontend to avoid * notifications if requests are already in flight (lower * overheads and promotes batching).
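* RING_FINAL_CHECK_FOR_REQUESTS() re-arms the ring's req_event and checks once more, closing the window where the frontend enqueues a request just as the tail check runs.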
*/ RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do); } else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring)) more_to_do = 1; mtx_unlock(&blkif->blk_ring_lock); if (more_to_do) add_to_req_schedule_list_tail(blkif); if (notify) notify_remote_via_irq(blkif->irq); } static void end_block_io_op(struct bio *bio) { pending_req_t *pending_req = bio->bio_caller2; if (bio->bio_error) { DPRINTF("BIO returned error %d for operation on device %s\n", bio->bio_error, pending_req->blkif->dev_name); pending_req->status = BLKIF_RSP_ERROR; pending_req->blkif->st_err_req++; } #if 0 printf("done: bio=%x error=%x completed=%llu resid=%lu flags=%x\n", (unsigned int)bio, bio->bio_error, bio->bio_completed, bio->bio_resid, bio->bio_flags); #endif if (atomic_fetchadd_int(&pending_req->pendcnt, -1) == 1) { fast_flush_area(pending_req); make_response(pending_req->blkif, pending_req->id, pending_req->operation, pending_req->status); blkif_put(pending_req->blkif); free_req(pending_req); } g_destroy_bio(bio); } static void dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, pending_req_t *pending_req) { struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST]; struct { unsigned long buf; unsigned int nsec; } seg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; unsigned int nseg = req->nr_segments, nr_sects = 0; struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST]; int operation, ret, i, nbio = 0; /* Check that number of segments is sane. */ if (unlikely(nseg == 0) || unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) { DPRINTF("Bad number of segments in request (%d)\n", nseg); goto fail_response; } if (req->operation == BLKIF_OP_WRITE) { if (blkif->read_only) { DPRINTF("Attempt to write to read only device %s\n", blkif->dev_name); goto fail_response; } operation = BIO_WRITE; } else operation = BIO_READ; pending_req->blkif = blkif; pending_req->id = req->id; pending_req->operation = req->operation; pending_req->status = BLKIF_RSP_OKAY; pending_req->nr_pages = nseg; for (i = 0; i < nseg; i++) { seg[i].nsec = req->seg[i].last_sect - req->seg[i].first_sect + 1; if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) || (seg[i].nsec <= 0)) goto fail_response; nr_sects += seg[i].nsec; map[i].host_addr = vaddr(pending_req, i); map[i].dom = blkif->domid; map[i].ref = req->seg[i].gref; map[i].flags = GNTMAP_host_map; if (operation == BIO_WRITE) map[i].flags |= GNTMAP_readonly; } /* Convert to the disk's sector size */ nr_sects = (nr_sects << 9) >> blkif->sector_size_shift; ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg); PANIC_IF(ret); for (i = 0; i < nseg; i++) { if (unlikely(map[i].status != 0)) { DPRINTF("invalid buffer -- could not remap it\n"); goto fail_flush; } pending_handle(pending_req, i) = map[i].handle; #if 0 /* Can't do this in FreeBSD since vtophys() returns the pfn */ /* of the remote domain who loaned us the machine page - DPT */ xen_phys_machine[(vtophys(vaddr(pending_req, i)) >> PAGE_SHIFT)] = map[i].dev_bus_addr >> PAGE_SHIFT; #endif seg[i].buf = map[i].dev_bus_addr | (req->seg[i].first_sect << 9); } if (req->sector_number + nr_sects > blkif->media_num_sectors) { DPRINTF("%s of [%llu,%llu] extends past end of device %s\n", operation == BIO_READ ?
"read" : "write", req->sector_number, req->sector_number + nr_sects, blkif->dev_name); goto fail_flush; } for (i = 0; i < nseg; i++) { struct bio *bio; if ((int)seg[i].nsec & ((blkif->sector_size >> 9) - 1)) { DPRINTF("Misaligned I/O request from domain %d", blkif->domid); goto fail_put_bio; } bio = biolist[nbio++] = g_new_bio(); if (unlikely(bio == NULL)) goto fail_put_bio; bio->bio_cmd = operation; bio->bio_offset = req->sector_number << blkif->sector_size_shift; bio->bio_length = seg[i].nsec << 9; bio->bio_bcount = bio->bio_length; bio->bio_data = (caddr_t)(vaddr(pending_req, i) | (seg[i].buf & PAGE_MASK)); bio->bio_done = end_block_io_op; bio->bio_caller2 = pending_req; bio->bio_dev = blkif->cdev; req->sector_number += (seg[i].nsec << 9) >> blkif->sector_size_shift; #if 0 printf("new: bio=%x cmd=%d sect=%llu nsect=%u iosize_max=%u @ %08lx\n", (unsigned int)bio, req->operation, req->sector_number, seg[i].nsec, blkif->cdev->si_iosize_max, seg[i].buf); #endif } pending_req->pendcnt = nbio; blkif_get(blkif); for (i = 0; i < nbio; i++) (*blkif->csw->d_strategy)(biolist[i]); return; fail_put_bio: for (i = 0; i < (nbio-1); i++) g_destroy_bio(biolist[i]); fail_flush: fast_flush_area(pending_req); fail_response: make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); free_req(pending_req); } static void blk_req_action(void *context, int pending) { blkif_t *blkif; DPRINTF("\n"); while (!STAILQ_EMPTY(&req_sched_list)) { blkif_back_ring_t *blk_ring; RING_IDX rc, rp; blkif = remove_from_req_schedule_list(); blk_ring = &blkif->ring; rc = blk_ring->req_cons; rp = blk_ring->sring->req_prod; rmb(); /* Ensure we see queued requests up to 'rp'. */ while ((rc != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) { blkif_request_t *req; pending_req_t *pending_req; pending_req = alloc_req(); if (pending_req == NULL) goto out_of_preqs; req = RING_GET_REQUEST(blk_ring, rc); blk_ring->req_cons = ++rc; /* before make_response() */ switch (req->operation) { case BLKIF_OP_READ: blkif->st_rd_req++; dispatch_rw_block_io(blkif, req, pending_req); break; case BLKIF_OP_WRITE: blkif->st_wr_req++; dispatch_rw_block_io(blkif, req, pending_req); break; default: blkif->st_err_req++; DPRINTF("error: unknown block io operation [%d]\n", req->operation); make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR); free_req(pending_req); break; } } blkif_put(blkif); } return; out_of_preqs: /* We ran out of pending req structs */ /* Just requeue interface and wait to be rescheduled to run when one is freed */ add_to_req_schedule_list_tail2(blkif); blkif->st_oo_req++; } /* Handle interrupt from a frontend */ static void blkback_intr(void *arg) { blkif_t *blkif = arg; DPRINTF("%x\n", (unsigned int)blkif); add_to_req_schedule_list_tail(blkif); } /* Map grant ref for ring */ static int map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring) { struct gnttab_map_grant_ref op; ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE); if (ring->va == 0) return ENOMEM; op.host_addr = ring->va; op.flags = GNTMAP_host_map; op.ref = ref; op.dom = dom; HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1); if (op.status) { WPRINTF("grant table op err=%d\n", op.status); kmem_free(kernel_map, ring->va, PAGE_SIZE); ring->va = 0; return EACCES; } ring->handle = op.handle; ring->bus_addr = op.dev_bus_addr; return 0; } /* Unmap grant ref for ring */ static void unmap_ring(struct ring_ref *ring) { struct gnttab_unmap_grant_ref op; op.host_addr = ring->va; op.dev_bus_addr = ring->bus_addr; op.handle = ring->handle; 
HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1); if (op.status) WPRINTF("grant table op err=%d\n", op.status); kmem_free(kernel_map, ring->va, PAGE_SIZE); ring->va = 0; } static int connect_ring(blkif_t *blkif) { struct xenbus_device *xdev = blkif->xdev; blkif_sring_t *ring; unsigned long ring_ref; evtchn_port_t evtchn; evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain }; int err; if (blkif->ring_connected) return 0; // Grab FE data and map his memory err = xenbus_gather(NULL, xdev->otherend, "ring-ref", "%lu", &ring_ref, "event-channel", "%u", &evtchn, NULL); if (err) { xenbus_dev_fatal(xdev, err, "reading %s/ring-ref and event-channel", xdev->otherend); return err; } err = map_ring(ring_ref, blkif->domid, &blkif->rr); if (err) { xenbus_dev_fatal(xdev, err, "mapping ring"); return err; } ring = (blkif_sring_t *)blkif->rr.va; BACK_RING_INIT(&blkif->ring, ring, PAGE_SIZE); op.u.bind_interdomain.remote_dom = blkif->domid; op.u.bind_interdomain.remote_port = evtchn; err = HYPERVISOR_event_channel_op(&op); if (err) { unmap_ring(&blkif->rr); xenbus_dev_fatal(xdev, err, "binding event channel"); return err; } blkif->evtchn = op.u.bind_interdomain.local_port; /* bind evtchn to irq handler */ blkif->irq = bind_evtchn_to_irqhandler(blkif->evtchn, "blkback", blkback_intr, blkif, INTR_TYPE_NET|INTR_MPSAFE, &blkif->irq_cookie); blkif->ring_connected = 1; DPRINTF("%x rings connected! evtchn=%d irq=%d\n", (unsigned int)blkif, blkif->evtchn, blkif->irq); return 0; } static void disconnect_ring(blkif_t *blkif) { DPRINTF("\n"); if (blkif->ring_connected) { unbind_from_irqhandler(blkif->irq, blkif->irq_cookie); blkif->irq = 0; unmap_ring(&blkif->rr); blkif->ring_connected = 0; } } static void connect(blkif_t *blkif) { struct xenbus_transaction *xbt; struct xenbus_device *xdev = blkif->xdev; int err; if (!blkif->ring_connected || blkif->vn == NULL || blkif->state == XenbusStateConnected) return; DPRINTF("%s\n", xdev->otherend); /* Supply the information about the device the frontend needs */ again: xbt = xenbus_transaction_start(); if (IS_ERR(xbt)) { xenbus_dev_fatal(xdev, PTR_ERR(xbt), "Error writing configuration for backend " "(start transaction)"); return; } err = xenbus_printf(xbt, xdev->nodename, "sectors", "%u", blkif->media_num_sectors); if (err) { xenbus_dev_fatal(xdev, err, "writing %s/sectors", xdev->nodename); goto abort; } err = xenbus_printf(xbt, xdev->nodename, "info", "%u", blkif->read_only ? 
VDISK_READONLY : 0); if (err) { xenbus_dev_fatal(xdev, err, "writing %s/info", xdev->nodename); goto abort; } err = xenbus_printf(xbt, xdev->nodename, "sector-size", "%u", blkif->sector_size); if (err) { xenbus_dev_fatal(xdev, err, "writing %s/sector-size", xdev->nodename); goto abort; } err = xenbus_transaction_end(xbt, 0); if (err == -EAGAIN) goto again; if (err) xenbus_dev_fatal(xdev, err, "ending transaction"); err = xenbus_switch_state(xdev, NULL, XenbusStateConnected); if (err) xenbus_dev_fatal(xdev, err, "switching %s to Connected state", xdev->nodename); blkif->state = XenbusStateConnected; return; abort: xenbus_transaction_end(xbt, 1); } static int blkback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id) { int err; char *p, *mode = NULL, *type = NULL, *params = NULL; long handle; DPRINTF("node=%s\n", xdev->nodename); p = strrchr(xdev->otherend, '/') + 1; handle = strtoul(p, NULL, 0); mode = xenbus_read(NULL, xdev->nodename, "mode", NULL); if (IS_ERR(mode)) { xenbus_dev_fatal(xdev, PTR_ERR(mode), "reading mode"); err = PTR_ERR(mode); goto error; } type = xenbus_read(NULL, xdev->nodename, "type", NULL); if (IS_ERR(type)) { xenbus_dev_fatal(xdev, PTR_ERR(type), "reading type"); err = PTR_ERR(type); goto error; } params = xenbus_read(NULL, xdev->nodename, "params", NULL); if (IS_ERR(params)) { xenbus_dev_fatal(xdev, PTR_ERR(params), "reading params"); err = PTR_ERR(params); goto error; } err = blkif_create(xdev, handle, mode, type, params); if (err) { xenbus_dev_fatal(xdev, err, "creating blkif"); goto error; } err = vbd_add_dev(xdev); if (err) { blkif_put((blkif_t *)xdev->data); xenbus_dev_fatal(xdev, err, "adding vbd device"); } return err; error: if (mode) free(mode, M_DEVBUF); if (type) free(type, M_DEVBUF); if (params) free(params, M_DEVBUF); return err; } static int blkback_remove(struct xenbus_device *xdev) { blkif_t *blkif = xdev->data; device_t ndev; DPRINTF("node=%s\n", xdev->nodename); blkif->state = XenbusStateClosing; if ((ndev = blkif->ndev)) { blkif->ndev = NULL; mtx_lock(&Giant); device_detach(ndev); mtx_unlock(&Giant); } xdev->data = NULL; blkif->xdev = NULL; blkif_put(blkif); return 0; } static int blkback_resume(struct xenbus_device *xdev) { DPRINTF("node=%s\n", xdev->nodename); return 0; } static void frontend_changed(struct xenbus_device *xdev, XenbusState frontend_state) { blkif_t *blkif = xdev->data; DPRINTF("state=%d\n", frontend_state); blkif->frontend_state = frontend_state; switch (frontend_state) { case XenbusStateInitialising: break; case XenbusStateInitialised: case XenbusStateConnected: connect_ring(blkif); connect(blkif); break; case XenbusStateClosing: xenbus_switch_state(xdev, NULL, XenbusStateClosing); break; case XenbusStateClosed: xenbus_remove_device(xdev); break; case XenbusStateUnknown: case XenbusStateInitWait: xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend", frontend_state); break; } } /* ** Driver registration ** */ static struct xenbus_device_id blkback_ids[] = { { "vbd" }, { "" } }; static struct xenbus_driver blkback = { .name = "blkback", .ids = blkback_ids, .probe = blkback_probe, .remove = blkback_remove, .resume = blkback_resume, .otherend_changed = frontend_changed, }; static void blkback_init(void *unused) { int i; TASK_INIT(&blk_req_task, 0, blk_req_action, NULL); mtx_init(&req_sched_list_lock, "blk_req_sched_lock", "blkback req sched lock", MTX_DEF); mtx_init(&pending_free_lock, "blk_pending_req_lock", "blkback pending request lock", MTX_DEF); mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
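/* One page-sized VA slot per possible outstanding segment: blkif_reqs requests times BLKIF_MAX_SEGMENTS_PER_REQUEST segments each, all carved from the single range allocated below. */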
pending_reqs = malloc(sizeof(pending_reqs[0]) * blkif_reqs, M_DEVBUF, M_ZERO|M_NOWAIT); pending_grant_handles = malloc(sizeof(pending_grant_handles[0]) * mmap_pages, M_DEVBUF, M_NOWAIT); pending_vaddrs = malloc(sizeof(pending_vaddrs[0]) * mmap_pages, M_DEVBUF, M_NOWAIT); mmap_vstart = alloc_empty_page_range(mmap_pages); if (!pending_reqs || !pending_grant_handles || !pending_vaddrs || !mmap_vstart) { if (pending_reqs) free(pending_reqs, M_DEVBUF); if (pending_grant_handles) free(pending_grant_handles, M_DEVBUF); if (pending_vaddrs) free(pending_vaddrs, M_DEVBUF); WPRINTF("out of memory\n"); return; } for (i = 0; i < mmap_pages; i++) { pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT); pending_grant_handles[i] = BLKBACK_INVALID_HANDLE; } for (i = 0; i < blkif_reqs; i++) { STAILQ_INSERT_TAIL(&pending_free, &pending_reqs[i], free_list); } DPRINTF("registering %s\n", blkback.name); xenbus_register_backend(&blkback); } SYSINIT(xbbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, blkback_init, NULL) static void close_device(blkif_t *blkif) { DPRINTF("closing dev=%s\n", blkif->dev_name); if (blkif->vn) { int flags = FREAD; if (!blkif->read_only) flags |= FWRITE; if (blkif->csw) { dev_relthread(blkif->cdev); blkif->csw = NULL; } (void)vn_close(blkif->vn, flags, NOCRED, curthread); blkif->vn = NULL; } } static int open_device(blkif_t *blkif) { struct nameidata nd; struct vattr vattr; struct cdev *dev; struct cdevsw *devsw; int flags = FREAD, err = 0; DPRINTF("opening dev=%s\n", blkif->dev_name); if (!blkif->read_only) flags |= FWRITE; if (!curthread->td_proc->p_fd->fd_cdir) { curthread->td_proc->p_fd->fd_cdir = rootvnode; VREF(rootvnode); } if (!curthread->td_proc->p_fd->fd_rdir) { curthread->td_proc->p_fd->fd_rdir = rootvnode; VREF(rootvnode); } if (!curthread->td_proc->p_fd->fd_jdir) { curthread->td_proc->p_fd->fd_jdir = rootvnode; VREF(rootvnode); } again: NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, blkif->dev_name, curthread); err = vn_open(&nd, &flags, 0, -1); if (err) { if (blkif->dev_name[0] != '/') { char *dev_path = "/dev/"; char *dev_name; /* Try adding device path at beginning of name */ dev_name = malloc(strlen(blkif->dev_name) + strlen(dev_path) + 1, M_DEVBUF, M_NOWAIT); if (dev_name) { sprintf(dev_name, "%s%s", dev_path, blkif->dev_name); free(blkif->dev_name, M_DEVBUF); blkif->dev_name = dev_name; goto again; } } xenbus_dev_fatal(blkif->xdev, err, "error opening device %s", blkif->dev_name); return err; } NDFREE(&nd, NDF_ONLY_PNBUF); blkif->vn = nd.ni_vp; /* We only support disks for now */ if (!vn_isdisk(blkif->vn, &err)) { xenbus_dev_fatal(blkif->xdev, err, "device %s is not a disk", blkif->dev_name); VOP_UNLOCK(blkif->vn, 0, curthread); goto error; } blkif->cdev = blkif->vn->v_rdev; blkif->csw = dev_refthread(blkif->cdev); PANIC_IF(blkif->csw == NULL); err = VOP_GETATTR(blkif->vn, &vattr, NOCRED); if (err) { xenbus_dev_fatal(blkif->xdev, err, "error getting vnode attributes for device %s", blkif->dev_name); VOP_UNLOCK(blkif->vn, 0, curthread); goto error; } VOP_UNLOCK(blkif->vn, 0, curthread); dev = blkif->vn->v_rdev; devsw = dev->si_devsw; if (!devsw->d_ioctl) { err = ENODEV; xenbus_dev_fatal(blkif->xdev, err, "no d_ioctl for device %s!", blkif->dev_name); goto error; } err = (*devsw->d_ioctl)(dev, DIOCGSECTORSIZE, (caddr_t)&blkif->sector_size, FREAD, curthread); if (err) { xenbus_dev_fatal(blkif->xdev, err, "error calling ioctl DIOCGSECTORSIZE for device %s", blkif->dev_name); goto error; } blkif->sector_size_shift = fls(blkif->sector_size) - 1; err = (*devsw->d_ioctl)(dev, DIOCGMEDIASIZE, 
(caddr_t)&blkif->media_size, FREAD, curthread); if (err) { xenbus_dev_fatal(blkif->xdev, err, "error calling ioctl DIOCGMEDIASIZE for device %s", blkif->dev_name); goto error; } blkif->media_num_sectors = blkif->media_size >> blkif->sector_size_shift; - blkif->major = umajor(vattr.va_rdev); - blkif->minor = uminor(vattr.va_rdev); + blkif->major = major(vattr.va_rdev); + blkif->minor = minor(vattr.va_rdev); DPRINTF("opened dev=%s major=%d minor=%d sector_size=%u media_size=%lld\n", blkif->dev_name, blkif->major, blkif->minor, blkif->sector_size, blkif->media_size); return 0; error: close_device(blkif); return err; } static int vbd_add_dev(struct xenbus_device *xdev) { blkif_t *blkif = xdev->data; device_t nexus, ndev; devclass_t dc; int err = 0; mtx_lock(&Giant); /* We will add a vbd device as a child of nexus0 (for now) */ if (!(dc = devclass_find("nexus")) || !(nexus = devclass_get_device(dc, 0))) { WPRINTF("could not find nexus0!\n"); err = ENOENT; goto done; } /* Create a newbus device representing the vbd */ ndev = BUS_ADD_CHILD(nexus, 0, "vbd", blkif->handle); if (!ndev) { WPRINTF("could not create newbus device vbd%d!\n", blkif->handle); err = EFAULT; goto done; } blkif_get(blkif); device_set_ivars(ndev, blkif); blkif->ndev = ndev; device_probe_and_attach(ndev); done: mtx_unlock(&Giant); return err; } enum { VBD_SYSCTL_DOMID, VBD_SYSCTL_ST_RD_REQ, VBD_SYSCTL_ST_WR_REQ, VBD_SYSCTL_ST_OO_REQ, VBD_SYSCTL_ST_ERR_REQ, VBD_SYSCTL_RING, }; static char * vbd_sysctl_ring_info(blkif_t *blkif, int cmd) { char *buf = malloc(256, M_DEVBUF, M_WAITOK); if (buf) { if (!blkif->ring_connected) sprintf(buf, "ring not connected\n"); else { blkif_back_ring_t *ring = &blkif->ring; sprintf(buf, "nr_ents=%x req_cons=%x" " req_prod=%x req_event=%x" " rsp_prod=%x rsp_event=%x", ring->nr_ents, ring->req_cons, ring->sring->req_prod, ring->sring->req_event, ring->sring->rsp_prod, ring->sring->rsp_event); } } return buf; } static int vbd_sysctl_handler(SYSCTL_HANDLER_ARGS) { device_t dev = (device_t)arg1; blkif_t *blkif = (blkif_t *)device_get_ivars(dev); const char *value; char *buf = NULL; int err; switch (arg2) { case VBD_SYSCTL_DOMID: return sysctl_handle_int(oidp, NULL, blkif->domid, req); case VBD_SYSCTL_ST_RD_REQ: return sysctl_handle_int(oidp, NULL, blkif->st_rd_req, req); case VBD_SYSCTL_ST_WR_REQ: return sysctl_handle_int(oidp, NULL, blkif->st_wr_req, req); case VBD_SYSCTL_ST_OO_REQ: return sysctl_handle_int(oidp, NULL, blkif->st_oo_req, req); case VBD_SYSCTL_ST_ERR_REQ: return sysctl_handle_int(oidp, NULL, blkif->st_err_req, req); case VBD_SYSCTL_RING: value = buf = vbd_sysctl_ring_info(blkif, arg2); break; default: return (EINVAL); } err = SYSCTL_OUT(req, value, strlen(value)); if (buf != NULL) free(buf, M_DEVBUF); return err; } /* Newbus vbd device driver probe */ static int vbd_probe(device_t dev) { DPRINTF("vbd%d\n", device_get_unit(dev)); return 0; } /* Newbus vbd device driver attach */ static int vbd_attach(device_t dev) { blkif_t *blkif = (blkif_t *)device_get_ivars(dev); DPRINTF("%s\n", blkif->dev_name); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD, dev, VBD_SYSCTL_DOMID, vbd_sysctl_handler, "I", "domid of frontend"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "rd_reqs", CTLTYPE_INT|CTLFLAG_RD, dev, VBD_SYSCTL_ST_RD_REQ, vbd_sysctl_handler, "I", "number of read reqs"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), 
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "wr_reqs", CTLTYPE_INT|CTLFLAG_RD, dev, VBD_SYSCTL_ST_WR_REQ, vbd_sysctl_handler, "I", "number of write reqs"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "oo_reqs", CTLTYPE_INT|CTLFLAG_RD, dev, VBD_SYSCTL_ST_OO_REQ, vbd_sysctl_handler, "I", "number of deferred reqs"); SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "err_reqs", CTLTYPE_INT|CTLFLAG_RD, dev, VBD_SYSCTL_ST_ERR_REQ, vbd_sysctl_handler, "I", "number of reqs that returned error"); #if XEN_BLKBACK_DEBUG SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "ring", CTLFLAG_RD, dev, VBD_SYSCTL_RING, vbd_sysctl_handler, "A", "req ring info"); #endif if (!open_device(blkif)) connect(blkif); return bus_generic_attach(dev); } /* Newbus vbd device driver detach */ static int vbd_detach(device_t dev) { blkif_t *blkif = (blkif_t *)device_get_ivars(dev); DPRINTF("%s\n", blkif->dev_name); close_device(blkif); bus_generic_detach(dev); blkif_put(blkif); return 0; } static device_method_t vbd_methods[] = { /* Device interface */ DEVMETHOD(device_probe, vbd_probe), DEVMETHOD(device_attach, vbd_attach), DEVMETHOD(device_detach, vbd_detach), DEVMETHOD(device_shutdown, bus_generic_shutdown), DEVMETHOD(device_suspend, bus_generic_suspend), DEVMETHOD(device_resume, bus_generic_resume), {0, 0} }; static devclass_t vbd_devclass; static driver_t vbd_driver = { "vbd", vbd_methods, 0, }; DRIVER_MODULE(vbd, nexus, vbd_driver, vbd_devclass, 0, 0); /* * Local variables: * mode: C * c-set-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: t * End: */ diff --git a/sys/fs/cd9660/cd9660_rrip.c b/sys/fs/cd9660/cd9660_rrip.c index 739972bded15..9a32e9be4710 100644 --- a/sys/fs/cd9660/cd9660_rrip.c +++ b/sys/fs/cd9660/cd9660_rrip.c @@ -1,724 +1,724 @@ /*- * Copyright (c) 1993, 1994 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension * Support code is derived from software contributed to Berkeley * by Atsushi Murai (amurai@spec.co.jp). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)cd9660_rrip.c 8.6 (Berkeley) 12/5/94 */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include typedef int rrt_func_t(void *, ISO_RRIP_ANALYZE *ana); typedef struct { char type[2]; rrt_func_t *func; void (*func2)(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana); int result; } RRIP_TABLE; static int cd9660_rrip_altname(ISO_RRIP_ALTNAME *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_attr(ISO_RRIP_ATTR *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_cont(ISO_RRIP_CONT *p, ISO_RRIP_ANALYZE *ana); static void cd9660_rrip_defattr(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana); static void cd9660_rrip_defname(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana); static void cd9660_rrip_deftstamp(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_device(ISO_RRIP_DEVICE *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_extref(ISO_RRIP_EXTREF *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_idflag(ISO_RRIP_IDFLAG *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_loop(struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana, RRIP_TABLE *table); static int cd9660_rrip_pclink(ISO_RRIP_CLINK *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_reldir(ISO_RRIP_RELDIR *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_slink(ISO_RRIP_SLINK *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_stop(ISO_SUSP_HEADER *p, ISO_RRIP_ANALYZE *ana); static int cd9660_rrip_tstamp(ISO_RRIP_TSTAMP *p, ISO_RRIP_ANALYZE *ana); /* * POSIX file attribute */ static int cd9660_rrip_attr(p,ana) ISO_RRIP_ATTR *p; ISO_RRIP_ANALYZE *ana; { ana->inop->inode.iso_mode = isonum_733(p->mode); ana->inop->inode.iso_uid = isonum_733(p->uid); ana->inop->inode.iso_gid = isonum_733(p->gid); ana->inop->inode.iso_links = isonum_733(p->links); ana->fields &= ~ISO_SUSP_ATTR; return ISO_SUSP_ATTR; } static void cd9660_rrip_defattr(isodir,ana) struct iso_directory_record *isodir; ISO_RRIP_ANALYZE *ana; { /* But this is a required field! 
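* Rock Ridge makes the PX (POSIX attributes) entry mandatory, so its absence is worth flagging; fall back to plain ISO 9660 defaults via cd9660_defattr() below.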
*/ printf("RRIP without PX field?\n"); cd9660_defattr(isodir,ana->inop,NULL,ISO_FTYPE_RRIP); } /* * Symbolic Links */ static int cd9660_rrip_slink(p,ana) ISO_RRIP_SLINK *p; ISO_RRIP_ANALYZE *ana; { INIT_VPROCG(TD_TO_VPROCG(curthread)); ISO_RRIP_SLINK_COMPONENT *pcomp; ISO_RRIP_SLINK_COMPONENT *pcompe; int len, wlen, cont; char *outbuf, *inbuf; pcomp = (ISO_RRIP_SLINK_COMPONENT *)p->component; pcompe = (ISO_RRIP_SLINK_COMPONENT *)((char *)p + isonum_711(p->h.length)); len = *ana->outlen; outbuf = ana->outbuf; cont = ana->cont; /* * Gathering a Symbolic name from each component with path */ for (; pcomp < pcompe; pcomp = (ISO_RRIP_SLINK_COMPONENT *)((char *)pcomp + ISO_RRIP_SLSIZ + isonum_711(pcomp->clen))) { if (!cont) { if (len < ana->maxlen) { len++; *outbuf++ = '/'; } } cont = 0; inbuf = ".."; wlen = 0; switch (*pcomp->cflag) { case ISO_SUSP_CFLAG_CURRENT: /* Inserting Current */ wlen = 1; break; case ISO_SUSP_CFLAG_PARENT: /* Inserting Parent */ wlen = 2; break; case ISO_SUSP_CFLAG_ROOT: /* Inserting slash for ROOT */ /* Double slash, nothing really to do here. */ break; case ISO_SUSP_CFLAG_VOLROOT: /* Inserting a mount point i.e. "/cdrom" */ /* same as above */ outbuf -= len; len = 0; inbuf = ana->imp->im_mountp->mnt_stat.f_mntonname; wlen = strlen(inbuf); break; case ISO_SUSP_CFLAG_HOST: /* XXXRW: locking. */ /* Inserting hostname i.e. "kurt.tools.de" */ inbuf = V_hostname; wlen = strlen(V_hostname); break; case ISO_SUSP_CFLAG_CONTINUE: cont = 1; /* FALLTHROUGH */ case 0: /* Inserting component */ wlen = isonum_711(pcomp->clen); inbuf = pcomp->name; break; default: printf("RRIP with incorrect flags?"); wlen = ana->maxlen + 1; break; } if (len + wlen > ana->maxlen) { /* indicate error to caller */ ana->cont = 1; ana->fields = 0; ana->outbuf -= *ana->outlen; *ana->outlen = 0; return 0; } bcopy(inbuf,outbuf,wlen); outbuf += wlen; len += wlen; } ana->outbuf = outbuf; *ana->outlen = len; ana->cont = cont; if (!isonum_711(p->flags)) { ana->fields &= ~ISO_SUSP_SLINK; return ISO_SUSP_SLINK; } return 0; } /* * Alternate name */ static int cd9660_rrip_altname(p,ana) ISO_RRIP_ALTNAME *p; ISO_RRIP_ANALYZE *ana; { INIT_VPROCG(TD_TO_VPROCG(curthread)); char *inbuf; int wlen; int cont; inbuf = ".."; wlen = 0; cont = 0; switch (*p->flags) { case ISO_SUSP_CFLAG_CURRENT: /* Inserting Current */ wlen = 1; break; case ISO_SUSP_CFLAG_PARENT: /* Inserting Parent */ wlen = 2; break; case ISO_SUSP_CFLAG_HOST: /* XXXRW: locking. */ /* Inserting hostname i.e. 
"kurt.tools.de" */ inbuf = V_hostname; wlen = strlen(V_hostname); break; case ISO_SUSP_CFLAG_CONTINUE: cont = 1; /* FALLTHROUGH */ case 0: /* Inserting component */ wlen = isonum_711(p->h.length) - 5; inbuf = (char *)p + 5; break; default: printf("RRIP with incorrect NM flags?\n"); wlen = ana->maxlen + 1; break; } if ((*ana->outlen += wlen) > ana->maxlen) { /* treat as no name field */ ana->fields &= ~ISO_SUSP_ALTNAME; ana->outbuf -= *ana->outlen - wlen; *ana->outlen = 0; return 0; } bcopy(inbuf,ana->outbuf,wlen); ana->outbuf += wlen; if (!cont) { ana->fields &= ~ISO_SUSP_ALTNAME; return ISO_SUSP_ALTNAME; } return 0; } static void cd9660_rrip_defname(isodir,ana) struct iso_directory_record *isodir; ISO_RRIP_ANALYZE *ana; { isofntrans(isodir->name,isonum_711(isodir->name_len), ana->outbuf,ana->outlen, 1,isonum_711(isodir->flags)&4, ana->imp->joliet_level, ana->imp->im_flags, ana->imp->im_d2l); switch (*ana->outbuf) { default: break; case 1: *ana->outlen = 2; /* FALLTHROUGH */ case 0: /* outlen is 1 already */ strcpy(ana->outbuf,".."); break; } } /* * Parent or Child Link */ static int cd9660_rrip_pclink(p,ana) ISO_RRIP_CLINK *p; ISO_RRIP_ANALYZE *ana; { *ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift; ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK); return *p->h.type == 'C' ? ISO_SUSP_CLINK : ISO_SUSP_PLINK; } /* * Relocated directory */ static int cd9660_rrip_reldir(p,ana) ISO_RRIP_RELDIR *p; ISO_RRIP_ANALYZE *ana; { /* special hack to make caller aware of RE field */ *ana->outlen = 0; ana->fields = 0; return ISO_SUSP_RELDIR|ISO_SUSP_ALTNAME|ISO_SUSP_CLINK|ISO_SUSP_PLINK; } static int cd9660_rrip_tstamp(p,ana) ISO_RRIP_TSTAMP *p; ISO_RRIP_ANALYZE *ana; { u_char *ptime; ptime = p->time; /* Check a format of time stamp (7bytes/17bytes) */ if (!(*p->flags&ISO_SUSP_TSTAMP_FORM17)) { if (*p->flags&ISO_SUSP_TSTAMP_CREAT) ptime += 7; if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) { cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_mtime, ISO_FTYPE_RRIP); ptime += 7; } else bzero(&ana->inop->inode.iso_mtime,sizeof(struct timespec)); if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) { cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_atime, ISO_FTYPE_RRIP); ptime += 7; } else ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime; if (*p->flags&ISO_SUSP_TSTAMP_ATTR) cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_ctime, ISO_FTYPE_RRIP); else ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime; } else { if (*p->flags&ISO_SUSP_TSTAMP_CREAT) ptime += 17; if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) { cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_mtime); ptime += 17; } else bzero(&ana->inop->inode.iso_mtime,sizeof(struct timespec)); if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) { cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_atime); ptime += 17; } else ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime; if (*p->flags&ISO_SUSP_TSTAMP_ATTR) cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_ctime); else ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime; } ana->fields &= ~ISO_SUSP_TSTAMP; return ISO_SUSP_TSTAMP; } static void cd9660_rrip_deftstamp(isodir,ana) struct iso_directory_record *isodir; ISO_RRIP_ANALYZE *ana; { cd9660_deftstamp(isodir,ana->inop,NULL,ISO_FTYPE_RRIP); } /* * POSIX device modes */ static int cd9660_rrip_device(p,ana) ISO_RRIP_DEVICE *p; ISO_RRIP_ANALYZE *ana; { u_int high, low; high = isonum_733(p->dev_t_high); low = isonum_733(p->dev_t_low); if (high == 0) - ana->inop->inode.iso_rdev = makedev(umajor(low), uminor(low)); + ana->inop->inode.iso_rdev = makedev(major(low), minor(low)); else - 
ana->inop->inode.iso_rdev = makedev(high, uminor(low)); + ana->inop->inode.iso_rdev = makedev(high, minor(low)); ana->fields &= ~ISO_SUSP_DEVICE; return ISO_SUSP_DEVICE; } /* * Flag indicating */ static int cd9660_rrip_idflag(p,ana) ISO_RRIP_IDFLAG *p; ISO_RRIP_ANALYZE *ana; { ana->fields &= isonum_711(p->flags)|~0xff; /* don't touch high bits */ /* special handling of RE field */ if (ana->fields&ISO_SUSP_RELDIR) return cd9660_rrip_reldir(/* XXX */ (ISO_RRIP_RELDIR *)p,ana); return ISO_SUSP_IDFLAG; } /* * Continuation pointer */ static int cd9660_rrip_cont(p,ana) ISO_RRIP_CONT *p; ISO_RRIP_ANALYZE *ana; { ana->iso_ce_blk = isonum_733(p->location); ana->iso_ce_off = isonum_733(p->offset); ana->iso_ce_len = isonum_733(p->length); return ISO_SUSP_CONT; } /* * System Use end */ static int cd9660_rrip_stop(p,ana) ISO_SUSP_HEADER *p; ISO_RRIP_ANALYZE *ana; { return ISO_SUSP_STOP; } /* * Extension reference */ static int cd9660_rrip_extref(p,ana) ISO_RRIP_EXTREF *p; ISO_RRIP_ANALYZE *ana; { if ( ! ((isonum_711(p->len_id) == 10 && bcmp((char *)p + 8,"RRIP_1991A",10) == 0) || (isonum_711(p->len_id) == 10 && bcmp((char *)p + 8,"IEEE_P1282",10) == 0) || (isonum_711(p->len_id) == 9 && bcmp((char *)p + 8,"IEEE_1282", 9) == 0)) || isonum_711(p->version) != 1) return 0; ana->fields &= ~ISO_SUSP_EXTREF; return ISO_SUSP_EXTREF; } static int cd9660_rrip_loop(isodir,ana,table) struct iso_directory_record *isodir; ISO_RRIP_ANALYZE *ana; RRIP_TABLE *table; { RRIP_TABLE *ptable; ISO_SUSP_HEADER *phead; ISO_SUSP_HEADER *pend; struct buf *bp = NULL; char *pwhead; u_short c; int result; /* * Note: If name length is odd, * it will be padding 1 byte after the name */ pwhead = isodir->name + isonum_711(isodir->name_len); if (!(isonum_711(isodir->name_len)&1)) pwhead++; isochar(isodir->name, pwhead, ana->imp->joliet_level, &c, NULL, ana->imp->im_flags, ana->imp->im_d2l); /* If it's not the '.' entry of the root dir obey SP field */ if (c != 0 || isonum_733(isodir->extent) != ana->imp->root_extent) pwhead += ana->imp->rr_skip; else pwhead += ana->imp->rr_skip0; phead = (ISO_SUSP_HEADER *)pwhead; pend = (ISO_SUSP_HEADER *)((char *)isodir + isonum_711(isodir->length)); result = 0; while (1) { ana->iso_ce_len = 0; /* * Note: "pend" should be more than one SUSP header */ while (pend >= phead + 1) { if (isonum_711(phead->version) == 1) { for (ptable = table; ptable->func; ptable++) { if (*phead->type == *ptable->type && phead->type[1] == ptable->type[1]) { result |= ptable->func(phead,ana); break; } } if (!ana->fields) break; } if (result&ISO_SUSP_STOP) { result &= ~ISO_SUSP_STOP; break; } /* plausibility check */ if (isonum_711(phead->length) < sizeof(*phead)) break; /* * move to next SUSP * Hopefully this works with newer versions, too */ phead = (ISO_SUSP_HEADER *)((char *)phead + isonum_711(phead->length)); } if (ana->fields && ana->iso_ce_len) { if (ana->iso_ce_blk >= ana->imp->volume_space_size || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size || bread(ana->imp->im_devvp, ana->iso_ce_blk << (ana->imp->im_bshift - DEV_BSHIFT), ana->imp->logical_block_size, NOCRED, &bp)) /* what to do now? */ break; phead = (ISO_SUSP_HEADER *)(bp->b_data + ana->iso_ce_off); pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len); } else break; } if (bp) brelse(bp); /* * If we don't find the Basic SUSP stuffs, just set default value * (attribute/time stamp) */ for (ptable = table; ptable->func2; ptable++) if (!(ptable->result&result)) ptable->func2(isodir,ana); return result; } /* * Get Attributes. 
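* Each RRIP_TABLE entry pairs a two-character SUSP signature with its handler, an optional fallback (func2) that supplies defaults when the field never appears, and the result bit it accounts for.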
*/ /* * XXX the casts are bogus but will do for now. */ #define BC (rrt_func_t *) static RRIP_TABLE rrip_table_analyze[] = { { "PX", BC cd9660_rrip_attr, cd9660_rrip_defattr, ISO_SUSP_ATTR }, { "TF", BC cd9660_rrip_tstamp, cd9660_rrip_deftstamp, ISO_SUSP_TSTAMP }, { "PN", BC cd9660_rrip_device, 0, ISO_SUSP_DEVICE }, { "RR", BC cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG }, { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT }, { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP }, { "", 0, 0, 0 } }; int cd9660_rrip_analyze(isodir,inop,imp) struct iso_directory_record *isodir; struct iso_node *inop; struct iso_mnt *imp; { ISO_RRIP_ANALYZE analyze; analyze.inop = inop; analyze.imp = imp; analyze.fields = ISO_SUSP_ATTR|ISO_SUSP_TSTAMP|ISO_SUSP_DEVICE; return cd9660_rrip_loop(isodir,&analyze,rrip_table_analyze); } /* * Get Alternate Name. */ static RRIP_TABLE rrip_table_getname[] = { { "NM", BC cd9660_rrip_altname, cd9660_rrip_defname, ISO_SUSP_ALTNAME }, { "CL", BC cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK }, { "PL", BC cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK }, { "RE", BC cd9660_rrip_reldir, 0, ISO_SUSP_RELDIR }, { "RR", BC cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG }, { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT }, { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP }, { "", 0, 0, 0 } }; int cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp) struct iso_directory_record *isodir; char *outbuf; u_short *outlen; ino_t *inump; struct iso_mnt *imp; { ISO_RRIP_ANALYZE analyze; RRIP_TABLE *tab; u_short c; analyze.outbuf = outbuf; analyze.outlen = outlen; analyze.maxlen = NAME_MAX; analyze.inump = inump; analyze.imp = imp; analyze.fields = ISO_SUSP_ALTNAME|ISO_SUSP_RELDIR|ISO_SUSP_CLINK|ISO_SUSP_PLINK; *outlen = 0; isochar(isodir->name, isodir->name + isonum_711(isodir->name_len), imp->joliet_level, &c, NULL, imp->im_flags, imp->im_d2l); tab = rrip_table_getname; if (c == 0 || c == 1) { cd9660_rrip_defname(isodir,&analyze); analyze.fields &= ~ISO_SUSP_ALTNAME; tab++; } return cd9660_rrip_loop(isodir,&analyze,tab); } /* * Get Symbolic Link. */ static RRIP_TABLE rrip_table_getsymname[] = { { "SL", BC cd9660_rrip_slink, 0, ISO_SUSP_SLINK }, { "RR", BC cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG }, { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT }, { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP }, { "", 0, 0, 0 } }; int cd9660_rrip_getsymname(isodir,outbuf,outlen,imp) struct iso_directory_record *isodir; char *outbuf; u_short *outlen; struct iso_mnt *imp; { ISO_RRIP_ANALYZE analyze; analyze.outbuf = outbuf; analyze.outlen = outlen; *outlen = 0; analyze.maxlen = MAXPATHLEN; analyze.cont = 1; /* don't start with a slash */ analyze.imp = imp; analyze.fields = ISO_SUSP_SLINK; return (cd9660_rrip_loop(isodir,&analyze,rrip_table_getsymname)&ISO_SUSP_SLINK); } static RRIP_TABLE rrip_table_extref[] = { { "ER", BC cd9660_rrip_extref, 0, ISO_SUSP_EXTREF }, { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT }, { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP }, { "", 0, 0, 0 } }; /* * Check for Rock Ridge Extension and return offset of its fields. * Note: We insist on the ER field. */ int cd9660_rrip_offset(isodir,imp) struct iso_directory_record *isodir; struct iso_mnt *imp; { ISO_RRIP_OFFSET *p; ISO_RRIP_ANALYZE analyze; imp->rr_skip0 = 0; p = (ISO_RRIP_OFFSET *)(isodir->name + 1); if (bcmp(p,"SP\7\1\276\357",6)) { /* Maybe, it's a CDROM XA disc? 
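* XA discs carry an extra system-use area ahead of the SUSP data, so retry the "SP" signature check past those 15 bytes.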
*/ imp->rr_skip0 = 15; p = (ISO_RRIP_OFFSET *)((char *)p + 15); if (bcmp(p,"SP\7\1\276\357",6)) return -1; } analyze.imp = imp; analyze.fields = ISO_SUSP_EXTREF; if (!(cd9660_rrip_loop(isodir,&analyze,rrip_table_extref)&ISO_SUSP_EXTREF)) return -1; return isonum_711(p->skip); } diff --git a/sys/nfs4client/nfs4_subs.c b/sys/nfs4client/nfs4_subs.c index a08240ae21d2..21c89c9e1a60 100644 --- a/sys/nfs4client/nfs4_subs.c +++ b/sys/nfs4client/nfs4_subs.c @@ -1,1367 +1,1367 @@ /* $FreeBSD$ */ /* $Id: nfs4_subs.c,v 1.52 2003/11/05 14:58:59 rees Exp $ */ /*- * copyright (c) 2003 * the regents of the university of michigan * all rights reserved * * permission is granted to use, copy, create derivative works and redistribute * this software and such derivative works for any purpose, so long as the name * of the university of michigan is not used in any advertising or publicity * pertaining to the use or distribution of this software without specific, * written prior authorization. if the above copyright notice or any other * identification of the university of michigan is included in any copy of any * portion of this software, then the disclaimer below must also be included. * * this software is provided as is, without representation from the university * of michigan as to its fitness for any purpose, and without warranty by the * university of michigan of any kind, either express or implied, including * without limitation the implied warranties of merchantability and fitness for * a particular purpose. the regents of the university of michigan shall not be * liable for any damages, including special, indirect, incidental, or * consequential damages, with respect to any claim arising out of or in * connection with the use of the software, even if it has been or is hereafter * advised of the possibility of such damages. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NFSM_DISSECT(s) do { \ tl = nfsm_dissect_xx((s), md, dpos); \ if (tl == NULL) { \ printf("NFSM_DISSECT error; allocation (%s/%d) (%s:%d)\n", #s, s, __FILE__, __LINE__); \ return (EBADRPC); \ } \ } while (0) #define NFSM_ADV(s) do { \ t1 = nfsm_adv_xx((s), md, dpos); \ if (t1 != 0) { \ printf("NFSM_ADV error; allocation (%s/%d) (%s:%d)\n", #s, s, __FILE__, __LINE__); \ return (EBADRPC); \ } \ } while (0) #define NFSM_MTOTIME(t) do { \ NFSM_DISSECT(3 * NFSX_UNSIGNED); \ (t).tv_sec = fxdr_hyper(tl); \ tl += 2; \ (t).tv_nsec = fxdr_unsigned(long, *tl++); \ } while (0) static uint32_t __fsinfo_bm[2], __fsattr_bm[2], __getattr_bm[2], __readdir_bm[2]; nfsv4bitmap nfsv4_fsinfobm = { 2, __fsinfo_bm }; nfsv4bitmap nfsv4_fsattrbm = { 2, __fsattr_bm }; nfsv4bitmap nfsv4_getattrbm = { 2, __getattr_bm }; nfsv4bitmap nfsv4_readdirbm = { 2, __readdir_bm }; /* Helper routines */ int nfsm_v4build_attrs_xx(struct vattr *, struct mbuf **, caddr_t *); int nfsm_v4dissect_changeinfo_xx(nfsv4changeinfo *, struct mbuf **, caddr_t *); void nfsm_v4init(void) { /* Set up bitmasks */ FA4_SET(FA4_FSID, __fsinfo_bm); FA4_SET(FA4_MAXREAD, __fsinfo_bm); FA4_SET(FA4_MAXWRITE, __fsinfo_bm); FA4_SET(FA4_LEASE_TIME, __fsinfo_bm); FA4_SET(FA4_FSID, __fsattr_bm); FA4_SET(FA4_FILES_FREE, __fsattr_bm); FA4_SET(FA4_FILES_TOTAL, __fsattr_bm); FA4_SET(FA4_SPACE_AVAIL, __fsattr_bm); FA4_SET(FA4_SPACE_FREE, __fsattr_bm); FA4_SET(FA4_SPACE_TOTAL, __fsattr_bm); FA4_SET(FA4_TYPE, __getattr_bm); FA4_SET(FA4_FSID, __getattr_bm); FA4_SET(FA4_SIZE, __getattr_bm); FA4_SET(FA4_MODE, __getattr_bm); FA4_SET(FA4_RAWDEV, __getattr_bm); FA4_SET(FA4_NUMLINKS, __getattr_bm); FA4_SET(FA4_OWNER, __getattr_bm); FA4_SET(FA4_OWNER_GROUP, __getattr_bm); FA4_SET(FA4_FILEID, __getattr_bm); FA4_SET(FA4_TIME_ACCESS, __getattr_bm); FA4_SET(FA4_TIME_CREATE, __getattr_bm); FA4_SET(FA4_TIME_METADATA, __getattr_bm); FA4_SET(FA4_TIME_MODIFY, __getattr_bm); FA4_SET(FA4_TYPE, __readdir_bm); FA4_SET(FA4_FSID, __readdir_bm); FA4_SET(FA4_FILEID, __readdir_bm); FA4_SET(FA4_RDATTR_ERROR, __readdir_bm); } /* * Util */ uint32_t nfs_v4fileid4_to_fileid(uint64_t fid) { return ((uint32_t)((fid >> 32) | fid)); } void nfs_v4initcompound(struct nfs4_compound *cp) { bzero(cp, sizeof(*cp)); } /* * Build/dissect XDR buffer with a format string. * * u - unsigned * h - hyper * s - stringlength, string * k - skip length (bytes) * a - arraylength, componentlength, array * o - opaque fixed length * O - opaque variable length in bytes */ void nfsm_buildf_xx(struct mbuf **mb, caddr_t *bpos, char *fmt, ...)
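/* A minimal usage sketch, mirroring nfsm_v4build_lookup_xx() below: nfsm_buildf_xx(mb, bpos, "us", NFSV4OP_LOOKUP, l->namelen, l->name) emits one XDR word for the op code followed by a counted, padded string. */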
{ uint32_t *tl, t1, len, uval; uint64_t hval; va_list args; char *p, *which; va_start(args, fmt); for (which = fmt; *which != '\0'; which++) switch (*which) { case 'u': /* Unsigned */ tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); uval = va_arg(args, uint32_t); *tl++ = txdr_unsigned(uval); break; case 'h': /* Hyper */ tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); hval = va_arg(args, uint64_t); txdr_hyper(hval, tl); break; case 'o': /* Fixed-length opaque */ len = va_arg(args, uint32_t); p = va_arg(args, char *); tl = nfsm_build_xx(nfsm_rndup(len), mb, bpos); bcopy(p, tl, len); break; case 'O': /* Variable-length opaque */ case 's': /* String */ len = va_arg(args, uint32_t); p = va_arg(args, char *); t1 = nfsm_strtom_xx(p, len, len, mb, bpos); break; case 'k': /* Skip */ len = va_arg(args, uint32_t); nfsm_build_xx(nfsm_rndup(len), mb, bpos); break; default: panic("Invalid buildf string %s[%c]", fmt, *which); break; } va_end(args); } int nfsm_dissectf_xx(struct mbuf **md, caddr_t *dpos, char *fmt, ...) { uint32_t *tl, t1, len, *uval; uint64_t *hval; va_list args; char *p, *which; va_start(args, fmt); for (which = fmt; *which != '\0'; which++) switch (*which) { case 'u': /* Unsigned */ tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return (EBADRPC); uval = va_arg(args, uint32_t *); *uval = fxdr_unsigned(uint32_t, *tl++); break; case 'h': /* Hyper */ tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); if (tl == NULL) return (EBADRPC); hval = va_arg(args, uint64_t *); *hval = fxdr_hyper(tl); break; case 'o': /* Fixed-length opaque */ len = va_arg(args, uint32_t); p = va_arg(args, void *); tl = nfsm_dissect_xx(nfsm_rndup(len), md, dpos); if (tl == NULL) return (EBADRPC); bcopy(tl, p, len); break; case 'O': /* Variable-length opaque */ case 's': /* String */ len = va_arg(args, uint32_t); p = va_arg(args, char *); tl = nfsm_dissect_xx(nfsm_rndup(len), md, dpos); if (tl == NULL) return (EBADRPC); bcopy(tl, p, len); break; case 'k': /* Skip bytes */ len = va_arg(args, uint32_t); t1 = nfsm_adv_xx(nfsm_rndup(len), md, dpos); break; default: panic("Invalid dissectf string %s[%c]", fmt, *which); break; } va_end(args); return (0); } /* * XXX - There are a few problems with the way the postops are placed * in the code. Ideally, they should be taken care of immediately, so * as to avoid unnecessary waits for mutexes, but then we would be * introducing even more complexity by having to handle two separate * cases. Also, since they are placed at the end of the vnops', there * may be operations which sleep in between, further extending this * wait. It is conceivable that there is a deadlock condition there, * too. * * Also, for vnops that do multiple operations, it's inconvenient * since on error, individual decoding will goto nfsmout. */ int nfs_v4postop(struct nfs4_compound *cp, int status) { struct nfs4_fctx *fcp = cp->fcp; /* * XXX does the previous result need to be stored with the * lockowner? ack, spec is unclear .. */ if (fcp != NULL) if (cp->seqidused < cp->rep_nops || (cp->seqidused + 1 == cp->rep_nops && NFS4_SEQIDMUTATINGERROR(status))) fcp->lop->lo_seqid++; return (status); } int nfs_v4handlestatus(int status, struct nfs4_compound *cp) { return (status); } /* * Initial setup of compound.
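* Emits the tag, the minor version, and a placeholder op count; cp->req_nopsp is saved so that nfsm_v4build_finalize_xx() can backfill the real count once every op has bumped req_nops.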
*/ int nfsm_v4build_compound_xx(struct nfs4_compound *cp, char *tag, struct mbuf **mb, caddr_t *bpos) { uint32_t t1, *tl, siz; /* Tag */ siz = strlen(tag); t1 = nfsm_rndup(siz) + NFSX_UNSIGNED; if (t1 <= M_TRAILINGSPACE(*mb)) { tl = nfsm_build_xx(t1, mb, bpos); *tl++ = txdr_unsigned(siz); *(tl + ((t1 >> 2) - 2)) = 0; bcopy(tag, tl, siz); } else { t1 = nfsm_strtmbuf(mb, bpos, (const char *)tag, siz); if (t1 != 0) return (t1); } /* Minor version and argarray*/ tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); *tl++ = txdr_unsigned(NFS4_MINOR_VERSION); /* Save for backfill */ cp->req_nopsp = tl; *tl = txdr_unsigned(0); cp->curvp = NULL; cp->savevp = NULL; return (0); } /* * XXX * - backfill for stateid, and such */ int nfsm_v4build_finalize_xx(struct nfs4_compound *cp, struct mbuf **mb, caddr_t *bpos) { *cp->req_nopsp = txdr_unsigned(cp->req_nops); return (0); } int nfsm_v4build_putfh_xx(struct nfs4_compound *cp, struct vnode *vp, struct mbuf **mb, caddr_t *bpos) { uint32_t t1; /* Op */ nfsm_buildf_xx(mb, bpos, "u", NFSV4OP_PUTFH); /* FH */ t1 = nfsm_fhtom_xx(vp, 1, mb, bpos); if (t1 != 0) return (t1); cp->req_nops++; cp->curvp = vp; return (0); } int nfsm_v4build_putfh_nv_xx(struct nfs4_compound *cp, struct nfs4_oparg_getfh *gfh, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "uuo", NFSV4OP_PUTFH, gfh->fh_len, gfh->fh_len, &gfh->fh_val); cp->req_nops++; return (0); } int nfsm_v4build_simple_xx(struct nfs4_compound *cp, uint32_t op, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "u", op); cp->req_nops++; return (0); } int nfsm_v4build_getattr_xx(struct nfs4_compound *cp, struct nfs4_oparg_getattr *ga, struct mbuf **mb, caddr_t *bpos) { int i; /* Op + bitmap length + bitmap */ nfsm_buildf_xx(mb, bpos, "uu", NFSV4OP_GETATTR, ga->bm->bmlen); for (i = 0; i < ga->bm->bmlen; i++) nfsm_buildf_xx(mb, bpos, "u", ga->bm->bmval[i]); ga->vp = cp->curvp; cp->req_nops++; return (0); } int nfsm_v4build_setattr_xx(struct nfs4_compound *cp, struct vattr *vap, struct nfs4_fctx *fcp, struct mbuf **mb, caddr_t *bpos) { int error; static char zero_stateid[NFSX_V4STATEID]; nfsm_buildf_xx(mb, bpos, "uo", NFSV4OP_SETATTR, NFSX_V4STATEID, fcp ? 
fcp->stateid : zero_stateid); error = nfsm_v4build_attrs_xx(vap, mb, bpos); if (error == 0) cp->req_nops++; return (error); } int nfsm_v4build_getfh_xx(struct nfs4_compound *cp, struct nfs4_oparg_getfh *gfh, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "u", NFSV4OP_GETFH); gfh->vp = cp->curvp; cp->req_nops++; return (0); } int nfsm_v4build_lookup_xx(struct nfs4_compound *cp, struct nfs4_oparg_lookup *l, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "us", NFSV4OP_LOOKUP, l->namelen, l->name); cp->curvp = l->vp; cp->req_nops++; return (0); } int nfsm_v4build_setclientid_xx(struct nfs4_compound *cp, struct nfs4_oparg_setclientid *sci, struct mbuf **mb, caddr_t *bpos) { struct timeval tv; microtime(&tv); nfsm_buildf_xx(mb, bpos, "uuusussu", NFSV4OP_SETCLIENTID, tv.tv_sec, tv.tv_usec, sci->namelen, sci->name, sci->cb_prog, sci->cb_netidlen, sci->cb_netid, sci->cb_univaddrlen, sci->cb_univaddr, 0xCA11BACC); cp->req_nops++; return (0); } int nfsm_v4build_setclientid_confirm_xx(struct nfs4_compound *cp, struct nfs4_oparg_setclientid *sci, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "uho", NFSV4OP_SETCLIENTID_CONFIRM, sci->clientid, sizeof(sci->verf), sci->verf); cp->req_nops++; return (0); } int nfsm_v4build_open_xx(struct nfs4_compound *cp, struct nfs4_oparg_open *op, struct mbuf **mb, caddr_t *bpos) { int error = 0; struct nfs4_lowner *lop = op->fcp->lop; nfsm_buildf_xx(mb, bpos, "uuuuhuu", NFSV4OP_OPEN, lop->lo_seqid, op->flags & O_ACCMODE, NFSV4OPENSHARE_DENY_NONE, cp->nmp->nm_clientid, 4, lop->lo_id); if (op->flags & O_CREAT) { nfsm_buildf_xx(mb, bpos, "u", OTCREATE); /* openflag4: mode */ nfsm_buildf_xx(mb, bpos, "u", CMUNCHECKED); /* openflag4: createattrs... */ if (op->vap != NULL) { if (op->flags & O_TRUNC) op->vap->va_size = 0; error = nfsm_v4build_attrs_xx(op->vap, mb, bpos); if (error != 0) return (error); } else nfsm_buildf_xx(mb, bpos, "uu", 0, 0); } else nfsm_buildf_xx(mb, bpos, "u", OTNOCREATE); nfsm_buildf_xx(mb, bpos, "us", op->ctype, op->cnp->cn_namelen, op->cnp->cn_nameptr); cp->seqidused = cp->req_nops++; cp->fcp = op->fcp; return (error); } /* * XXX * - Wait on recovery */ int nfsm_v4build_open_confirm_xx(struct nfs4_compound *cp, struct nfs4_oparg_open *op, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "uou", NFSV4OP_OPEN_CONFIRM, NFSX_V4STATEID, op->fcp->stateid, op->fcp->lop->lo_seqid); cp->seqidused = cp->req_nops++; cp->fcp = op->fcp; return (0); } /* * XXX * - Wait on recovery */ int nfsm_v4build_close_xx(struct nfs4_compound *cp, struct nfs4_fctx *fcp, struct mbuf **mb, caddr_t *bpos) { struct nfs4_lowner *lop = fcp->lop; nfsm_buildf_xx(mb, bpos, "uuo", NFSV4OP_CLOSE, lop->lo_seqid, NFSX_V4STATEID, fcp->stateid); cp->seqidused = cp->req_nops++; cp->fcp = fcp; return (0); } int nfsm_v4build_access_xx(struct nfs4_compound *cp, struct nfs4_oparg_access *acc, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "uu", NFSV4OP_ACCESS, acc->mode); cp->req_nops++; return (0); } int nfsm_v4build_read_xx(struct nfs4_compound *cp, struct nfs4_oparg_read *r, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "uohu", NFSV4OP_READ, NFSX_V4STATEID, r->fcp->stateid, r->off, r->maxcnt); cp->req_nops++; return (0); } int nfsm_v4build_write_xx(struct nfs4_compound *cp, struct nfs4_oparg_write *w, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "uohuu", NFSV4OP_WRITE, NFSX_V4STATEID, w->fcp->stateid, w->off, w->stable, w->cnt); cp->req_nops++; return (nfsm_uiotombuf(w->uiop, mb, w->cnt, bpos)); } int 
nfsm_v4build_commit_xx(struct nfs4_compound *cp, struct nfs4_oparg_commit *c, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "uhu", NFSV4OP_COMMIT, c->start, c->len); cp->req_nops++; return (0); } int nfsm_v4build_readdir_xx(struct nfs4_compound *cp, struct nfs4_oparg_readdir *r, struct mbuf **mb, caddr_t *bpos) { int i; nfsm_buildf_xx(mb, bpos, "uhouuu", NFSV4OP_READDIR, r->cookie, sizeof(r->verf), r->verf, r->cnt >> 4, /* meaningless "dircount" field */ r->cnt, r->bm->bmlen); for (i = 0; i < r->bm->bmlen; i++) nfsm_buildf_xx(mb, bpos, "u", r->bm->bmval[i]); cp->req_nops++; return (0); } int nfsm_v4build_renew_xx(struct nfs4_compound *cp, uint64_t cid, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "uh", NFSV4OP_RENEW, cid); cp->req_nops++; return (0); } int nfsm_v4build_create_xx(struct nfs4_compound *cp, struct nfs4_oparg_create *c, struct mbuf **mb, caddr_t *bpos) { uint32_t t1; nfsm_buildf_xx(mb, bpos, "uu", NFSV4OP_CREATE, c->type); if (c->type == NFLNK) /* XXX strlen */ nfsm_buildf_xx(mb, bpos, "s", strlen(c->linktext), c->linktext); else if (c->type == NFCHR || c->type == NFBLK) nfsm_buildf_xx(mb, bpos, "uu", - umajor(c->vap->va_rdev), uminor(c->vap->va_rdev)); + major(c->vap->va_rdev), minor(c->vap->va_rdev)); /* Name */ nfsm_buildf_xx(mb, bpos, "s", c->namelen, c->name); /* Attributes */ t1 = nfsm_v4build_attrs_xx(c->vap, mb, bpos); if (t1 != 0) return (t1); cp->req_nops++; return (0); } int nfsm_v4build_rename_xx(struct nfs4_compound *cp, struct nfs4_oparg_rename *r, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "uss", NFSV4OP_RENAME, r->fnamelen, r->fname, r->tnamelen, r->tname); cp->req_nops++; return (0); } int nfsm_v4build_link_xx(struct nfs4_compound *cp, struct nfs4_oparg_link *l, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "us", NFSV4OP_LINK, l->namelen, l->name); cp->req_nops++; return (0); } int nfsm_v4build_remove_xx(struct nfs4_compound *cp, const char *name, u_int namelen, struct mbuf **mb, caddr_t *bpos) { nfsm_buildf_xx(mb, bpos, "us", NFSV4OP_REMOVE, namelen, name); cp->req_nops++; return (0); } int nfsm_v4build_attrs_xx(struct vattr *vap, struct mbuf **mb, caddr_t *bpos) { uint32_t *tl, *attrlenp, *bmvalp, len; size_t siz; tl = nfsm_build_xx(4 * NFSX_UNSIGNED, mb, bpos); *tl++ = txdr_unsigned(2); /* bitmap length */ bmvalp = tl; bzero(bmvalp, 8); tl += 2; attrlenp = tl; len = 0; if (vap->va_size != VNOVAL) { tl = nfsm_build_xx(2 * NFSX_UNSIGNED, mb, bpos); FA4_SET(FA4_SIZE, bmvalp); txdr_hyper(vap->va_size, tl); tl += 2; len += 2 * NFSX_UNSIGNED; } if (vap->va_mode != (u_short)VNOVAL) { tl = nfsm_build_xx(NFSX_UNSIGNED, mb, bpos); FA4_SET(FA4_MODE, bmvalp); *tl++ = txdr_unsigned(vap->va_mode); len += NFSX_UNSIGNED; } if (vap->va_uid != VNOVAL) { int error; char *name; error = idmap_uid_to_name(vap->va_uid, &name, &siz); if (error || name == NULL || siz == 0) { /* XXX */ siz = sizeof("nobody") - 1; tl = nfsm_build_xx(NFSX_UNSIGNED + nfsm_rndup(siz), mb, bpos); *tl++ = txdr_unsigned(siz); bcopy("nobody", tl, siz); len += NFSX_UNSIGNED + nfsm_rndup(siz); } else { tl = nfsm_build_xx(NFSX_UNSIGNED + nfsm_rndup(siz), mb, bpos); *tl++ = txdr_unsigned(siz); bcopy(name, tl, siz); len += NFSX_UNSIGNED + nfsm_rndup(siz); } FA4_SET(FA4_OWNER, bmvalp); } if (vap->va_gid != VNOVAL) { int error; char *name; error = idmap_gid_to_name(vap->va_gid, &name, &siz); if (error || name == NULL || siz == 0) { /* XXX */ siz = sizeof("nogroup") - 1; tl = nfsm_build_xx(NFSX_UNSIGNED + nfsm_rndup(siz), mb, bpos); *tl++ = 
txdr_unsigned(siz); bcopy("nogroup", tl, siz); len += NFSX_UNSIGNED + nfsm_rndup(siz); } else { tl = nfsm_build_xx(NFSX_UNSIGNED + nfsm_rndup(siz), mb, bpos); *tl++ = txdr_unsigned(siz); bcopy(name, tl, siz); len += NFSX_UNSIGNED + nfsm_rndup(siz); } FA4_SET(FA4_OWNER_GROUP, bmvalp); } if (vap->va_atime.tv_sec != VNOVAL) { uint64_t val = vap->va_atime.tv_sec; tl = nfsm_build_xx(4 * NFSX_UNSIGNED, mb, bpos); FA4_SET(FA4_TIME_ACCESS_SET, bmvalp); *tl++ = txdr_unsigned(THCLIENTTIME); txdr_hyper(val, tl); tl += 2; *tl++ = txdr_unsigned(vap->va_atime.tv_nsec); len += 4 * NFSX_UNSIGNED; } if (vap->va_mtime.tv_sec != VNOVAL) { uint64_t val = vap->va_mtime.tv_sec; tl = nfsm_build_xx(4 * NFSX_UNSIGNED, mb, bpos); FA4_SET(FA4_TIME_MODIFY_SET, bmvalp); *tl++ = txdr_unsigned(THCLIENTTIME); txdr_hyper(val, tl); tl += 2; *tl++ = txdr_unsigned(vap->va_mtime.tv_nsec); len += 4 * NFSX_UNSIGNED; } bmvalp[0] = txdr_unsigned(bmvalp[0]); bmvalp[1] = txdr_unsigned(bmvalp[1]); *attrlenp = txdr_unsigned(len); return (0); } int nfsm_v4dissect_compound_xx(struct nfs4_compound *cp, struct mbuf **md, caddr_t *dpos) { uint32_t taglen, t1, *tl; tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return (EBADRPC); /* Reply status is handled by the RPC code */ taglen = fxdr_unsigned(uint32_t, *tl++); t1 = nfsm_adv_xx(nfsm_rndup(taglen), md, dpos); if (t1 != 0) return (EBADRPC); tl = nfsm_dissect_xx(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return (EBADRPC); cp->rep_nops = fxdr_unsigned(uint32_t, *tl++); return (0); } int nfsm_v4dissect_simple_xx(struct nfs4_compound *cp, uint32_t op, uint32_t skipbytes, struct mbuf **md, caddr_t *dpos) { uint32_t t1, dop, status; t1 = nfsm_dissectf_xx(md, dpos, "uu", &dop, &status); if (t1 != 0) return (t1); if (dop != op || status != 0) return (EBADRPC); if (skipbytes > 0) NFSM_ADV(nfsm_rndup(skipbytes)); return (0); } int nfsm_v4dissect_getattr_xx(struct nfs4_compound *cp, struct nfs4_oparg_getattr *ga, struct mbuf **md, caddr_t *dpos) { uint32_t *tl; tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_GETATTR || *tl++ != 0) return (EBADRPC); return (nfsm_v4dissect_attrs_xx(&ga->fa, md, dpos)); } int nfsm_v4dissect_setattr_xx(struct nfs4_compound *cp, struct mbuf **md, caddr_t *dpos) { uint32_t t1, op, bmlen, status; t1 = nfsm_dissectf_xx(md, dpos, "uu", &op, &status); if (t1 != 0) return (t1); if (op != NFSV4OP_SETATTR || status != 0) return (EBADRPC); t1 = nfsm_dissectf_xx(md, dpos, "u", &bmlen); if (t1 != 0) return (t1); return (nfsm_dissectf_xx(md, dpos, "k", bmlen << 2)); } int nfsm_v4dissect_getfh_xx(struct nfs4_compound *cp, struct nfs4_oparg_getfh *gfh, struct mbuf **md, caddr_t *dpos) { uint32_t *tl, len, xdrlen; tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_GETFH) return (EBADRPC); if (*tl++ != 0) return (EBADRPC); NFSM_DISSECT(NFSX_UNSIGNED); len = fxdr_unsigned(uint32_t, *tl++); if (len > NFSX_V4FH) return (EBADRPC); /* XXX integrate this into nfs_mtofh()? 
*/ gfh->fh_len = len; xdrlen = nfsm_rndup(len); NFSM_DISSECT(xdrlen); bcopy(tl, &gfh->fh_val, xdrlen); return (0); } int nfsm_v4dissect_setclientid_xx(struct nfs4_compound *cp, struct nfs4_oparg_setclientid *sci, struct mbuf **md, caddr_t *dpos) { uint32_t *tl; tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_SETCLIENTID) return (EBADRPC); /* Handle NFS4ERR_CLID_INUSE specially */ if (*tl++ != 0) return (EBADRPC); NFSM_DISSECT(2 * NFSX_UNSIGNED); sci->clientid = fxdr_hyper(tl); NFSM_DISSECT(nfsm_rndup(NFSX_V4VERF)); bcopy(tl, sci->verf, NFSX_V4VERF); return (0); } int nfsm_v4dissect_close_xx(struct nfs4_compound *cp, struct nfs4_fctx *fcp, struct mbuf **md, caddr_t *dpos) { uint32_t *tl, t1; tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_CLOSE || *tl++ != 0) return (EBADRPC); /* Copy stateid */ t1 = nfsm_dissectf_xx(md, dpos, "o", NFSX_V4STATEID, fcp->stateid); if (t1 != 0) return (t1); return (0); } int nfsm_v4dissect_access_xx(struct nfs4_compound *cp, struct nfs4_oparg_access *acc, struct mbuf **md, caddr_t *dpos) { uint32_t *tl; tl = nfsm_dissect_xx(4 * NFSX_UNSIGNED, md, dpos); if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_ACCESS || *tl++ != 0) return (EBADRPC); acc->supported = fxdr_unsigned(uint32_t, *tl++); acc->rmode = fxdr_unsigned(uint32_t, *tl++); return (0); } int nfsm_v4dissect_open_xx(struct nfs4_compound *cp, struct nfs4_oparg_open *op, struct mbuf **md, caddr_t *dpos) { uint32_t *tl, t1, bmlen, delegtype = ODNONE; int error = 0; nfsv4changeinfo cinfo; struct nfs4_fctx *fcp = op->fcp; tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_OPEN || *tl++ != 0) return (EBADRPC); t1 = nfsm_dissectf_xx(md, dpos, "o", NFSX_V4STATEID, fcp->stateid); if (t1 != 0) return (t1); error = nfsm_v4dissect_changeinfo_xx(&cinfo, md, dpos); if (error != 0) goto nfsmout; NFSM_DISSECT(2 * NFSX_UNSIGNED); op->rflags = fxdr_unsigned(uint32_t, *tl++); bmlen = fxdr_unsigned(uint32_t, *tl++); if (bmlen > 2) { error = EBADRPC; goto nfsmout; } /* Skip */ NFSM_ADV(nfsm_rndup(bmlen << 2)); NFSM_DISSECT(NFSX_UNSIGNED); delegtype = fxdr_unsigned(uint32_t, *tl++); switch (delegtype) { case ODREAD: case ODWRITE: printf("nfs4: client delegation not yet supported\n"); error = EOPNOTSUPP; goto nfsmout; break; case ODNONE: default: break; } nfsmout: return (error); } int nfsm_v4dissect_open_confirm_xx(struct nfs4_compound *cp, struct nfs4_oparg_open *op, struct mbuf **md, caddr_t *dpos) { uint32_t *tl; tl = nfsm_dissect_xx(2 * NFSX_UNSIGNED, md, dpos); if (tl == NULL || fxdr_unsigned(uint32_t, *tl++) != NFSV4OP_OPEN_CONFIRM || *tl++ != 0) return (EBADRPC); return nfsm_dissectf_xx(md, dpos, "o", NFSX_V4STATEID, op->fcp->stateid); } int nfsm_v4dissect_read_xx(struct nfs4_compound *cp, struct nfs4_oparg_read *r, struct mbuf **md, caddr_t *dpos) { uint32_t op, status, t1; t1 = nfsm_dissectf_xx(md, dpos, "uu", &op, &status); if (t1 != 0) return (t1); if (op != NFSV4OP_READ || status != 0) return (EBADRPC); t1 = nfsm_dissectf_xx(md, dpos, "uu", &r->eof, &r->retlen); if (t1 != 0) return (t1); return (nfsm_mbuftouio(md, r->uiop, r->retlen, dpos)); } int nfsm_v4dissect_write_xx(struct nfs4_compound *cp, struct nfs4_oparg_write *w, struct mbuf **md, caddr_t *dpos) { uint32_t op, status, t1; t1 = nfsm_dissectf_xx(md, dpos, "uu", &op, &status); if (t1 != 0) return (t1); if (op != NFSV4OP_WRITE || status != 0) return (EBADRPC); 
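/*
 * Key to the nfsm_buildf_xx()/nfsm_dissectf_xx() format strings,
 * inferred from the call sites in this file: 'u' moves one 32-bit
 * unsigned, 'h' a 64-bit hyper, 'o' opaque bytes whose length is
 * given by the preceding argument, 's' a counted string, and 'k'
 * skips over the given number of bytes.  The "uuo" below thus pulls
 * the returned write count, the commitment level and the write
 * verifier out of the reply in one call.
 */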
return (nfsm_dissectf_xx(md, dpos, "uuo", &w->retlen, &w->committed, NFSX_V4VERF, w->wverf)); } int nfsm_v4dissect_commit_xx(struct nfs4_compound *cp, struct nfs4_oparg_commit *c, struct mbuf **md, caddr_t *dpos) { uint32_t t1, op, status; t1 = nfsm_dissectf_xx(md, dpos, "uu", &op, &status); if (t1 != 0) return (t1); if (op != NFSV4OP_COMMIT || status != 0) return (EBADRPC); return (nfsm_dissectf_xx(md, dpos, "o", NFSX_V4VERF, c->verf)); } int nfsm_v4dissect_create_xx(struct nfs4_compound *cp, struct nfs4_oparg_create *c, struct mbuf **md, caddr_t *dpos) { uint32_t t1, *tl, op, status, bmlen; nfsv4changeinfo ci; t1 = nfsm_dissectf_xx(md, dpos, "uu", &op, &status); if (t1 != 0) return (t1); if (op != NFSV4OP_CREATE || status != 0) return (EBADRPC); /* Just throw this away for now */ t1 = nfsm_v4dissect_changeinfo_xx(&ci, md, dpos); if (t1 != 0) return (t1); /* Throw this away too */ NFSM_DISSECT(NFSX_UNSIGNED); bmlen = fxdr_unsigned(uint32_t, *tl++); NFSM_DISSECT(bmlen * NFSX_UNSIGNED); tl += bmlen; return 0; } int nfsm_v4dissect_readlink_xx(struct nfs4_compound *cp, struct uio *uiop, struct mbuf **md, caddr_t *dpos) { uint32_t t1, *tl, op, status, linklen; t1 = nfsm_dissectf_xx(md, dpos, "uu", &op, &status); if (t1 != 0) return (t1); if (op != NFSV4OP_READLINK || status != 0) return (EBADRPC); /* Do this one manually for careful checking of sizes. */ NFSM_DISSECT(NFSX_UNSIGNED); linklen = fxdr_unsigned(uint32_t, *tl++); if (linklen <= 0) return (EBADRPC); return (nfsm_mbuftouio(md, uiop, MIN(linklen, uiop->uio_resid), dpos)); } int nfsm_v4dissect_changeinfo_xx(nfsv4changeinfo *ci, struct mbuf **md, caddr_t *dpos) { uint32_t *tl; NFSM_DISSECT(5 * NFSX_UNSIGNED); ci->ciatomic = fxdr_unsigned(uint32_t, *tl++); ci->cibefore = fxdr_hyper(tl); tl += 2; ci->ciafter = fxdr_hyper(tl); tl += 2; return (0); } int nfsm_v4dissect_attrs_xx(struct nfsv4_fattr *fa, struct mbuf **md, caddr_t *dpos) { uint32_t t1, *tl, bmlen, bmval[2], attrlen, len = 0; /* Bitmap length + value */ NFSM_DISSECT(NFSX_UNSIGNED); bmlen = fxdr_unsigned(uint32_t, *tl++); if (bmlen > 2) return (EBADRPC); if (bmlen == 0) return (0); NFSM_DISSECT(nfsm_rndup(bmlen << 2) + NFSX_UNSIGNED); bmval[0] = bmlen > 0 ? fxdr_unsigned(uint32_t, *tl++) : 0; bmval[1] = bmlen > 1 ? fxdr_unsigned(uint32_t, *tl++) : 0; /* Attribute length */ attrlen = fxdr_unsigned(uint32_t, *tl++); /* * XXX check for correct (<=) attributes mask return from * server. need to pass this in. 
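 * With the request bitmap at hand (say reqbm[], which would have to
 * be passed in), the subset test is (bmval[i] & ~reqbm[i]) == 0 for
 * i = 0, 1; any attribute the server returns that we did not ask for
 * would otherwise desynchronize the per-attribute parsing below.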
*/ if (FA4_ISSET(FA4_TYPE, bmval)) { /* overflow check */ NFSM_DISSECT(NFSX_UNSIGNED); fa->fa4_type = fxdr_unsigned(uint32_t, *tl++); fa->fa4_valid |= FA4V_TYPE; len += NFSX_UNSIGNED; } if (FA4_ISSET(FA4_CHANGE, bmval)) { NFSM_DISSECT(2 * NFSX_UNSIGNED); fa->fa4_changeid = fxdr_hyper(tl); fa->fa4_valid |= FA4V_CHANGEID; len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_SIZE, bmval)) { NFSM_DISSECT(2 * NFSX_UNSIGNED); fa->fa4_size = fxdr_hyper(tl); fa->fa4_valid |= FA4V_SIZE; len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_FSID, bmval)) { NFSM_DISSECT(4 * NFSX_UNSIGNED); fa->fa4_fsid_major = fxdr_hyper(tl); tl += 2; fa->fa4_fsid_minor = fxdr_hyper(tl); fa->fa4_valid |= FA4V_SIZE; len += 4 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_LEASE_TIME, bmval)) { NFSM_DISSECT(NFSX_UNSIGNED); fa->fa4_lease_time = fxdr_unsigned(uint32_t, *tl++); fa->fa4_valid |= FA4V_LEASE_TIME; len += NFSX_UNSIGNED; } if (FA4_ISSET(FA4_RDATTR_ERROR, bmval)) { /* ignore for now; we only ask for it so the compound won't fail */ NFSM_DISSECT(NFSX_UNSIGNED); tl++; len += NFSX_UNSIGNED; } if (FA4_ISSET(FA4_FILEID, bmval)) { NFSM_DISSECT(2 * NFSX_UNSIGNED); fa->fa4_fileid = fxdr_hyper(tl); fa->fa4_valid |= FA4V_FILEID; len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_FILES_FREE, bmval)) { NFSM_DISSECT(2 * NFSX_UNSIGNED); fa->fa4_ffree = fxdr_hyper(tl); fa->fa4_valid |= FA4V_FFREE; len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_FILES_TOTAL, bmval)) { NFSM_DISSECT(2 * NFSX_UNSIGNED); fa->fa4_ftotal = fxdr_hyper(tl); fa->fa4_valid |= FA4V_FTOTAL; len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_MAXFILESIZE, bmval)) { NFSM_DISSECT(2 * NFSX_UNSIGNED); fa->fa4_maxfilesize = fxdr_hyper(tl); fa->fa4_valid |= FA4V_MAXFILESIZE; len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_MAXNAME, bmval)) { NFSM_DISSECT(NFSX_UNSIGNED); fa->fa4_maxname = fxdr_unsigned(uint32_t, *tl++); fa->fa4_valid |= FA4V_MAXNAME; len += NFSX_UNSIGNED; } if (FA4_ISSET(FA4_MAXREAD, bmval)) { NFSM_DISSECT(2 * NFSX_UNSIGNED); fa->fa4_maxread = fxdr_hyper(tl); fa->fa4_valid |= FA4V_MAXREAD; len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_MAXWRITE, bmval)) { NFSM_DISSECT(2 * NFSX_UNSIGNED); fa->fa4_maxwrite = fxdr_hyper(tl); fa->fa4_valid |= FA4V_MAXWRITE; len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_MODE, bmval)) { NFSM_DISSECT(NFSX_UNSIGNED); fa->fa4_mode = fxdr_unsigned(mode_t, *tl++); fa->fa4_valid |= FA4V_MODE; len += NFSX_UNSIGNED; } if (FA4_ISSET(FA4_NUMLINKS, bmval)) { NFSM_DISSECT(NFSX_UNSIGNED); fa->fa4_nlink = fxdr_unsigned(nlink_t, *tl++); fa->fa4_valid |= FA4V_NLINK; len += NFSX_UNSIGNED; } if (FA4_ISSET(FA4_OWNER, bmval)) { uint32_t ownerlen; int error; NFSM_DISSECT(NFSX_UNSIGNED); ownerlen = fxdr_unsigned(uint32_t, *tl++); NFSM_DISSECT(nfsm_rndup(ownerlen)); error = idmap_name_to_uid((char *)tl, ownerlen, &fa->fa4_uid); if (error) fa->fa4_uid = -2; fa->fa4_valid |= FA4V_UID; len += NFSX_UNSIGNED + nfsm_rndup(ownerlen); } if (FA4_ISSET(FA4_OWNER_GROUP, bmval)) { uint32_t ownergrouplen; int error; NFSM_DISSECT(NFSX_UNSIGNED); ownergrouplen = fxdr_unsigned(uint32_t, *tl++); NFSM_DISSECT(nfsm_rndup(ownergrouplen)); error = idmap_name_to_gid((char *)tl, ownergrouplen, &fa->fa4_gid); if (error) fa->fa4_gid = -2; fa->fa4_valid |= FA4V_GID; len += NFSX_UNSIGNED + nfsm_rndup(ownergrouplen); } if (FA4_ISSET(FA4_RAWDEV, bmval)) { NFSM_DISSECT(2 * NFSX_UNSIGNED); fa->fa4_rdev_major = fxdr_unsigned(uint32_t, *tl++); fa->fa4_rdev_minor = fxdr_unsigned(uint32_t, *tl++); fa->fa4_valid |= FA4V_RDEV; len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_SPACE_AVAIL, bmval)) { NFSM_DISSECT(2 * 
NFSX_UNSIGNED); fa->fa4_savail = fxdr_hyper(tl); fa->fa4_valid |= FA4V_SAVAIL; len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_SPACE_FREE, bmval)) { NFSM_DISSECT(2 * NFSX_UNSIGNED); fa->fa4_sfree = fxdr_hyper(tl); fa->fa4_valid |= FA4V_SFREE; len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_SPACE_TOTAL, bmval)) { NFSM_DISSECT(2 * NFSX_UNSIGNED); fa->fa4_stotal = fxdr_hyper(tl); fa->fa4_valid |= FA4V_STOTAL; len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_SPACE_USED, bmval)) { NFSM_ADV(2 * NFSX_UNSIGNED); len += 2 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_TIME_ACCESS, bmval)) { NFSM_MTOTIME(fa->fa4_atime); fa->fa4_valid |= FA4V_ATIME; len += 3 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_TIME_CREATE, bmval)) { NFSM_MTOTIME(fa->fa4_btime); fa->fa4_valid |= FA4V_BTIME; len += 3 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_TIME_METADATA, bmval)) { NFSM_MTOTIME(fa->fa4_ctime); fa->fa4_valid |= FA4V_CTIME; len += 3 * NFSX_UNSIGNED; } if (FA4_ISSET(FA4_TIME_MODIFY, bmval)) { NFSM_MTOTIME(fa->fa4_mtime); fa->fa4_valid |= FA4V_MTIME; len += 3 * NFSX_UNSIGNED; } if (len != attrlen) return (EBADRPC); return (0); } diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c index 7f8ab1883d4b..e56e4eb59647 100644 --- a/sys/nfsclient/nfs_vnops.c +++ b/sys/nfsclient/nfs_vnops.c @@ -1,3327 +1,3327 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_vnops.c 8.16 (Berkeley) 5/27/95 */ #include __FBSDID("$FreeBSD$"); /* * vnode op calls for Sun NFS version 2 and 3 */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Defs */ #define TRUE 1 #define FALSE 0 /* * Ifdef for FreeBSD-current merged buffer cache. 
It is unfortunate that these * calls are not in getblk() and brelse() so that they would not be necessary * here. */ #ifndef B_VMIO #define vfs_busy_pages(bp, f) #endif static vop_read_t nfsfifo_read; static vop_write_t nfsfifo_write; static vop_close_t nfsfifo_close; static int nfs_flush(struct vnode *, int, int); static int nfs_setattrrpc(struct vnode *, struct vattr *, struct ucred *); static vop_lookup_t nfs_lookup; static vop_create_t nfs_create; static vop_mknod_t nfs_mknod; static vop_open_t nfs_open; static vop_close_t nfs_close; static vop_access_t nfs_access; static vop_getattr_t nfs_getattr; static vop_setattr_t nfs_setattr; static vop_read_t nfs_read; static vop_fsync_t nfs_fsync; static vop_remove_t nfs_remove; static vop_link_t nfs_link; static vop_rename_t nfs_rename; static vop_mkdir_t nfs_mkdir; static vop_rmdir_t nfs_rmdir; static vop_symlink_t nfs_symlink; static vop_readdir_t nfs_readdir; static vop_strategy_t nfs_strategy; static int nfs_lookitup(struct vnode *, const char *, int, struct ucred *, struct thread *, struct nfsnode **); static int nfs_sillyrename(struct vnode *, struct vnode *, struct componentname *); static vop_access_t nfsspec_access; static vop_readlink_t nfs_readlink; static vop_print_t nfs_print; static vop_advlock_t nfs_advlock; static vop_advlockasync_t nfs_advlockasync; /* * Global vfs data structures for nfs */ struct vop_vector nfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = nfs_access, .vop_advlock = nfs_advlock, .vop_advlockasync = nfs_advlockasync, .vop_close = nfs_close, .vop_create = nfs_create, .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_getpages = nfs_getpages, .vop_putpages = nfs_putpages, .vop_inactive = nfs_inactive, .vop_lease = VOP_NULL, .vop_link = nfs_link, .vop_lookup = nfs_lookup, .vop_mkdir = nfs_mkdir, .vop_mknod = nfs_mknod, .vop_open = nfs_open, .vop_print = nfs_print, .vop_read = nfs_read, .vop_readdir = nfs_readdir, .vop_readlink = nfs_readlink, .vop_reclaim = nfs_reclaim, .vop_remove = nfs_remove, .vop_rename = nfs_rename, .vop_rmdir = nfs_rmdir, .vop_setattr = nfs_setattr, .vop_strategy = nfs_strategy, .vop_symlink = nfs_symlink, .vop_write = nfs_write, }; struct vop_vector nfs_fifoops = { .vop_default = &fifo_specops, .vop_access = nfsspec_access, .vop_close = nfsfifo_close, .vop_fsync = nfs_fsync, .vop_getattr = nfs_getattr, .vop_inactive = nfs_inactive, .vop_print = nfs_print, .vop_read = nfsfifo_read, .vop_reclaim = nfs_reclaim, .vop_setattr = nfs_setattr, .vop_write = nfsfifo_write, }; static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap); static int nfs_removerpc(struct vnode *dvp, const char *name, int namelen, struct ucred *cred, struct thread *td); static int nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen, struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td); static int nfs_renameit(struct vnode *sdvp, struct componentname *scnp, struct sillyrename *sp); /* * Global variables */ struct mtx nfs_iod_mtx; struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; struct nfsmount *nfs_iodmount[NFS_MAXASYNCDAEMON]; int nfs_numasync = 0; vop_advlock_t *nfs_advlock_p = nfs_dolock; vop_reclaim_t *nfs_reclaim_p = NULL; #define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) SYSCTL_DECL(_vfs_nfs); static int nfsaccess_cache_timeout = NFS_MAXATTRTIMO; SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_timeout, CTLFLAG_RW, &nfsaccess_cache_timeout, 0, "NFS ACCESS cache timeout"); static int 
nfsv3_commit_on_close = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfsv3_commit_on_close, CTLFLAG_RW, &nfsv3_commit_on_close, 0, "write+commit on close, else only write"); static int nfs_clean_pages_on_close = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, clean_pages_on_close, CTLFLAG_RW, &nfs_clean_pages_on_close, 0, "NFS clean dirty pages on close"); int nfs_directio_enable = 0; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_enable, CTLFLAG_RW, &nfs_directio_enable, 0, "Enable NFS directio"); /* * This sysctl allows other processes to mmap a file that has been opened * O_DIRECT by a process. In general, having processes mmap the file while * Direct IO is in progress can lead to Data Inconsistencies. But, we allow * this by default to prevent DoS attacks - to prevent a malicious user from * opening up files O_DIRECT preventing other users from mmap'ing these * files. "Protected" environments where stricter consistency guarantees are * required can disable this knob. The process that opened the file O_DIRECT * cannot mmap() the file, because mmap'ed IO on an O_DIRECT open() is not * meaningful. */ int nfs_directio_allow_mmap = 1; SYSCTL_INT(_vfs_nfs, OID_AUTO, nfs_directio_allow_mmap, CTLFLAG_RW, &nfs_directio_allow_mmap, 0, "Enable mmaped IO on file with O_DIRECT opens"); #if 0 SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_hits, CTLFLAG_RD, &nfsstats.accesscache_hits, 0, "NFS ACCESS cache hit count"); SYSCTL_INT(_vfs_nfs, OID_AUTO, access_cache_misses, CTLFLAG_RD, &nfsstats.accesscache_misses, 0, "NFS ACCESS cache miss count"); #endif #define NFSV3ACCESS_ALL (NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY \ | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE \ | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP) /* * SMP Locking Note : * The list of locks after the description of the lock is the ordering * of other locks acquired with the lock held. * np->n_mtx : Protects the fields in the nfsnode. VM Object Lock VI_MTX (acquired indirectly) * nmp->nm_mtx : Protects the fields in the nfsmount. rep->r_mtx * nfs_iod_mtx : Global lock, protects shared nfsiod state. * nfs_reqq_mtx : Global lock, protects the nfs_reqq list. nmp->nm_mtx rep->r_mtx * rep->r_mtx : Protects the fields in an nfsreq. */ static int nfs3_access_otw(struct vnode *vp, int wmode, struct thread *td, struct ucred *cred) { const int v3 = 1; u_int32_t *tl; int error = 0, attrflag; struct mbuf *mreq, *mrep, *md, *mb; caddr_t bpos, dpos; u_int32_t rmode; struct nfsnode *np = VTONFS(vp); nfsstats.rpccnt[NFSPROC_ACCESS]++; mreq = nfsm_reqhead(vp, NFSPROC_ACCESS, NFSX_FH(v3) + NFSX_UNSIGNED); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); *tl = txdr_unsigned(wmode); nfsm_request(vp, NFSPROC_ACCESS, td, cred); nfsm_postop_attr(vp, attrflag); if (!error) { tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); rmode = fxdr_unsigned(u_int32_t, *tl); mtx_lock(&np->n_mtx); np->n_mode = rmode; np->n_modeuid = cred->cr_uid; np->n_modestamp = time_second; mtx_unlock(&np->n_mtx); } m_freem(mrep); nfsmout: return (error); } /* * nfs access vnode op. * For nfs version 2, just return ok. File accesses may fail later. * For nfs version 3, use the access rpc to check accessibility. If file modes * are changed on the server, accesses might still fail later. 
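 * For example, a VWRITE|VEXEC request on a directory goes out as the
 * NFSV3ACCESS_MODIFY, NFSV3ACCESS_EXTEND, NFSV3ACCESS_DELETE and
 * NFSV3ACCESS_LOOKUP bits, per the mapping built below.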
*/ static int nfs_access(struct vop_access_args *ap) { struct vnode *vp = ap->a_vp; int error = 0; u_int32_t mode, wmode; int v3 = NFS_ISV3(vp); struct nfsnode *np = VTONFS(vp); /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((ap->a_accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } /* * For nfs v3, check to see if we have done this recently, and if * so return our cached result instead of making an ACCESS call. * If not, do an access rpc, otherwise you are stuck emulating * ufs_access() locally using the vattr. This may not be correct, * since the server may apply other access criteria such as * client uid-->server uid mapping that we do not know about. */ if (v3) { if (ap->a_accmode & VREAD) mode = NFSV3ACCESS_READ; else mode = 0; if (vp->v_type != VDIR) { if (ap->a_accmode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND); if (ap->a_accmode & VEXEC) mode |= NFSV3ACCESS_EXECUTE; } else { if (ap->a_accmode & VWRITE) mode |= (NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_DELETE); if (ap->a_accmode & VEXEC) mode |= NFSV3ACCESS_LOOKUP; } /* XXX safety belt, only make blanket request if caching */ if (nfsaccess_cache_timeout > 0) { wmode = NFSV3ACCESS_READ | NFSV3ACCESS_MODIFY | NFSV3ACCESS_EXTEND | NFSV3ACCESS_EXECUTE | NFSV3ACCESS_DELETE | NFSV3ACCESS_LOOKUP; } else { wmode = mode; } /* * Does our cached result allow us to give a definite yes to * this request? */ mtx_lock(&np->n_mtx); if ((time_second < (np->n_modestamp + nfsaccess_cache_timeout)) && (ap->a_cred->cr_uid == np->n_modeuid) && ((np->n_mode & mode) == mode)) { nfsstats.accesscache_hits++; } else { /* * Either a no, or a don't know. Go to the wire. */ nfsstats.accesscache_misses++; mtx_unlock(&np->n_mtx); error = nfs3_access_otw(vp, wmode, ap->a_td,ap->a_cred); mtx_lock(&np->n_mtx); if (!error) { if ((np->n_mode & mode) != mode) { error = EACCES; } } } mtx_unlock(&np->n_mtx); return (error); } else { if ((error = nfsspec_access(ap)) != 0) { return (error); } /* * Attempt to prevent a mapped root from accessing a file * which it shouldn't. We try to read a byte from the file * if the user is root and the file is not zero length. * After calling nfsspec_access, we should have the correct * file size cached. */ mtx_lock(&np->n_mtx); if (ap->a_cred->cr_uid == 0 && (ap->a_accmode & VREAD) && VTONFS(vp)->n_size > 0) { struct iovec aiov; struct uio auio; char buf[1]; mtx_unlock(&np->n_mtx); aiov.iov_base = buf; aiov.iov_len = 1; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_resid = 1; auio.uio_segflg = UIO_SYSSPACE; auio.uio_rw = UIO_READ; auio.uio_td = ap->a_td; if (vp->v_type == VREG) error = nfs_readrpc(vp, &auio, ap->a_cred); else if (vp->v_type == VDIR) { char* bp; bp = malloc(NFS_DIRBLKSIZ, M_TEMP, M_WAITOK); aiov.iov_base = bp; aiov.iov_len = auio.uio_resid = NFS_DIRBLKSIZ; error = nfs_readdirrpc(vp, &auio, ap->a_cred); free(bp, M_TEMP); } else if (vp->v_type == VLNK) error = nfs_readlinkrpc(vp, &auio, ap->a_cred); else error = EACCES; } else mtx_unlock(&np->n_mtx); return (error); } } int nfs_otw_getattr_avoid = 0; /* * nfs open vnode op * Check to see if the type is ok * and that deletion is not in progress. * For paged in text files, you will need to flush the page cache * if consistency is lost. 
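 * Staleness is detected below by comparing the cached n_mtime with
 * the va_mtime just fetched from the server; a mismatch tosses the
 * cached buffers via nfs_vinvalbuf().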
*/ /* ARGSUSED */ static int nfs_open(struct vop_open_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; int error; int fmode = ap->a_mode; if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK) return (EOPNOTSUPP); /* * Get a valid lease. If cached data is stale, flush it. */ mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == EINTR || error == EIO) return (error); np->n_attrstamp = 0; if (vp->v_type == VDIR) np->n_direofoffset = 0; error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) return (error); mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; mtx_unlock(&np->n_mtx); } else { struct thread *td = curthread; if (np->n_ac_ts_syscalls != td->td_syscalls || np->n_ac_ts_tid != td->td_tid || td->td_proc == NULL || np->n_ac_ts_pid != td->td_proc->p_pid) { np->n_attrstamp = 0; } mtx_unlock(&np->n_mtx); error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) return (error); mtx_lock(&np->n_mtx); if (NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { if (vp->v_type == VDIR) np->n_direofoffset = 0; mtx_unlock(&np->n_mtx); error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error == EINTR || error == EIO) { return (error); } mtx_lock(&np->n_mtx); np->n_mtime = vattr.va_mtime; } mtx_unlock(&np->n_mtx); } /* * If the object has >= 1 O_DIRECT active opens, we disable caching. */ if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { if (np->n_directio_opens == 0) { error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); if (error) return (error); mtx_lock(&np->n_mtx); np->n_flag |= NNONCACHE; mtx_unlock(&np->n_mtx); } np->n_directio_opens++; } vnode_create_vobject(vp, vattr.va_size, ap->a_td); return (0); } /* * nfs close vnode op * What an NFS client should do upon close after writing is a debatable issue. * Most NFS clients push delayed writes to the server upon close, basically for * two reasons: * 1 - So that any write errors may be reported back to the client process * doing the close system call. By far the two most likely errors are * NFSERR_NOSPC and NFSERR_DQUOT to indicate space allocation failure. * 2 - To put a worst case upper bound on cache inconsistency between * multiple clients for the file. * There is also a consistency problem for Version 2 of the protocol w.r.t. * not being able to tell if other clients are writing a file concurrently, * since there is no way of knowing if the changed modify time in the reply * is only due to the write for this client. * (NFS Version 3 provides weak cache consistency data in the reply that * should be sufficient to detect and handle this case.) * * The current code does the following: * for NFS Version 2 - play it safe and flush/invalidate all dirty buffers * for NFS Version 3 - flush dirty buffers to the server but don't invalidate * or commit them (this satisfies 1 and 2 except for the * case where the server crashes after this close but * before the commit RPC, which is felt to be "good * enough". Changing the last argument to nfs_flush() to * a 1 would force a commit operation, if it is felt a * commit is necessary now. */ /* ARGSUSED */ static int nfs_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); int error = 0; int fmode = ap->a_fflag; if (vp->v_type == VREG) { /* * Examine and clean dirty pages, regardless of NMODIFIED. * This closes a major hole in close-to-open consistency. 
* We want to push out all dirty pages (and buffers) on * close, regardless of whether they were dirtied by * mmap'ed writes or via write(). */ if (nfs_clean_pages_on_close && vp->v_object) { VM_OBJECT_LOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_UNLOCK(vp->v_object); } mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { mtx_unlock(&np->n_mtx); if (NFS_ISV3(vp)) { /* * Under NFSv3 we have dirty buffers to dispose of. We * must flush them to the NFS server. We have the option * of waiting all the way through the commit rpc or just * waiting for the initial write. The default is to only * wait through the initial write so the data is in the * server's cache, which is roughly similar to the state * a standard disk subsystem leaves the file in on close(). * * We cannot clear the NMODIFIED bit in np->n_flag due to * potential races with other processes, and certainly * cannot clear it if we don't commit. */ int cm = nfsv3_commit_on_close ? 1 : 0; error = nfs_flush(vp, MNT_WAIT, cm); /* np->n_flag &= ~NMODIFIED; */ } else error = nfs_vinvalbuf(vp, V_SAVE, ap->a_td, 1); mtx_lock(&np->n_mtx); } /* * Invalidate the attribute cache in all cases. * An open is going to fetch fresh attrs anyway; other procs * on this node that have the file open will be forced to do an * otw attr fetch, but this is safe. */ np->n_attrstamp = 0; if (np->n_flag & NWRITEERR) { np->n_flag &= ~NWRITEERR; error = np->n_error; } mtx_unlock(&np->n_mtx); } if (nfs_directio_enable) KASSERT((np->n_directio_asyncwr == 0), ("nfs_close: dirty unflushed (%d) directio buffers\n", np->n_directio_asyncwr)); if (nfs_directio_enable && (fmode & O_DIRECT) && (vp->v_type == VREG)) { mtx_lock(&np->n_mtx); KASSERT((np->n_directio_opens > 0), ("nfs_close: unexpected value (0) of n_directio_opens\n")); np->n_directio_opens--; if (np->n_directio_opens == 0) np->n_flag &= ~NNONCACHE; mtx_unlock(&np->n_mtx); } return (error); } /* * nfs getattr call from vfs. */ static int nfs_getattr(struct vop_getattr_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct thread *td = curthread; struct vattr *vap = ap->a_vap; struct vattr vattr; caddr_t bpos, dpos; int error = 0; struct mbuf *mreq, *mrep, *md, *mb; int v3 = NFS_ISV3(vp); /* * Update local times for special files. */ mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) np->n_flag |= NCHG; mtx_unlock(&np->n_mtx); /* * First look in the cache. */ if (nfs_getattrcache(vp, &vattr) == 0) goto nfsmout; if (v3 && nfsaccess_cache_timeout > 0) { nfsstats.accesscache_misses++; nfs3_access_otw(vp, NFSV3ACCESS_ALL, td, ap->a_cred); if (nfs_getattrcache(vp, &vattr) == 0) goto nfsmout; } nfsstats.rpccnt[NFSPROC_GETATTR]++; mreq = nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_GETATTR, td, ap->a_cred); if (!error) { nfsm_loadattr(vp, &vattr); } m_freem(mrep); nfsmout: vap->va_type = vattr.va_type; vap->va_mode = vattr.va_mode; vap->va_nlink = vattr.va_nlink; vap->va_uid = vattr.va_uid; vap->va_gid = vattr.va_gid; vap->va_fsid = vattr.va_fsid; vap->va_fileid = vattr.va_fileid; vap->va_size = vattr.va_size; vap->va_blocksize = vattr.va_blocksize; vap->va_atime = vattr.va_atime; vap->va_mtime = vattr.va_mtime; vap->va_ctime = vattr.va_ctime; vap->va_gen = vattr.va_gen; vap->va_flags = vattr.va_flags; vap->va_rdev = vattr.va_rdev; vap->va_bytes = vattr.va_bytes; vap->va_filerev = vattr.va_filerev; return (error); } /* * nfs setattr call.
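 * Truncation (va_size != VNOVAL) gets extra care below: n_size is
 * updated through nfs_meta_setsize() before the RPC goes out, and is
 * rolled back to the saved tsize if the SETATTR ultimately fails.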
*/ static int nfs_setattr(struct vop_setattr_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr *vap = ap->a_vap; struct thread *td = curthread; int error = 0; u_quad_t tsize; #ifndef nolint tsize = (u_quad_t)0; #endif /* * Setting of flags is not supported. */ if (vap->va_flags != VNOVAL) return (EOPNOTSUPP); /* * Disallow write attempts if the filesystem is mounted read-only. */ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { error = EROFS; goto out; } if (vap->va_size != VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VCHR: case VBLK: case VSOCK: case VFIFO: if (vap->va_mtime.tv_sec == VNOVAL && vap->va_atime.tv_sec == VNOVAL && vap->va_mode == (mode_t)VNOVAL && vap->va_uid == (uid_t)VNOVAL && vap->va_gid == (gid_t)VNOVAL) return (0); vap->va_size = VNOVAL; break; default: /* * Disallow write attempts if the filesystem is * mounted read-only. */ if (vp->v_mount->mnt_flag & MNT_RDONLY) return (EROFS); /* * We run vnode_pager_setsize() early (why?), * we must set np->n_size now to avoid vinvalbuf * V_SAVE races that might setsize a lower * value. */ mtx_lock(&np->n_mtx); tsize = np->n_size; mtx_unlock(&np->n_mtx); error = nfs_meta_setsize(vp, ap->a_cred, td, vap->va_size); mtx_lock(&np->n_mtx); if (np->n_flag & NMODIFIED) { tsize = np->n_size; mtx_unlock(&np->n_mtx); if (vap->va_size == 0) error = nfs_vinvalbuf(vp, 0, td, 1); else error = nfs_vinvalbuf(vp, V_SAVE, td, 1); if (error) { vnode_pager_setsize(vp, tsize); goto out; } } else mtx_unlock(&np->n_mtx); /* * np->n_size has already been set to vap->va_size * in nfs_meta_setsize(). We must set it again since * nfs_loadattrcache() could be called through * nfs_meta_setsize() and could modify np->n_size. */ mtx_lock(&np->n_mtx); np->n_vattr.va_size = np->n_size = vap->va_size; mtx_unlock(&np->n_mtx); }; } else { mtx_lock(&np->n_mtx); if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) && (np->n_flag & NMODIFIED) && vp->v_type == VREG) { mtx_unlock(&np->n_mtx); if ((error = nfs_vinvalbuf(vp, V_SAVE, td, 1)) != 0 && (error == EINTR || error == EIO)) return error; } else mtx_unlock(&np->n_mtx); } error = nfs_setattrrpc(vp, vap, ap->a_cred); if (error && vap->va_size != VNOVAL) { mtx_lock(&np->n_mtx); np->n_size = np->n_vattr.va_size = tsize; vnode_pager_setsize(vp, tsize); mtx_unlock(&np->n_mtx); } out: return (error); } /* * Do an nfs setattr rpc. 
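 * For v3 the attributes are emitted with nfsm_v3attrbuild() followed
 * by a guard discriminant of nfs_false (no ctime guard); for v2, any
 * field left unset is wired to nfs_xdrneg1, e.g. sa_uid when va_uid
 * is VNOVAL.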
*/ static int nfs_setattrrpc(struct vnode *vp, struct vattr *vap, struct ucred *cred) { struct nfsv2_sattr *sp; struct nfsnode *np = VTONFS(vp); caddr_t bpos, dpos; u_int32_t *tl; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_SETATTR]++; mreq = nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH(v3) + NFSX_SATTR(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); if (v3) { nfsm_v3attrbuild(vap, TRUE); tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); *tl = nfs_false; } else { sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); if (vap->va_mode == (mode_t)VNOVAL) sp->sa_mode = nfs_xdrneg1; else sp->sa_mode = vtonfsv2_mode(vp->v_type, vap->va_mode); if (vap->va_uid == (uid_t)VNOVAL) sp->sa_uid = nfs_xdrneg1; else sp->sa_uid = txdr_unsigned(vap->va_uid); if (vap->va_gid == (gid_t)VNOVAL) sp->sa_gid = nfs_xdrneg1; else sp->sa_gid = txdr_unsigned(vap->va_gid); sp->sa_size = txdr_unsigned(vap->va_size); txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(vp, NFSPROC_SETATTR, curthread, cred); if (v3) { np->n_modestamp = 0; nfsm_wcc_data(vp, wccflag); } else nfsm_loadattr(vp, NULL); m_freem(mrep); nfsmout: return (error); } /* * nfs lookup call, one step at a time... * First look in cache * If not found, unlock the directory nfsnode and do the rpc */ static int nfs_lookup(struct vop_lookup_args *ap) { struct componentname *cnp = ap->a_cnp; struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; int flags = cnp->cn_flags; struct vnode *newvp; struct nfsmount *nmp; caddr_t bpos, dpos; struct mbuf *mreq, *mrep, *md, *mb; long len; nfsfh_t *fhp; struct nfsnode *np; int error = 0, attrflag, fhsize; int v3 = NFS_ISV3(dvp); struct thread *td = cnp->cn_thread; *vpp = NULLVP; if ((flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) return (EROFS); if (dvp->v_type != VDIR) return (ENOTDIR); nmp = VFSTONFS(dvp->v_mount); np = VTONFS(dvp); if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0) { *vpp = NULLVP; return (error); } error = cache_lookup(dvp, vpp, cnp); if (error > 0 && error != ENOENT) return (error); if (error == -1) { struct vattr vattr; newvp = *vpp; if (!VOP_GETATTR(newvp, &vattr, cnp->cn_cred) && vattr.va_ctime.tv_sec == VTONFS(newvp)->n_ctime) { nfsstats.lookupcache_hits++; if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; return (0); } cache_purge(newvp); if (dvp != newvp) vput(newvp); else vrele(newvp); *vpp = NULLVP; } error = 0; newvp = NULLVP; nfsstats.lookupcache_misses++; nfsstats.rpccnt[NFSPROC_LOOKUP]++; len = cnp->cn_namelen; mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_thread, cnp->cn_cred); if (error) { if (v3) { nfsm_postop_attr(dvp, attrflag); m_freem(mrep); } goto nfsmout; } nfsm_getfh(fhp, fhsize, v3); /* * Handle RENAME case... 
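 * If the last component resolves to the directory's own file handle
 * (NFS_CMPFH()), the rename target is the directory itself and we
 * fail with EISDIR; otherwise nget the target, load its attributes
 * and return it with SAVENAME set.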
*/ if (cnp->cn_nameiop == RENAME && (flags & ISLASTCN)) { if (NFS_CMPFH(np, fhp, fhsize)) { m_freem(mrep); return (EISDIR); } error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, NULL); *vpp = newvp; m_freem(mrep); cnp->cn_flags |= SAVENAME; return (0); } if (flags & ISDOTDOT) { VOP_UNLOCK(dvp, 0); error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags); vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); if (error) return (error); newvp = NFSTOV(np); } else if (NFS_CMPFH(np, fhp, fhsize)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, fhp, fhsize, &np, cnp->cn_lkflags); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); nfsm_postop_attr(dvp, attrflag); } else nfsm_loadattr(newvp, NULL); if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; if ((cnp->cn_flags & MAKEENTRY) && (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) { np->n_ctime = np->n_vattr.va_ctime.tv_sec; cache_enter(dvp, newvp, cnp); } *vpp = newvp; m_freem(mrep); nfsmout: if (error) { if (newvp != NULLVP) { vput(newvp); *vpp = NULLVP; } if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) && (flags & ISLASTCN) && error == ENOENT) { if (dvp->v_mount->mnt_flag & MNT_RDONLY) error = EROFS; else error = EJUSTRETURN; } if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN)) cnp->cn_flags |= SAVENAME; } return (error); } /* * nfs read call. * Just call nfs_bioread() to do the work. */ static int nfs_read(struct vop_read_args *ap) { struct vnode *vp = ap->a_vp; switch (vp->v_type) { case VREG: return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred)); case VDIR: return (EISDIR); default: return (EOPNOTSUPP); } } /* * nfs readlink call */ static int nfs_readlink(struct vop_readlink_args *ap) { struct vnode *vp = ap->a_vp; if (vp->v_type != VLNK) return (EINVAL); return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred)); } /* * Do a readlink rpc. * Called by nfs_doio() from below the buffer cache. */ int nfs_readlinkrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { caddr_t bpos, dpos; int error = 0, len, attrflag; struct mbuf *mreq, *mrep, *md, *mb; int v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_READLINK]++; mreq = nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); nfsm_request(vp, NFSPROC_READLINK, uiop->uio_td, cred); if (v3) nfsm_postop_attr(vp, attrflag); if (!error) { nfsm_strsiz(len, NFS_MAXPATHLEN); if (len == NFS_MAXPATHLEN) { struct nfsnode *np = VTONFS(vp); mtx_lock(&np->n_mtx); if (np->n_size && np->n_size < NFS_MAXPATHLEN) len = np->n_size; mtx_unlock(&np->n_mtx); } nfsm_mtouio(uiop, len); } m_freem(mrep); nfsmout: return (error); } /* * nfs read rpc call * Ditto above */ int nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { u_int32_t *tl; caddr_t bpos, dpos; struct mbuf *mreq, *mrep, *md, *mb; struct nfsmount *nmp; int error = 0, len, retlen, tsiz, eof, attrflag; int v3 = NFS_ISV3(vp); int rsize; #ifndef nolint eof = 0; #endif nmp = VFSTONFS(vp->v_mount); tsiz = uiop->uio_resid; mtx_lock(&nmp->nm_mtx); if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) { mtx_unlock(&nmp->nm_mtx); return (EFBIG); } rsize = nmp->nm_rsize; mtx_unlock(&nmp->nm_mtx); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_READ]++; len = (tsiz > rsize) ? 
rsize : tsiz; mreq = nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH(v3) + NFSX_UNSIGNED * 3); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED * 3); if (v3) { txdr_hyper(uiop->uio_offset, tl); *(tl + 2) = txdr_unsigned(len); } else { *tl++ = txdr_unsigned(uiop->uio_offset); *tl++ = txdr_unsigned(len); *tl = 0; } nfsm_request(vp, NFSPROC_READ, uiop->uio_td, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); eof = fxdr_unsigned(int, *(tl + 1)); } else { nfsm_loadattr(vp, NULL); } nfsm_strsiz(retlen, rsize); nfsm_mtouio(uiop, retlen); m_freem(mrep); tsiz -= retlen; if (v3) { if (eof || retlen == 0) { tsiz = 0; } } else if (retlen < len) { tsiz = 0; } } nfsmout: return (error); } /* * nfs write call */ int nfs_writerpc(struct vnode *vp, struct uio *uiop, struct ucred *cred, int *iomode, int *must_commit) { u_int32_t *tl; int32_t backup; caddr_t bpos, dpos; struct mbuf *mreq, *mrep, *md, *mb; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, len, tsiz, wccflag = NFSV3_WCCRATTR, rlen, commit; int v3 = NFS_ISV3(vp), committed = NFSV3WRITE_FILESYNC; int wsize; #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1) panic("nfs: writerpc iovcnt > 1"); #endif *must_commit = 0; tsiz = uiop->uio_resid; mtx_lock(&nmp->nm_mtx); if (uiop->uio_offset + tsiz > nmp->nm_maxfilesize) { mtx_unlock(&nmp->nm_mtx); return (EFBIG); } wsize = nmp->nm_wsize; mtx_unlock(&nmp->nm_mtx); while (tsiz > 0) { nfsstats.rpccnt[NFSPROC_WRITE]++; len = (tsiz > wsize) ? wsize : tsiz; mreq = nfsm_reqhead(vp, NFSPROC_WRITE, NFSX_FH(v3) + 5 * NFSX_UNSIGNED + nfsm_rndup(len)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); if (v3) { tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED); txdr_hyper(uiop->uio_offset, tl); tl += 2; *tl++ = txdr_unsigned(len); *tl++ = txdr_unsigned(*iomode); *tl = txdr_unsigned(len); } else { u_int32_t x; tl = nfsm_build(u_int32_t *, 4 * NFSX_UNSIGNED); /* Set both "begin" and "current" to non-garbage. */ x = txdr_unsigned((u_int32_t)uiop->uio_offset); *tl++ = x; /* "begin offset" */ *tl++ = x; /* "current offset" */ x = txdr_unsigned(len); *tl++ = x; /* total to this offset */ *tl = x; /* size of this write */ } nfsm_uiotom(uiop, len); nfsm_request(vp, NFSPROC_WRITE, uiop->uio_td, cred); if (v3) { wccflag = NFSV3_WCCCHK; nfsm_wcc_data(vp, wccflag); if (!error) { tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED + NFSX_V3WRITEVERF); rlen = fxdr_unsigned(int, *tl++); if (rlen == 0) { error = NFSERR_IO; m_freem(mrep); break; } else if (rlen < len) { backup = len - rlen; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base - backup; uiop->uio_iov->iov_len += backup; uiop->uio_offset -= backup; uiop->uio_resid += backup; len = rlen; } commit = fxdr_unsigned(int, *tl++); /* * Return the lowest commitment level * obtained by any of the RPCs.
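 * The levels order as FILESYNC > DATASYNC > UNSTABLE, so e.g. one
 * UNSTABLE reply among otherwise FILESYNC replies downgrades the
 * level reported back through *iomode for the whole transfer.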
*/ if (committed == NFSV3WRITE_FILESYNC) committed = commit; else if (committed == NFSV3WRITE_DATASYNC && commit == NFSV3WRITE_UNSTABLE) committed = commit; mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0){ bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); nmp->nm_state |= NFSSTA_HASWRITEVERF; } else if (bcmp((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF)) { *must_commit = 1; bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); } mtx_unlock(&nmp->nm_mtx); } } else { nfsm_loadattr(vp, NULL); } if (wccflag) { mtx_lock(&(VTONFS(vp))->n_mtx); VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime; mtx_unlock(&(VTONFS(vp))->n_mtx); } m_freem(mrep); if (error) break; tsiz -= len; } nfsmout: if (vp->v_mount->mnt_kern_flag & MNTK_ASYNC) committed = NFSV3WRITE_FILESYNC; *iomode = committed; if (error) uiop->uio_resid = tsiz; return (error); } /* * nfs mknod rpc * For NFS v2 this is a kludge. Use a create rpc but with the IFMT bits of the * mode set to specify the file type and the size field for rdev. */ static int nfs_mknodrpc(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp, struct vattr *vap) { struct nfsv2_sattr *sp; u_int32_t *tl; struct vnode *newvp = NULL; struct nfsnode *np = NULL; struct vattr vattr; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb; u_int32_t rdev; int v3 = NFS_ISV3(dvp); if (vap->va_type == VCHR || vap->va_type == VBLK) rdev = txdr_unsigned(vap->va_rdev); else if (vap->va_type == VFIFO || vap->va_type == VSOCK) rdev = nfs_xdrneg1; else { return (EOPNOTSUPP); } if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) return (error); nfsstats.rpccnt[NFSPROC_MKNOD]++; mreq = nfsm_reqhead(dvp, NFSPROC_MKNOD, NFSX_FH(v3) + 4 * NFSX_UNSIGNED + + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); *tl++ = vtonfsv3_type(vap->va_type); nfsm_v3attrbuild(vap, FALSE); if (vap->va_type == VCHR || vap->va_type == VBLK) { tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); - *tl++ = txdr_unsigned(umajor(vap->va_rdev)); - *tl = txdr_unsigned(uminor(vap->va_rdev)); + *tl++ = txdr_unsigned(major(vap->va_rdev)); + *tl = txdr_unsigned(minor(vap->va_rdev)); } } else { sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = rdev; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKNOD, cnp->cn_thread, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = NULL; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: if (error) { if (newvp) vput(newvp); } else { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *vpp = newvp; } mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; mtx_unlock(&(VTONFS(dvp))->n_mtx); return (error); } /* * nfs mknod vop * just call nfs_mknodrpc() to do the work. 
*/ /* ARGSUSED */ static int nfs_mknod(struct vop_mknod_args *ap) { return (nfs_mknodrpc(ap->a_dvp, ap->a_vpp, ap->a_cnp, ap->a_vap)); } static u_long create_verf; /* * nfs file create call */ static int nfs_create(struct vop_create_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsv2_sattr *sp; u_int32_t *tl; struct nfsnode *np = NULL; struct vnode *newvp = NULL; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, gotvp = 0, fmode = 0; struct mbuf *mreq, *mrep, *md, *mb; struct vattr vattr; int v3 = NFS_ISV3(dvp); /* * Oops, not for me.. */ if (vap->va_type == VSOCK) return (nfs_mknodrpc(dvp, ap->a_vpp, cnp, vap)); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) return (error); if (vap->va_vaflags & VA_EXCLUSIVE) fmode |= O_EXCL; again: nfsstats.rpccnt[NFSPROC_CREATE]++; mreq = nfsm_reqhead(dvp, NFSPROC_CREATE, NFSX_FH(v3) + 2 * NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + NFSX_SATTR(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); if (fmode & O_EXCL) { CURVNET_SET(VFSTONFS(dvp->v_mount)->nm_so->so_vnet); *tl = txdr_unsigned(NFSV3CREATE_EXCLUSIVE); tl = nfsm_build(u_int32_t *, NFSX_V3CREATEVERF); #ifdef INET INIT_VNET_INET(curvnet); if (!TAILQ_EMPTY(&V_in_ifaddrhead)) *tl++ = IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr.s_addr; else #endif *tl++ = create_verf; *tl = ++create_verf; CURVNET_RESTORE(); } else { *tl = txdr_unsigned(NFSV3CREATE_UNCHECKED); nfsm_v3attrbuild(vap, FALSE); } } else { sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = 0; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_thread, cnp->cn_cred); if (!error) { nfsm_mtofh(dvp, newvp, v3, gotvp); if (!gotvp) { if (newvp) { vput(newvp); newvp = NULL; } error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) newvp = NFSTOV(np); } } if (v3) nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: if (error) { if (v3 && (fmode & O_EXCL) && error == NFSERR_NOTSUPP) { fmode &= ~O_EXCL; goto again; } if (newvp) vput(newvp); } else if (v3 && (fmode & O_EXCL)) { /* * We are normally called with only a partially initialized * VAP. Since the NFSv3 spec says that server may use the * file attributes to store the verifier, the spec requires * us to do a SETATTR RPC. FreeBSD servers store the verifier * in atime, but we can't really assume that all servers will * so we ensure that our SETATTR sets both atime and mtime. */ if (vap->va_mtime.tv_sec == VNOVAL) vfs_timestamp(&vap->va_mtime); if (vap->va_atime.tv_sec == VNOVAL) vap->va_atime = vap->va_mtime; error = nfs_setattrrpc(newvp, vap, cnp->cn_cred); if (error) vput(newvp); } if (!error) { if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, newvp, cnp); *ap->a_vpp = newvp; } mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; mtx_unlock(&(VTONFS(dvp))->n_mtx); return (error); } /* * nfs file remove call * To try and make nfs semantics closer to ufs semantics, a file that has * other processes using the vnode is renamed instead of removed and then * removed later on the last close. 
* - If v_usecount > 1 * If a rename is not already in the works * call nfs_sillyrename() to set it up * else * do the remove rpc */ static int nfs_remove(struct vop_remove_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; struct nfsnode *np = VTONFS(vp); int error = 0; struct vattr vattr; #ifndef DIAGNOSTIC if ((cnp->cn_flags & HASBUF) == 0) panic("nfs_remove: no name"); if (vrefcnt(vp) < 1) panic("nfs_remove: bad v_usecount"); #endif if (vp->v_type == VDIR) error = EPERM; else if (vrefcnt(vp) == 1 || (np->n_sillyrename && !VOP_GETATTR(vp, &vattr, cnp->cn_cred) && vattr.va_nlink > 1)) { /* * Purge the name cache so that the chance of a lookup for * the name succeeding while the remove is in progress is * minimized. Without node locking it can still happen, such * that an I/O op returns ESTALE, but since you get this if * another host removes the file.. */ cache_purge(vp); /* * throw away biocache buffers, mainly to avoid * unnecessary delayed writes later. */ error = nfs_vinvalbuf(vp, 0, cnp->cn_thread, 1); /* Do the rpc */ if (error != EINTR && error != EIO) error = nfs_removerpc(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread); /* * Kludge City: If the first reply to the remove rpc is lost.. * the reply to the retransmitted request will be ENOENT * since the file was in fact removed * Therefore, we cheat and return success. */ if (error == ENOENT) error = 0; } else if (!np->n_sillyrename) error = nfs_sillyrename(dvp, vp, cnp); np->n_attrstamp = 0; return (error); } /* * nfs file remove rpc called from nfs_inactive */ int nfs_removeit(struct sillyrename *sp) { /* * Make sure that the directory vnode is still valid. * XXX we should lock sp->s_dvp here. */ if (sp->s_dvp->v_type == VBAD) return (0); return (nfs_removerpc(sp->s_dvp, sp->s_name, sp->s_namlen, sp->s_cred, NULL)); } /* * Nfs remove rpc, called from nfs_remove() and nfs_removeit(). */ static int nfs_removerpc(struct vnode *dvp, const char *name, int namelen, struct ucred *cred, struct thread *td) { caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_REMOVE]++; mreq = nfsm_reqhead(dvp, NFSPROC_REMOVE, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(namelen)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(name, namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_REMOVE, td, cred); if (v3) nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; mtx_unlock(&(VTONFS(dvp))->n_mtx); return (error); } /* * nfs file rename call */ static int nfs_rename(struct vop_rename_args *ap) { struct vnode *fvp = ap->a_fvp; struct vnode *tvp = ap->a_tvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tdvp = ap->a_tdvp; struct componentname *tcnp = ap->a_tcnp; struct componentname *fcnp = ap->a_fcnp; int error; #ifndef DIAGNOSTIC if ((tcnp->cn_flags & HASBUF) == 0 || (fcnp->cn_flags & HASBUF) == 0) panic("nfs_rename: no name"); #endif /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } if (fvp == tvp) { nfs_printf("nfs_rename: fvp == tvp (can't happen)\n"); error = 0; goto out; } if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) goto out; /* * We have to flush B_DELWRI data prior to renaming * the file. 
If we don't, the delayed-write buffers * can be flushed out later after the file has gone stale * under NFSV3. NFSV2 does not have this problem because * (as far as I can tell) it flushes dirty buffers more * often. * * Skip the rename operation if the fsync fails; this can happen * due to the server's volume being full, when we pushed out data * that was written back to our cache earlier. Not checking for * this condition can result in potential (silent) data loss. */ error = VOP_FSYNC(fvp, MNT_WAIT, fcnp->cn_thread); VOP_UNLOCK(fvp, 0); if (!error && tvp) error = VOP_FSYNC(tvp, MNT_WAIT, tcnp->cn_thread); if (error) goto out; /* * If the tvp exists and is in use, sillyrename it before doing the * rename of the new file over it. * XXX Can't sillyrename a directory. */ if (tvp && vrefcnt(tvp) > 1 && !VTONFS(tvp)->n_sillyrename && tvp->v_type != VDIR && !nfs_sillyrename(tdvp, tvp, tcnp)) { vput(tvp); tvp = NULL; } error = nfs_renamerpc(fdvp, fcnp->cn_nameptr, fcnp->cn_namelen, tdvp, tcnp->cn_nameptr, tcnp->cn_namelen, tcnp->cn_cred, tcnp->cn_thread); if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out: if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); /* * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry. */ if (error == ENOENT) error = 0; return (error); } /* * nfs file rename rpc called from nfs_remove() above */ static int nfs_renameit(struct vnode *sdvp, struct componentname *scnp, struct sillyrename *sp) { return (nfs_renamerpc(sdvp, scnp->cn_nameptr, scnp->cn_namelen, sdvp, sp->s_name, sp->s_namlen, scnp->cn_cred, scnp->cn_thread)); } /* * Do an nfs rename rpc. Called from nfs_rename() and nfs_renameit(). */ static int nfs_renamerpc(struct vnode *fdvp, const char *fnameptr, int fnamelen, struct vnode *tdvp, const char *tnameptr, int tnamelen, struct ucred *cred, struct thread *td) { caddr_t bpos, dpos; int error = 0, fwccflag = NFSV3_WCCRATTR, twccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb; int v3 = NFS_ISV3(fdvp); nfsstats.rpccnt[NFSPROC_RENAME]++; mreq = nfsm_reqhead(fdvp, NFSPROC_RENAME, (NFSX_FH(v3) + NFSX_UNSIGNED)*2 + nfsm_rndup(fnamelen) + nfsm_rndup(tnamelen)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(fdvp, v3); nfsm_strtom(fnameptr, fnamelen, NFS_MAXNAMLEN); nfsm_fhtom(tdvp, v3); nfsm_strtom(tnameptr, tnamelen, NFS_MAXNAMLEN); nfsm_request(fdvp, NFSPROC_RENAME, td, cred); if (v3) { nfsm_wcc_data(fdvp, fwccflag); nfsm_wcc_data(tdvp, twccflag); } m_freem(mrep); nfsmout: mtx_lock(&(VTONFS(fdvp))->n_mtx); VTONFS(fdvp)->n_flag |= NMODIFIED; mtx_unlock(&(VTONFS(fdvp))->n_mtx); mtx_lock(&(VTONFS(tdvp))->n_mtx); VTONFS(tdvp)->n_flag |= NMODIFIED; mtx_unlock(&(VTONFS(tdvp))->n_mtx); if (!fwccflag) VTONFS(fdvp)->n_attrstamp = 0; if (!twccflag) VTONFS(tdvp)->n_attrstamp = 0; return (error); } /* * nfs hard link create call */ static int nfs_link(struct vop_link_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *tdvp = ap->a_tdvp; struct componentname *cnp = ap->a_cnp; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR, attrflag = 0; struct mbuf *mreq, *mrep, *md, *mb; int v3; if (vp->v_mount != tdvp->v_mount) { return (EXDEV); } /* * Push all writes to the server, so that the attribute cache * doesn't get "out of sync" with the server. * XXX There should be a better way! 
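 * (Without the flush, the post-op attributes in the LINK reply could
 * describe the file before our cached writes, and nfsm_postop_attr()
 * below would then load those stale attributes over our cache.)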
*/ VOP_FSYNC(vp, MNT_WAIT, cnp->cn_thread); v3 = NFS_ISV3(vp); nfsstats.rpccnt[NFSPROC_LINK]++; mreq = nfsm_reqhead(vp, NFSPROC_LINK, NFSX_FH(v3)*2 + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); nfsm_fhtom(tdvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(vp, NFSPROC_LINK, cnp->cn_thread, cnp->cn_cred); if (v3) { nfsm_postop_attr(vp, attrflag); nfsm_wcc_data(tdvp, wccflag); } m_freem(mrep); nfsmout: mtx_lock(&(VTONFS(tdvp))->n_mtx); VTONFS(tdvp)->n_flag |= NMODIFIED; mtx_unlock(&(VTONFS(tdvp))->n_mtx); if (!attrflag) VTONFS(vp)->n_attrstamp = 0; if (!wccflag) VTONFS(tdvp)->n_attrstamp = 0; return (error); } /* * nfs symbolic link create call */ static int nfs_symlink(struct vop_symlink_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsv2_sattr *sp; caddr_t bpos, dpos; int slen, error = 0, wccflag = NFSV3_WCCRATTR, gotvp; struct mbuf *mreq, *mrep, *md, *mb; struct vnode *newvp = NULL; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_SYMLINK]++; slen = strlen(ap->a_target); mreq = nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH(v3) + 2*NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen) + nfsm_rndup(slen) + NFSX_SATTR(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); if (v3) { nfsm_v3attrbuild(vap, FALSE); } nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN); if (!v3) { sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VLNK, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = nfs_xdrneg1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } /* * Issue the NFS request and get the rpc response. * * Only NFSv3 responses returning an error of 0 actually return * a file handle that can be converted into newvp without having * to do an extra lookup rpc. */ nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_thread, cnp->cn_cred); if (v3) { if (error == 0) nfsm_mtofh(dvp, newvp, v3, gotvp); nfsm_wcc_data(dvp, wccflag); } /* * out code jumps -> here, mrep is also freed. */ m_freem(mrep); nfsmout: /* * If we do not have an error and we could not extract the newvp from * the response due to the request being NFSv2, we have to do a * lookup in order to obtain a newvp to return. 
*/ if (error == 0 && newvp == NULL) { struct nfsnode *np = NULL; error = nfs_lookitup(dvp, cnp->cn_nameptr, cnp->cn_namelen, cnp->cn_cred, cnp->cn_thread, &np); if (!error) newvp = NFSTOV(np); } if (error) { if (newvp) vput(newvp); } else { *ap->a_vpp = newvp; } mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; mtx_unlock(&(VTONFS(dvp))->n_mtx); if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; return (error); } /* * nfs make dir call */ static int nfs_mkdir(struct vop_mkdir_args *ap) { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct componentname *cnp = ap->a_cnp; struct nfsv2_sattr *sp; int len; struct nfsnode *np = NULL; struct vnode *newvp = NULL; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR; int gotvp = 0; struct mbuf *mreq, *mrep, *md, *mb; struct vattr vattr; int v3 = NFS_ISV3(dvp); if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred)) != 0) return (error); len = cnp->cn_namelen; nfsstats.rpccnt[NFSPROC_MKDIR]++; mreq = nfsm_reqhead(dvp, NFSPROC_MKDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len) + NFSX_SATTR(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN); if (v3) { nfsm_v3attrbuild(vap, FALSE); } else { sp = nfsm_build(struct nfsv2_sattr *, NFSX_V2SATTR); sp->sa_mode = vtonfsv2_mode(VDIR, vap->va_mode); sp->sa_uid = nfs_xdrneg1; sp->sa_gid = nfs_xdrneg1; sp->sa_size = nfs_xdrneg1; txdr_nfsv2time(&vap->va_atime, &sp->sa_atime); txdr_nfsv2time(&vap->va_mtime, &sp->sa_mtime); } nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_thread, cnp->cn_cred); if (!error) nfsm_mtofh(dvp, newvp, v3, gotvp); if (v3) nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; mtx_unlock(&(VTONFS(dvp))->n_mtx); if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; if (error == 0 && newvp == NULL) { error = nfs_lookitup(dvp, cnp->cn_nameptr, len, cnp->cn_cred, cnp->cn_thread, &np); if (!error) { newvp = NFSTOV(np); if (newvp->v_type != VDIR) error = EEXIST; } } if (error) { if (newvp) vput(newvp); } else *ap->a_vpp = newvp; return (error); } /* * nfs remove directory call */ static int nfs_rmdir(struct vop_rmdir_args *ap) { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb; int v3 = NFS_ISV3(dvp); if (dvp == vp) return (EINVAL); nfsstats.rpccnt[NFSPROC_RMDIR]++; mreq = nfsm_reqhead(dvp, NFSPROC_RMDIR, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(cnp->cn_namelen)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_thread, cnp->cn_cred); if (v3) nfsm_wcc_data(dvp, wccflag); m_freem(mrep); nfsmout: mtx_lock(&(VTONFS(dvp))->n_mtx); VTONFS(dvp)->n_flag |= NMODIFIED; mtx_unlock(&(VTONFS(dvp))->n_mtx); if (!wccflag) VTONFS(dvp)->n_attrstamp = 0; cache_purge(dvp); cache_purge(vp); /* * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry. 
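 * (If the first RMDIR reply is lost, the retransmitted request finds the
 * directory already gone and the server legitimately answers ENOENT,
 * just as in the nfs_remove() case above.)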
*/ if (error == ENOENT) error = 0; return (error); } /* * nfs readdir call */ static int nfs_readdir(struct vop_readdir_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct uio *uio = ap->a_uio; int tresid, error = 0; struct vattr vattr; if (vp->v_type != VDIR) return(EPERM); /* * First, check for hit on the EOF offset cache */ if (np->n_direofoffset > 0 && uio->uio_offset >= np->n_direofoffset && (np->n_flag & NMODIFIED) == 0) { if (VOP_GETATTR(vp, &vattr, ap->a_cred) == 0) { mtx_lock(&np->n_mtx); if (!NFS_TIMESPEC_COMPARE(&np->n_mtime, &vattr.va_mtime)) { mtx_unlock(&np->n_mtx); nfsstats.direofcache_hits++; goto out; } else mtx_unlock(&np->n_mtx); } } /* * Call nfs_bioread() to do the real work. */ tresid = uio->uio_resid; error = nfs_bioread(vp, uio, 0, ap->a_cred); if (!error && uio->uio_resid == tresid) { nfsstats.direofcache_misses++; } out: return (error); } /* * Readdir rpc call. * Called from below the buffer cache by nfs_doio(). */ int nfs_readdirrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int len, left; struct dirent *dp = NULL; u_int32_t *tl; caddr_t cp; nfsuint64 *cookiep; caddr_t bpos, dpos; struct mbuf *mreq, *mrep, *md, *mb; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp); u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, bigenough = 1; int attrflag; int v3 = NFS_ISV3(vp); #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirrpc bad uio"); #endif /* * If there is no cookie, assume directory was stale. */ nfs_dircookie_lock(dnp); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) { cookie = *cookiep; nfs_dircookie_unlock(dnp); } else { nfs_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); } /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. 
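 *
 * Stripped of the mbuf plumbing, the loop below is the classic NFS
 * directory paging pattern.  A rough sketch, with do_readdir_rpc() as a
 * hypothetical stand-in for the wire call (it is not a real function):
 *
 *	cookie = 0;
 *	while (!eof && there_is_room(uiop)) {
 *		n = do_readdir_rpc(fh, cookie, nm_readdirsize, ents, &eof);
 *		for (i = 0; i < n; i++) {
 *			append_dirent(uiop, &ents[i]);
 *			cookie = ents[i].cookie;
 *		}
 *	}
 *
 * The cookie names the resume point, so each RPC continues where the
 * previous reply stopped; that is why a missing cookie was answered
 * with NFSERR_BAD_COOKIE above.
 */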
*/ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIR]++; mreq = nfsm_reqhead(vp, NFSPROC_READDIR, NFSX_FH(v3) + NFSX_READDIR(v3)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, v3); if (v3) { tl = nfsm_build(u_int32_t *, 5 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; mtx_lock(&dnp->n_mtx); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; mtx_unlock(&dnp->n_mtx); } else { tl = nfsm_build(u_int32_t *, 2 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; } *tl = txdr_unsigned(nmp->nm_readdirsize); nfsm_request(vp, NFSPROC_READDIR, uiop->uio_td, cred); if (v3) { nfsm_postop_attr(vp, attrflag); if (!error) { tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); mtx_lock(&dnp->n_mtx); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl; mtx_unlock(&dnp->n_mtx); } else { m_freem(mrep); goto nfsmout; } } tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { if (v3) { tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); fileno = fxdr_hyper(tl); len = fxdr_unsigned(int, *(tl + 2)); } else { tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); fileno = fxdr_unsigned(u_quad_t, *tl++); len = fxdr_unsigned(int, *tl); } if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination */ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp = '\0'; /* null terminate */ uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); if (v3) { tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); } else { tl = nfsm_dissect(u_int32_t *, 2 * NFSX_UNSIGNED); } if (bigenough) { cookie.nfsuquad[0] = *tl++; if (v3) cookie.nfsuquad[1] = *tl++; } else if (v3) tl += 2; else tl++; more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) nfs_printf("EEK! 
readdirrpc resid > 0\n"); nfs_dircookie_lock(dnp); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; nfs_dircookie_unlock(dnp); } nfsmout: return (error); } /* * NFS V3 readdir plus RPC. Used in place of nfs_readdirrpc(). */ int nfs_readdirplusrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred) { int len, left; struct dirent *dp; u_int32_t *tl; caddr_t cp; struct vnode *newvp; nfsuint64 *cookiep; caddr_t bpos, dpos, dpossav1, dpossav2; struct mbuf *mreq, *mrep, *md, *mb, *mdsav1, *mdsav2; struct nameidata nami, *ndp = &nami; struct componentname *cnp = &ndp->ni_cnd; nfsuint64 cookie; struct nfsmount *nmp = VFSTONFS(vp->v_mount); struct nfsnode *dnp = VTONFS(vp), *np; nfsfh_t *fhp; u_quad_t fileno; int error = 0, tlen, more_dirs = 1, blksiz = 0, doit, bigenough = 1, i; int attrflag, fhsize; #ifndef nolint dp = NULL; #endif #ifndef DIAGNOSTIC if (uiop->uio_iovcnt != 1 || (uiop->uio_offset & (DIRBLKSIZ - 1)) || (uiop->uio_resid & (DIRBLKSIZ - 1))) panic("nfs readdirplusrpc bad uio"); #endif ndp->ni_dvp = vp; newvp = NULLVP; /* * If there is no cookie, assume directory was stale. */ nfs_dircookie_lock(dnp); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 0); if (cookiep) { cookie = *cookiep; nfs_dircookie_unlock(dnp); } else { nfs_dircookie_unlock(dnp); return (NFSERR_BAD_COOKIE); } /* * Loop around doing readdir rpc's of size nm_readdirsize * truncated to a multiple of DIRBLKSIZ. * The stopping criteria is EOF or buffer full. */ while (more_dirs && bigenough) { nfsstats.rpccnt[NFSPROC_READDIRPLUS]++; mreq = nfsm_reqhead(vp, NFSPROC_READDIRPLUS, NFSX_FH(1) + 6 * NFSX_UNSIGNED); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, 1); tl = nfsm_build(u_int32_t *, 6 * NFSX_UNSIGNED); *tl++ = cookie.nfsuquad[0]; *tl++ = cookie.nfsuquad[1]; mtx_lock(&dnp->n_mtx); *tl++ = dnp->n_cookieverf.nfsuquad[0]; *tl++ = dnp->n_cookieverf.nfsuquad[1]; mtx_unlock(&dnp->n_mtx); *tl++ = txdr_unsigned(nmp->nm_readdirsize); *tl = txdr_unsigned(nmp->nm_rsize); nfsm_request(vp, NFSPROC_READDIRPLUS, uiop->uio_td, cred); nfsm_postop_attr(vp, attrflag); if (error) { m_freem(mrep); goto nfsmout; } tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); mtx_lock(&dnp->n_mtx); dnp->n_cookieverf.nfsuquad[0] = *tl++; dnp->n_cookieverf.nfsuquad[1] = *tl++; mtx_unlock(&dnp->n_mtx); more_dirs = fxdr_unsigned(int, *tl); /* loop thru the dir entries, doctoring them to 4bsd form */ while (more_dirs && bigenough) { tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); fileno = fxdr_hyper(tl); len = fxdr_unsigned(int, *(tl + 2)); if (len <= 0 || len > NFS_MAXNAMLEN) { error = EBADRPC; m_freem(mrep); goto nfsmout; } tlen = nfsm_rndup(len); if (tlen == len) tlen += 4; /* To ensure null termination*/ left = DIRBLKSIZ - blksiz; if ((tlen + DIRHDSIZ) > left) { dp->d_reclen += left; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; blksiz = 0; } if ((tlen + DIRHDSIZ) > uiop->uio_resid) bigenough = 0; if (bigenough) { dp = (struct dirent *)uiop->uio_iov->iov_base; dp->d_fileno = (int)fileno; dp->d_namlen = len; dp->d_reclen = tlen + DIRHDSIZ; dp->d_type = DT_UNKNOWN; blksiz += dp->d_reclen; if (blksiz == DIRBLKSIZ) blksiz = 0; uiop->uio_offset += DIRHDSIZ; uiop->uio_resid -= DIRHDSIZ; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + DIRHDSIZ; uiop->uio_iov->iov_len -= DIRHDSIZ; cnp->cn_nameptr = uiop->uio_iov->iov_base; cnp->cn_namelen = len; nfsm_mtouio(uiop, len); cp = uiop->uio_iov->iov_base; tlen -= len; *cp 
= '\0'; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + tlen; uiop->uio_iov->iov_len -= tlen; uiop->uio_offset += tlen; uiop->uio_resid -= tlen; } else nfsm_adv(nfsm_rndup(len)); tl = nfsm_dissect(u_int32_t *, 3 * NFSX_UNSIGNED); if (bigenough) { cookie.nfsuquad[0] = *tl++; cookie.nfsuquad[1] = *tl++; } else tl += 2; /* * Since the attributes are before the file handle * (sigh), we must skip over the attributes and then * come back and get them. */ attrflag = fxdr_unsigned(int, *tl); if (attrflag) { dpossav1 = dpos; mdsav1 = md; nfsm_adv(NFSX_V3FATTR); tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); doit = fxdr_unsigned(int, *tl); /* * Skip loading the attrs for "..". There's a * race between loading the attrs here and * lookups that look for the directory currently * being read (in the parent). We try to acquire * the exclusive lock on ".." here, owning the * lock on the directory being read. Lookup will * hold the lock on ".." and try to acquire the * lock on the directory being read. * * There are other ways of fixing this, one would * be to do a trylock on the ".." vnode and skip * loading the attrs on ".." if it happens to be * locked by another process. But skipping the * attrload on ".." seems the easiest option. */ if (strcmp(dp->d_name, "..") == 0) { doit = 0; /* * We've already skipped over the attrs, * skip over the filehandle. And store d_type * as VDIR. */ tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); nfsm_adv(nfsm_rndup(i)); dp->d_type = IFTODT(VTTOIF(VDIR)); } if (doit) { nfsm_getfh(fhp, fhsize, 1); if (NFS_CMPFH(dnp, fhp, fhsize)) { VREF(vp); newvp = vp; np = dnp; } else { error = nfs_nget(vp->v_mount, fhp, fhsize, &np, LK_EXCLUSIVE); if (error) doit = 0; else newvp = NFSTOV(np); } } if (doit && bigenough) { dpossav2 = dpos; dpos = dpossav1; mdsav2 = md; md = mdsav1; nfsm_loadattr(newvp, NULL); dpos = dpossav2; md = mdsav2; dp->d_type = IFTODT(VTTOIF(np->n_vattr.va_type)); ndp->ni_vp = newvp; /* Update n_ctime, so subsequent lookup doesn't purge entry */ np->n_ctime = np->n_vattr.va_ctime.tv_sec; cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp); } } else { /* Just skip over the file handle */ tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i) { tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); fhsize = fxdr_unsigned(int, *tl); nfsm_adv(nfsm_rndup(fhsize)); } } if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); more_dirs = fxdr_unsigned(int, *tl); } /* * If at end of rpc data, get the eof boolean */ if (!more_dirs) { tl = nfsm_dissect(u_int32_t *, NFSX_UNSIGNED); more_dirs = (fxdr_unsigned(int, *tl) == 0); } m_freem(mrep); } /* * Fill last record, iff any, out to a multiple of DIRBLKSIZ * by increasing d_reclen for the last record. */ if (blksiz > 0) { left = DIRBLKSIZ - blksiz; dp->d_reclen += left; uiop->uio_iov->iov_base = (char *)uiop->uio_iov->iov_base + left; uiop->uio_iov->iov_len -= left; uiop->uio_offset += left; uiop->uio_resid -= left; } /* * We are now either at the end of the directory or have filled the * block. */ if (bigenough) dnp->n_direofoffset = uiop->uio_offset; else { if (uiop->uio_resid > 0) nfs_printf("EEK! readdirplusrpc resid > 0\n"); nfs_dircookie_lock(dnp); cookiep = nfs_getcookie(dnp, uiop->uio_offset, 1); *cookiep = cookie; nfs_dircookie_unlock(dnp); } nfsmout: if (newvp != NULLVP) { if (newvp == vp) vrele(newvp); else vput(newvp); newvp = NULLVP; } return (error); } /* * Silly rename. 
To make the stateless NFS filesystem look a little * more like "ufs", a remove of an active vnode is translated to a rename * to a funny looking filename that is removed by nfs_inactive on the * nfsnode. There is the potential for another process on a different client * to create the same funny name between the time the nfs_lookitup() check * fails and the nfs_rename() completes, but... */ static int nfs_sillyrename(struct vnode *dvp, struct vnode *vp, struct componentname *cnp) { struct sillyrename *sp; struct nfsnode *np; int error; short pid; unsigned int lticks; cache_purge(dvp); np = VTONFS(vp); #ifndef DIAGNOSTIC if (vp->v_type == VDIR) panic("nfs: sillyrename dir"); #endif sp = malloc(sizeof (struct sillyrename), M_NFSREQ, M_WAITOK); sp->s_cred = crhold(cnp->cn_cred); sp->s_dvp = dvp; sp->s_removeit = nfs_removeit; VREF(dvp); /* * Fudge together a funny name. * Changing the format of the funny name to accommodate more * sillynames per directory. * The name is now changed to .nfs.<ticks>.<pid>.4, where ticks is * CPU ticks since boot. */ pid = cnp->cn_thread->td_proc->p_pid; lticks = (unsigned int)ticks; for ( ; ; ) { sp->s_namlen = sprintf(sp->s_name, ".nfs.%08x.%04x4.4", lticks, pid); if (nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_thread, NULL)) break; lticks++; } error = nfs_renameit(dvp, cnp, sp); if (error) goto bad; error = nfs_lookitup(dvp, sp->s_name, sp->s_namlen, sp->s_cred, cnp->cn_thread, &np); np->n_sillyrename = sp; return (0); bad: vrele(sp->s_dvp); crfree(sp->s_cred); free((caddr_t)sp, M_NFSREQ); return (error); } /* * Look up a file name and optionally either update the file handle or * allocate an nfsnode, depending on the value of npp. * npp == NULL --> just do the lookup * *npp == NULL --> allocate a new nfsnode and make sure attributes are * handled too * *npp != NULL --> update the file handle in the vnode */ static int nfs_lookitup(struct vnode *dvp, const char *name, int len, struct ucred *cred, struct thread *td, struct nfsnode **npp) { struct vnode *newvp = NULL; struct nfsnode *np, *dnp = VTONFS(dvp); caddr_t bpos, dpos; int error = 0, fhlen, attrflag; struct mbuf *mreq, *mrep, *md, *mb; nfsfh_t *nfhp; int v3 = NFS_ISV3(dvp); nfsstats.rpccnt[NFSPROC_LOOKUP]++; mreq = nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH(v3) + NFSX_UNSIGNED + nfsm_rndup(len)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(dvp, v3); nfsm_strtom(name, len, NFS_MAXNAMLEN); nfsm_request(dvp, NFSPROC_LOOKUP, td, cred); if (npp && !error) { nfsm_getfh(nfhp, fhlen, v3); if (*npp) { np = *npp; if (np->n_fhsize > NFS_SMALLFH && fhlen <= NFS_SMALLFH) { free((caddr_t)np->n_fhp, M_NFSBIGFH); np->n_fhp = &np->n_fh; } else if (np->n_fhsize <= NFS_SMALLFH && fhlen > NFS_SMALLFH) np->n_fhp = (nfsfh_t *)malloc(fhlen, M_NFSBIGFH, M_WAITOK); bcopy((caddr_t)nfhp, (caddr_t)np->n_fhp, fhlen); np->n_fhsize = fhlen; newvp = NFSTOV(np); } else if (NFS_CMPFH(dnp, nfhp, fhlen)) { VREF(dvp); newvp = dvp; } else { error = nfs_nget(dvp->v_mount, nfhp, fhlen, &np, LK_EXCLUSIVE); if (error) { m_freem(mrep); return (error); } newvp = NFSTOV(np); } if (v3) { nfsm_postop_attr(newvp, attrflag); if (!attrflag && *npp == NULL) { m_freem(mrep); if (newvp == dvp) vrele(newvp); else vput(newvp); return (ENOENT); } } else nfsm_loadattr(newvp, NULL); } m_freem(mrep); nfsmout: if (npp && *npp == NULL) { if (error) { if (newvp) { if (newvp == dvp) vrele(newvp); else vput(newvp); } } else *npp = np; } return (error); } /* * Nfs Version 3 commit rpc */ int nfs_commit(struct vnode *vp, u_quad_t offset, int cnt, struct ucred *cred, 
struct thread *td) { u_int32_t *tl; struct nfsmount *nmp = VFSTONFS(vp->v_mount); caddr_t bpos, dpos; int error = 0, wccflag = NFSV3_WCCRATTR; struct mbuf *mreq, *mrep, *md, *mb; mtx_lock(&nmp->nm_mtx); if ((nmp->nm_state & NFSSTA_HASWRITEVERF) == 0) { mtx_unlock(&nmp->nm_mtx); return (0); } mtx_unlock(&nmp->nm_mtx); nfsstats.rpccnt[NFSPROC_COMMIT]++; mreq = nfsm_reqhead(vp, NFSPROC_COMMIT, NFSX_FH(1)); mb = mreq; bpos = mtod(mb, caddr_t); nfsm_fhtom(vp, 1); tl = nfsm_build(u_int32_t *, 3 * NFSX_UNSIGNED); txdr_hyper(offset, tl); tl += 2; *tl = txdr_unsigned(cnt); nfsm_request(vp, NFSPROC_COMMIT, td, cred); nfsm_wcc_data(vp, wccflag); if (!error) { tl = nfsm_dissect(u_int32_t *, NFSX_V3WRITEVERF); if (bcmp((caddr_t)nmp->nm_verf, (caddr_t)tl, NFSX_V3WRITEVERF)) { bcopy((caddr_t)tl, (caddr_t)nmp->nm_verf, NFSX_V3WRITEVERF); error = NFSERR_STALEWRITEVERF; } } m_freem(mrep); nfsmout: return (error); } /* * Strategy routine. * For async requests when nfsiod(s) are running, queue the request by * calling nfs_asyncio(); otherwise just call nfs_doio() to do the * request. */ static int nfs_strategy(struct vop_strategy_args *ap) { struct buf *bp = ap->a_bp; struct ucred *cr; KASSERT(!(bp->b_flags & B_DONE), ("nfs_strategy: buffer %p unexpectedly marked B_DONE", bp)); BUF_ASSERT_HELD(bp); if (bp->b_iocmd == BIO_READ) cr = bp->b_rcred; else cr = bp->b_wcred; /* * If the op is asynchronous and an i/o daemon is waiting, * queue the request, wake it up and wait for completion; * otherwise just do it ourselves. */ if ((bp->b_flags & B_ASYNC) == 0 || nfs_asyncio(VFSTONFS(ap->a_vp->v_mount), bp, NOCRED, curthread)) (void)nfs_doio(ap->a_vp, bp, cr, curthread); return (0); } /* * fsync vnode op. Just call nfs_flush() with commit == 1. */ /* ARGSUSED */ static int nfs_fsync(struct vop_fsync_args *ap) { return (nfs_flush(ap->a_vp, ap->a_waitfor, 1)); } /* * Flush all the blocks associated with a vnode. * Walk through the buffer pool and push any dirty pages * associated with the vnode. */ static int nfs_flush(struct vnode *vp, int waitfor, int commit) { struct nfsnode *np = VTONFS(vp); struct buf *bp; int i; struct buf *nbp; struct nfsmount *nmp = VFSTONFS(vp->v_mount); int error = 0, slptimeo = 0, slpflag = 0, retv, bvecpos; int passone = 1; u_quad_t off, endoff, toff; struct ucred *wcred = NULL; struct buf **bvec = NULL; struct bufobj *bo; struct thread *td = curthread; #ifndef NFS_COMMITBVECSIZ #define NFS_COMMITBVECSIZ 20 #endif struct buf *bvec_on_stack[NFS_COMMITBVECSIZ]; int bvecsize = 0, bveccount; if (nmp->nm_flag & NFSMNT_INT) slpflag = PCATCH; if (!commit) passone = 0; bo = &vp->v_bufobj; /* * A b_flags == (B_DELWRI | B_NEEDCOMMIT) block has been written to the * server, but has not been committed to stable storage on the server * yet. On the first pass, the byte range is worked out and the commit * rpc is done. On the second pass, nfs_writebp() is called to do the * job. */ again: off = (u_quad_t)-1; endoff = 0; bvecpos = 0; if (NFS_ISV3(vp) && commit) { if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); /* * Count up how many buffers are waiting for a commit. */ bveccount = 0; BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (!BUF_ISLOCKED(bp) && (bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) == (B_DELWRI | B_NEEDCOMMIT)) bveccount++; } /* * Allocate space to remember the list of bufs to commit. It is * important to use M_NOWAIT here to avoid a race with nfs_write. 
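 * (Presumably: sleeping in malloc() here would give nfs_write() a window
 * to change the set of B_DELWRI | B_NEEDCOMMIT buffers that was just
 * counted under the bufobj lock.)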
* If we can't get memory (for whatever reason), we will end up * committing the buffers one-by-one in the loop below. */ if (bveccount > NFS_COMMITBVECSIZ) { /* * Release the vnode interlock to avoid a lock * order reversal. */ BO_UNLOCK(bo); bvec = (struct buf **) malloc(bveccount * sizeof(struct buf *), M_TEMP, M_NOWAIT); BO_LOCK(bo); if (bvec == NULL) { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } else bvecsize = bveccount; } else { bvec = bvec_on_stack; bvecsize = NFS_COMMITBVECSIZ; } TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (bvecpos >= bvecsize) break; if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { nbp = TAILQ_NEXT(bp, b_bobufs); continue; } if ((bp->b_flags & (B_DELWRI | B_NEEDCOMMIT)) != (B_DELWRI | B_NEEDCOMMIT)) { BUF_UNLOCK(bp); nbp = TAILQ_NEXT(bp, b_bobufs); continue; } BO_UNLOCK(bo); bremfree(bp); /* * Work out if all buffers are using the same cred * so we can deal with them all with one commit. * * NOTE: we are not clearing B_DONE here, so we have * to do it later on in this routine if we intend to * initiate I/O on the bp. * * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ if (wcred == NULL) wcred = bp->b_wcred; else if (wcred != bp->b_wcred) wcred = NOCRED; vfs_busy_pages(bp, 1); BO_LOCK(bo); /* * bp is protected by being locked, but nbp is not * and vfs_busy_pages() may sleep. We have to * recalculate nbp. */ nbp = TAILQ_NEXT(bp, b_bobufs); /* * A list of these buffers is kept so that the * second loop knows which buffers have actually * been committed. This is necessary, since there * may be a race between the commit rpc and new * uncommitted writes on the file. */ bvec[bvecpos++] = bp; toff = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; if (toff < off) off = toff; toff += (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); if (toff > endoff) endoff = toff; } BO_UNLOCK(bo); } if (bvecpos > 0) { /* * Commit data on the server, as required. * If all bufs are using the same wcred, then use that with * one call for all of them, otherwise commit each one * separately. */ if (wcred != NOCRED) retv = nfs_commit(vp, off, (int)(endoff - off), wcred, td); else { retv = 0; for (i = 0; i < bvecpos; i++) { off_t off, size; bp = bvec[i]; off = ((u_quad_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; size = (u_quad_t)(bp->b_dirtyend - bp->b_dirtyoff); retv = nfs_commit(vp, off, (int)size, bp->b_wcred, td); if (retv) break; } } if (retv == NFSERR_STALEWRITEVERF) nfs_clearcommit(vp->v_mount); /* * Now, either mark the blocks I/O done or mark the * blocks dirty, depending on whether the commit * succeeded. */ for (i = 0; i < bvecpos; i++) { bp = bvec[i]; bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); if (retv) { /* * Error, leave B_DELWRI intact */ vfs_unbusy_pages(bp); brelse(bp); } else { /* * Success, remove B_DELWRI ( bundirty() ). * * b_dirtyoff/b_dirtyend seem to be NFS * specific. We should probably move that * into bundirty(). XXX */ bufobj_wref(bo); bp->b_flags |= B_ASYNC; bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_dirtyoff = bp->b_dirtyend = 0; bufdone(bp); } } } /* * Start/do any write(s) that are required. 
 */
loop: BO_LOCK(bo); TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) { if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL)) { if (waitfor != MNT_WAIT || passone) continue; error = BUF_TIMELOCK(bp, LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK, BO_MTX(bo), "nfsfsync", slpflag, slptimeo); if (error == 0) { BUF_UNLOCK(bp); goto loop; } if (error == ENOLCK) { error = 0; goto loop; } if (nfs_sigintr(nmp, NULL, td)) { error = EINTR; goto done; } if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } goto loop; } if ((bp->b_flags & B_DELWRI) == 0) panic("nfs_fsync: not dirty"); if ((passone || !commit) && (bp->b_flags & B_NEEDCOMMIT)) { BUF_UNLOCK(bp); continue; } BO_UNLOCK(bo); bremfree(bp); bp->b_flags |= B_ASYNC; bwrite(bp); if (nfs_sigintr(nmp, NULL, td)) { error = EINTR; goto done; } goto loop; } if (passone) { passone = 0; BO_UNLOCK(bo); goto again; } if (waitfor == MNT_WAIT) { while (bo->bo_numoutput) { error = bufobj_wwait(bo, slpflag, slptimeo); if (error) { BO_UNLOCK(bo); error = nfs_sigintr(nmp, NULL, td); if (error) goto done; if (slpflag == PCATCH) { slpflag = 0; slptimeo = 2 * hz; } BO_LOCK(bo); } } if (bo->bo_dirty.bv_cnt != 0 && commit) { BO_UNLOCK(bo); goto loop; } /* * Wait for all the async IO requests to drain */ BO_UNLOCK(bo); mtx_lock(&np->n_mtx); while (np->n_directio_asyncwr > 0) { np->n_flag |= NFSYNCWAIT; error = nfs_msleep(td, (caddr_t)&np->n_directio_asyncwr, &np->n_mtx, slpflag | (PRIBIO + 1), "nfsfsync", 0); if (error) { if (nfs_sigintr(nmp, (struct nfsreq *)0, td)) { mtx_unlock(&np->n_mtx); error = EINTR; goto done; } } } mtx_unlock(&np->n_mtx); } else BO_UNLOCK(bo); mtx_lock(&np->n_mtx); if (np->n_flag & NWRITEERR) { error = np->n_error; np->n_flag &= ~NWRITEERR; } if (commit && bo->bo_dirty.bv_cnt == 0 && bo->bo_numoutput == 0 && np->n_directio_asyncwr == 0) np->n_flag &= ~NMODIFIED; mtx_unlock(&np->n_mtx); done: if (bvec != NULL && bvec != bvec_on_stack) free(bvec, M_TEMP); return (error); } /* * NFS advisory byte-level locks. */ static int nfs_advlock(struct vop_advlock_args *ap) { struct vnode *vp = ap->a_vp; u_quad_t size; int error; error = vn_lock(vp, LK_SHARED); if (error) return (error); if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { size = VTONFS(vp)->n_size; VOP_UNLOCK(vp, 0); error = lf_advlock(ap, &(vp->v_lockf), size); } else { if (nfs_advlock_p) error = nfs_advlock_p(ap); else error = ENOLCK; } return (error); } /* * NFS advisory byte-level locks. */ static int nfs_advlockasync(struct vop_advlockasync_args *ap) { struct vnode *vp = ap->a_vp; u_quad_t size; int error; error = vn_lock(vp, LK_SHARED); if (error) return (error); if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NOLOCKD) != 0) { size = VTONFS(vp)->n_size; VOP_UNLOCK(vp, 0); error = lf_advlockasync(ap, &(vp->v_lockf), size); } else { VOP_UNLOCK(vp, 0); error = EOPNOTSUPP; } return (error); } /* * Print out the contents of an nfsnode. */ static int nfs_print(struct vop_print_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); nfs_printf("\tfileid %ld fsid 0x%x", np->n_vattr.va_fileid, np->n_vattr.va_fsid); if (vp->v_type == VFIFO) fifo_printinfo(vp); printf("\n"); return (0); } /* * This is the "real" nfs::bwrite(struct buf*). * We set B_CACHE if this is a VMIO buffer. 
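 * The force argument is no longer used (note the __unused tag on the
 * parameter below); nfs_bwrite() still passes 1 for historical reasons.
 */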
*/ int nfs_writebp(struct buf *bp, int force __unused, struct thread *td) { int s; int oldflags = bp->b_flags; #if 0 int retv = 1; off_t off; #endif BUF_ASSERT_HELD(bp); if (bp->b_flags & B_INVAL) { brelse(bp); return(0); } bp->b_flags |= B_CACHE; /* * Undirty the bp. We will redirty it later if the I/O fails. */ s = splbio(); bundirty(bp); bp->b_flags &= ~B_DONE; bp->b_ioflags &= ~BIO_ERROR; bp->b_iocmd = BIO_WRITE; bufobj_wref(bp->b_bufobj); curthread->td_ru.ru_oublock++; splx(s); /* * Note: to avoid loopback deadlocks, we do not * assign b_runningbufspace. */ vfs_busy_pages(bp, 1); BUF_KERNPROC(bp); bp->b_iooffset = dbtob(bp->b_blkno); bstrategy(bp); if( (oldflags & B_ASYNC) == 0) { int rtval = bufwait(bp); if (oldflags & B_DELWRI) { s = splbio(); reassignbuf(bp); splx(s); } brelse(bp); return (rtval); } return (0); } /* * nfs special file access vnode op. * Essentially just get vattr and then imitate iaccess() since the device is * local to the client. */ static int nfsspec_access(struct vop_access_args *ap) { struct vattr *vap; struct ucred *cred = ap->a_cred; struct vnode *vp = ap->a_vp; accmode_t accmode = ap->a_accmode; struct vattr vattr; int error; /* * Disallow write attempts on filesystems mounted read-only; * unless the file is a socket, fifo, or a block or character * device resident on the filesystem. */ if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return (EROFS); default: break; } } vap = &vattr; error = VOP_GETATTR(vp, vap, cred); if (error) goto out; error = vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid, accmode, cred, NULL); out: return error; } /* * Read wrapper for fifos. */ static int nfsfifo_read(struct vop_read_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); int error; /* * Set access flag. */ mtx_lock(&np->n_mtx); np->n_flag |= NACC; getnanotime(&np->n_atim); mtx_unlock(&np->n_mtx); error = fifo_specops.vop_read(ap); return error; } /* * Write wrapper for fifos. */ static int nfsfifo_write(struct vop_write_args *ap) { struct nfsnode *np = VTONFS(ap->a_vp); /* * Set update flag. */ mtx_lock(&np->n_mtx); np->n_flag |= NUPD; getnanotime(&np->n_mtim); mtx_unlock(&np->n_mtx); return(fifo_specops.vop_write(ap)); } /* * Close wrapper for fifos. * * Update the times on the nfsnode then do fifo close. */ static int nfsfifo_close(struct vop_close_args *ap) { struct vnode *vp = ap->a_vp; struct nfsnode *np = VTONFS(vp); struct vattr vattr; struct timespec ts; mtx_lock(&np->n_mtx); if (np->n_flag & (NACC | NUPD)) { getnanotime(&ts); if (np->n_flag & NACC) np->n_atim = ts; if (np->n_flag & NUPD) np->n_mtim = ts; np->n_flag |= NCHG; if (vrefcnt(vp) == 1 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { VATTR_NULL(&vattr); if (np->n_flag & NACC) vattr.va_atime = np->n_atim; if (np->n_flag & NUPD) vattr.va_mtime = np->n_mtim; mtx_unlock(&np->n_mtx); (void)VOP_SETATTR(vp, &vattr, ap->a_cred); goto out; } } mtx_unlock(&np->n_mtx); out: return (fifo_specops.vop_close(ap)); } /* * Just call nfs_writebp() with the force argument set to 1. * * NOTE: B_DONE may or may not be set in a_bp on call. 
 */
*/ static int nfs_bwrite(struct buf *bp) { return (nfs_writebp(bp, 1, curthread)); } struct buf_ops buf_ops_nfs = { .bop_name = "buf_ops_nfs", .bop_write = nfs_bwrite, .bop_strategy = bufstrategy, .bop_sync = bufsync, .bop_bdflush = bufbdflush, }; diff --git a/sys/nfsserver/nfs_srvsubs.c b/sys/nfsserver/nfs_srvsubs.c index 49c6c1608dc1..00e52166ebbf 100644 --- a/sys/nfsserver/nfs_srvsubs.c +++ b/sys/nfsserver/nfs_srvsubs.c @@ -1,1532 +1,1532 @@ /*- * Copyright (c) 1989, 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Rick Macklem at The University of Guelph. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)nfs_subs.c 8.8 (Berkeley) 5/22/95 */ #include __FBSDID("$FreeBSD$"); /* * These functions support the macros and help fiddle mbuf chains for * the nfs op functions. They do things like create the rpc header and * copy data between mbuf chains and uio lists. 
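 *
 * Most of the conversions bottom out in the xdr_subs macros, which are
 * thin byte-order shims; XDR is big-endian 32-bit words on the wire.
 * Roughly, as a sketch rather than the exact definitions:
 *
 *	txdr_unsigned(v)	is	htonl((int32_t)(v))
 *	fxdr_unsigned(t, v)	is	((t)ntohl((int32_t)(v)))
 */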
*/ #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Data items converted to xdr at startup, since they are constant * This is kinda hokey, but may save a little time doing byte swaps */ u_int32_t nfsrv_nfs_xdrneg1; u_int32_t nfsrv_rpc_call, nfsrv_rpc_vers, nfsrv_rpc_reply, nfsrv_rpc_msgdenied, nfsrv_rpc_autherr, nfsrv_rpc_mismatch, nfsrv_rpc_auth_unix, nfsrv_rpc_msgaccepted; u_int32_t nfsrv_nfs_prog, nfsrv_nfs_true, nfsrv_nfs_false; /* And other global data */ static const nfstype nfsv2_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON, NFCHR, NFNON }; #define vtonfsv2_type(a) txdr_unsigned(nfsv2_type[((int32_t)(a))]) #define vtonfsv3_mode(m) txdr_unsigned((m) & ALLPERMS) int nfsrv_ticks; #ifdef NFS_LEGACYRPC struct nfssvc_sockhead nfssvc_sockhead; int nfssvc_sockhead_flag; struct nfsd_head nfsd_head; int nfsd_head_flag; #endif static int nfssvc_offset = SYS_nfssvc; static struct sysent nfssvc_prev_sysent; MAKE_SYSENT(nfssvc); struct mtx nfsd_mtx; /* * Mapping of old NFS Version 2 RPC numbers to generic numbers. */ const int nfsrv_nfsv3_procid[NFS_NPROCS] = { NFSPROC_NULL, NFSPROC_GETATTR, NFSPROC_SETATTR, NFSPROC_NOOP, NFSPROC_LOOKUP, NFSPROC_READLINK, NFSPROC_READ, NFSPROC_NOOP, NFSPROC_WRITE, NFSPROC_CREATE, NFSPROC_REMOVE, NFSPROC_RENAME, NFSPROC_LINK, NFSPROC_SYMLINK, NFSPROC_MKDIR, NFSPROC_RMDIR, NFSPROC_READDIR, NFSPROC_FSSTAT, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, NFSPROC_NOOP, }; /* * and the reverse mapping from generic to Version 2 procedure numbers */ const int nfsrvv2_procid[NFS_NPROCS] = { NFSV2PROC_NULL, NFSV2PROC_GETATTR, NFSV2PROC_SETATTR, NFSV2PROC_LOOKUP, NFSV2PROC_NOOP, NFSV2PROC_READLINK, NFSV2PROC_READ, NFSV2PROC_WRITE, NFSV2PROC_CREATE, NFSV2PROC_MKDIR, NFSV2PROC_SYMLINK, NFSV2PROC_CREATE, NFSV2PROC_REMOVE, NFSV2PROC_RMDIR, NFSV2PROC_RENAME, NFSV2PROC_LINK, NFSV2PROC_READDIR, NFSV2PROC_NOOP, NFSV2PROC_STATFS, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, NFSV2PROC_NOOP, }; /* * Maps errno values to nfs error numbers. * Use 0 (which gets converted to NFSERR_IO) as the catch all for ones not * specifically defined in RFC 1094. */ static const u_char nfsrv_v2errmap[ELAST] = { NFSERR_PERM, NFSERR_NOENT, 0, 0, 0, NFSERR_NXIO, 0, 0, 0, 0, 0, 0, NFSERR_ACCES, 0, 0, 0, NFSERR_EXIST, 0, NFSERR_NODEV, NFSERR_NOTDIR, NFSERR_ISDIR, 0, 0, 0, 0, 0, NFSERR_FBIG, NFSERR_NOSPC, 0, NFSERR_ROFS, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NFSERR_NAMETOL, 0, 0, NFSERR_NOTEMPTY, 0, 0, NFSERR_DQUOT, NFSERR_STALE, 0 }; /* * Maps errno values to nfs error numbers. * Although it is not obvious whether or not NFS clients really care if * a returned error value is in the specified list for the procedure, the * safest thing to do is filter them appropriately. For Version 2, the * X/Open XNFS document is the only specification that defines error values * for each RPC (The RFC simply lists all possible error values for all RPCs), * so I have decided to not do this for Version 2. * The first entry is the default error return and the rest are the valid * errors for that RPC in increasing numeric order. 
*/ static const short nfsv3err_null[] = { 0, 0, }; static const short nfsv3err_getattr[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_setattr[] = { NFSERR_IO, NFSERR_PERM, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOT_SYNC, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_lookup[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_access[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_readlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_read[] = { NFSERR_IO, NFSERR_IO, NFSERR_NXIO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_write[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_INVAL, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_create[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_mkdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_symlink[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_mknod[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, NFSERR_BADTYPE, 0, }; static const short nfsv3err_remove[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_rmdir[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_ROFS, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_rename[] = { NFSERR_IO, NFSERR_NOENT, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_ISDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_NOTEMPTY, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_link[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_EXIST, NFSERR_XDEV, NFSERR_NOTDIR, NFSERR_INVAL, NFSERR_NOSPC, NFSERR_ROFS, NFSERR_MLINK, NFSERR_NAMETOL, NFSERR_DQUOT, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_NOTSUPP, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_readdir[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_BAD_COOKIE, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_readdirplus[] = { NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_NOTDIR, NFSERR_STALE, NFSERR_BADHANDLE, 
NFSERR_BAD_COOKIE, NFSERR_NOTSUPP, NFSERR_TOOSMALL, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_fsstat[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_fsinfo[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_pathconf[] = { NFSERR_STALE, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static const short nfsv3err_commit[] = { NFSERR_IO, NFSERR_IO, NFSERR_STALE, NFSERR_BADHANDLE, NFSERR_SERVERFAULT, 0, }; static const short *nfsrv_v3errmap[] = { nfsv3err_null, nfsv3err_getattr, nfsv3err_setattr, nfsv3err_lookup, nfsv3err_access, nfsv3err_readlink, nfsv3err_read, nfsv3err_write, nfsv3err_create, nfsv3err_mkdir, nfsv3err_symlink, nfsv3err_mknod, nfsv3err_remove, nfsv3err_rmdir, nfsv3err_rename, nfsv3err_link, nfsv3err_readdir, nfsv3err_readdirplus, nfsv3err_fsstat, nfsv3err_fsinfo, nfsv3err_pathconf, nfsv3err_commit, }; /* * Called once to initialize data structures... */ static int nfsrv_modevent(module_t mod, int type, void *data) { static int registered; int error = 0; switch (type) { case MOD_LOAD: mtx_init(&nfsd_mtx, "nfsd_mtx", NULL, MTX_DEF); nfsrv_rpc_vers = txdr_unsigned(RPC_VER2); nfsrv_rpc_call = txdr_unsigned(RPC_CALL); nfsrv_rpc_reply = txdr_unsigned(RPC_REPLY); nfsrv_rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED); nfsrv_rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED); nfsrv_rpc_mismatch = txdr_unsigned(RPC_MISMATCH); nfsrv_rpc_autherr = txdr_unsigned(RPC_AUTHERR); nfsrv_rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX); nfsrv_nfs_prog = txdr_unsigned(NFS_PROG); nfsrv_nfs_true = txdr_unsigned(TRUE); nfsrv_nfs_false = txdr_unsigned(FALSE); nfsrv_nfs_xdrneg1 = txdr_unsigned(-1); nfsrv_ticks = (hz * NFS_TICKINTVL + 500) / 1000; if (nfsrv_ticks < 1) nfsrv_ticks = 1; #ifdef NFS_LEGACYRPC nfsrv_initcache(); /* Init the server request cache */ NFSD_LOCK(); nfsrv_init(0); /* Init server data structures */ callout_init(&nfsrv_callout, CALLOUT_MPSAFE); NFSD_UNLOCK(); nfsrv_timer(0); #else NFSD_LOCK(); nfsrv_init(0); /* Init server data structures */ NFSD_UNLOCK(); #endif error = syscall_register(&nfssvc_offset, &nfssvc_sysent, &nfssvc_prev_sysent); if (error) break; registered = 1; break; case MOD_UNLOAD: if (nfsrv_numnfsd != 0) { error = EBUSY; break; } if (registered) syscall_deregister(&nfssvc_offset, &nfssvc_prev_sysent); callout_drain(&nfsrv_callout); #ifdef NFS_LEGACYRPC nfsrv_destroycache(); /* Free the server request cache */ #endif mtx_destroy(&nfsd_mtx); break; default: error = EOPNOTSUPP; break; } return error; } static moduledata_t nfsserver_mod = { "nfsserver", nfsrv_modevent, NULL, }; DECLARE_MODULE(nfsserver, nfsserver_mod, SI_SUB_VFS, SI_ORDER_ANY); /* So that loader and kldload(2) can find us, wherever we are.. */ MODULE_VERSION(nfsserver, 1); #ifndef NFS_LEGACYRPC MODULE_DEPEND(nfsserver, krpc, 1, 1, 1); #endif /* * Set up nameidata for a lookup() call and do it. * * If pubflag is set, this call is done for a lookup operation on the * public filehandle. In that case we allow crossing mountpoints and * absolute pathnames. However, the caller is expected to check that * the lookup result is within the public fs, and deny access if * it is not. * * nfs_namei() clears out the fields that namei() might otherwise leave * filled with garbage. This is mainly ni_vp and ni_dvp when an error * occurs, and ni_dvp when no error occurs but the parent was not requested. * * dirp may be set whether an error is returned or not, and must be * released by the caller. */
int nfs_namei(struct nameidata *ndp, struct nfsrv_descript *nfsd, fhandle_t *fhp, int len, struct nfssvc_sock *slp, struct sockaddr *nam, struct mbuf **mdp, caddr_t *dposp, struct vnode **retdirp, int v3, struct vattr *retdirattrp, int *retdirattr_retp, int pubflag) { int i, rem; struct mbuf *md; char *fromcp, *tocp, *cp; struct iovec aiov; struct uio auio; struct vnode *dp; int error, rdonly, linklen; struct componentname *cnp = &ndp->ni_cnd; int lockleaf = (cnp->cn_flags & LOCKLEAF) != 0; int dvfslocked; int vfslocked; vfslocked = 0; dvfslocked = 0; *retdirp = NULL; cnp->cn_flags |= NOMACCHECK; cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); /* * Copy the name from the mbuf list to ndp->ni_pnbuf * and set the various ndp fields appropriately. */ fromcp = *dposp; tocp = cnp->cn_pnbuf; md = *mdp; rem = mtod(md, caddr_t) + md->m_len - fromcp; for (i = 0; i < len; i++) { while (rem == 0) { md = md->m_next; if (md == NULL) { error = EBADRPC; goto out; } fromcp = mtod(md, caddr_t); rem = md->m_len; } if (*fromcp == '\0' || (!pubflag && *fromcp == '/')) { error = EACCES; goto out; } *tocp++ = *fromcp++; rem--; } *tocp = '\0'; *mdp = md; *dposp = fromcp; len = nfsm_rndup(len)-len; if (len > 0) { if (rem >= len) *dposp += len; else if ((error = nfs_adv(mdp, dposp, len, rem)) != 0) goto out; } /* * Extract and set starting directory. */ error = nfsrv_fhtovp(fhp, FALSE, &dp, &dvfslocked, nfsd, slp, nam, &rdonly, pubflag); if (error) goto out; vfslocked = VFS_LOCK_GIANT(dp->v_mount); if (dp->v_type != VDIR) { vrele(dp); error = ENOTDIR; goto out; } if (rdonly) cnp->cn_flags |= RDONLY; /* * Set return directory. Reference to dp is implicitly transferred * to the returned pointer. */ *retdirp = dp; if (v3) { vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); *retdirattr_retp = VOP_GETATTR(dp, retdirattrp, ndp->ni_cnd.cn_cred); VOP_UNLOCK(dp, 0); } if (pubflag) { /* * Oh joy. For WebNFS, handle those pesky '%' escapes, * and the 'native path' indicator. */ cp = uma_zalloc(namei_zone, M_WAITOK); fromcp = cnp->cn_pnbuf; tocp = cp; if ((unsigned char)*fromcp >= WEBNFS_SPECCHAR_START) { switch ((unsigned char)*fromcp) { case WEBNFS_NATIVE_CHAR: /* * 'Native' path for us is the same * as a path according to the NFS spec, * just skip the escape char. */ fromcp++; break; /* * More may be added in the future, range 0x80-0xff */ default: error = EIO; uma_zfree(namei_zone, cp); goto out; } } /* * Translate the '%' escapes, URL-style. */ while (*fromcp != '\0') { if (*fromcp == WEBNFS_ESC_CHAR) { if (fromcp[1] != '\0' && fromcp[2] != '\0') { fromcp++; *tocp++ = HEXSTRTOI(fromcp); fromcp += 2; continue; } else { error = ENOENT; uma_zfree(namei_zone, cp); goto out; } } else *tocp++ = *fromcp++; } *tocp = '\0'; uma_zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } ndp->ni_pathlen = (tocp - cnp->cn_pnbuf) + 1; ndp->ni_segflg = UIO_SYSSPACE; if (pubflag) { ndp->ni_rootdir = rootvnode; ndp->ni_loopcnt = 0; if (cnp->cn_pnbuf[0] == '/') { int tvfslocked; tvfslocked = VFS_LOCK_GIANT(rootvnode->v_mount); VFS_UNLOCK_GIANT(vfslocked); dp = rootvnode; vfslocked = tvfslocked; } } else { cnp->cn_flags |= NOCROSSMOUNT; } /* * Initialize for scan, set ni_startdir and bump ref on dp again * because lookup() will dereference ni_startdir. */ cnp->cn_thread = curthread; VREF(dp); ndp->ni_startdir = dp; if (!lockleaf) cnp->cn_flags |= LOCKLEAF; for (;;) { cnp->cn_nameptr = cnp->cn_pnbuf; /* * Call lookup() to do the real work. 
If an error occurs, * ndp->ni_vp and ni_dvp are left uninitialized or NULL and * we do not have to dereference anything before returning. * In either case ni_startdir will be dereferenced and NULLed * out. */ if (vfslocked) ndp->ni_cnd.cn_flags |= GIANTHELD; error = lookup(ndp); vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; ndp->ni_cnd.cn_flags &= ~GIANTHELD; if (error) break; /* * Check for encountering a symbolic link. Trivial * termination occurs if no symlink encountered. * Note: zfree is safe because error is 0, so we will * not zfree it again when we break. */ if ((cnp->cn_flags & ISSYMLINK) == 0) { if (cnp->cn_flags & (SAVENAME | SAVESTART)) cnp->cn_flags |= HASBUF; else uma_zfree(namei_zone, cnp->cn_pnbuf); if (ndp->ni_vp && !lockleaf) VOP_UNLOCK(ndp->ni_vp, 0); break; } /* * Validate symlink */ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) VOP_UNLOCK(ndp->ni_dvp, 0); if (!pubflag) { error = EINVAL; goto badlink2; } if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { error = ELOOP; goto badlink2; } if (ndp->ni_pathlen > 1) cp = uma_zalloc(namei_zone, M_WAITOK); else cp = cnp->cn_pnbuf; aiov.iov_base = cp; aiov.iov_len = MAXPATHLEN; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = 0; auio.uio_rw = UIO_READ; auio.uio_segflg = UIO_SYSSPACE; auio.uio_td = NULL; auio.uio_resid = MAXPATHLEN; error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); if (error) { badlink1: if (ndp->ni_pathlen > 1) uma_zfree(namei_zone, cp); badlink2: vput(ndp->ni_vp); vrele(ndp->ni_dvp); break; } linklen = MAXPATHLEN - auio.uio_resid; if (linklen == 0) { error = ENOENT; goto badlink1; } if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { error = ENAMETOOLONG; goto badlink1; } /* * Adjust or replace path */ if (ndp->ni_pathlen > 1) { bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); uma_zfree(namei_zone, cnp->cn_pnbuf); cnp->cn_pnbuf = cp; } else cnp->cn_pnbuf[linklen] = '\0'; ndp->ni_pathlen += linklen; /* * Cleanup refs for next loop and check if root directory * should replace current directory. Normally ni_dvp * becomes the new base directory and is cleaned up when * we loop. Explicitly null pointers after invalidation * to clarify operation. */ vput(ndp->ni_vp); ndp->ni_vp = NULL; if (cnp->cn_pnbuf[0] == '/') { vrele(ndp->ni_dvp); ndp->ni_dvp = ndp->ni_rootdir; VREF(ndp->ni_dvp); } ndp->ni_startdir = ndp->ni_dvp; ndp->ni_dvp = NULL; } if (!lockleaf) cnp->cn_flags &= ~LOCKLEAF; if (cnp->cn_flags & GIANTHELD) { mtx_unlock(&Giant); cnp->cn_flags &= ~GIANTHELD; } /* * nfs_namei() guarantees that fields will not contain garbage * whether an error occurs or not. This allows the caller to track * cleanup state trivially. */ out: if (error) { uma_zfree(namei_zone, cnp->cn_pnbuf); ndp->ni_vp = NULL; ndp->ni_dvp = NULL; ndp->ni_startdir = NULL; cnp->cn_flags &= ~HASBUF; VFS_UNLOCK_GIANT(vfslocked); vfslocked = 0; } else if ((ndp->ni_cnd.cn_flags & (WANTPARENT|LOCKPARENT)) == 0) { ndp->ni_dvp = NULL; } /* * This differs from normal namei() in that even on failure we may * return with Giant held due to the dirp return. Make sure, however, * that we have not recursed: the calling code only expects to drop * one acquire. */ if (vfslocked || dvfslocked) ndp->ni_cnd.cn_flags |= GIANTHELD; if (vfslocked && dvfslocked) VFS_UNLOCK_GIANT(vfslocked); return (error); } /* * A fiddled version of m_adj() that ensures null fill to a long * boundary and only trims off the back end */ void nfsm_adj(struct mbuf *mp, int len, int nul) { struct mbuf *m; int count, i; char *cp; /* * Trim from tail. 
Scan the mbuf chain, * calculating its length and finding the last mbuf. * If the adjustment only affects this mbuf, then just * adjust and return. Otherwise, rescan and truncate * after the remaining size. */ count = 0; m = mp; for (;;) { count += m->m_len; if (m->m_next == NULL) break; m = m->m_next; } if (m->m_len > len) { m->m_len -= len; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } return; } count -= len; if (count < 0) count = 0; /* * Correct length for chain is "count". * Find the mbuf with last data, adjust its length, * and toss data from remaining mbufs on chain. */ for (m = mp; m; m = m->m_next) { if (m->m_len >= count) { m->m_len = count; if (nul > 0) { cp = mtod(m, caddr_t)+m->m_len-nul; for (i = 0; i < nul; i++) *cp++ = '\0'; } if (m->m_next != NULL) { m_freem(m->m_next); m->m_next = NULL; } break; } count -= m->m_len; } } /* * Make these functions instead of macros, so that the kernel text size * doesn't get too big... */ void nfsm_srvwcc(struct nfsrv_descript *nfsd, int before_ret, struct vattr *before_vap, int after_ret, struct vattr *after_vap, struct mbuf **mbp, char **bposp) { struct mbuf *mb = *mbp; char *bpos = *bposp; u_int32_t *tl; if (before_ret) { tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); *tl = nfsrv_nfs_false; } else { tl = nfsm_build(u_int32_t *, 7 * NFSX_UNSIGNED); *tl++ = nfsrv_nfs_true; txdr_hyper(before_vap->va_size, tl); tl += 2; txdr_nfsv3time(&(before_vap->va_mtime), tl); tl += 2; txdr_nfsv3time(&(before_vap->va_ctime), tl); } *bposp = bpos; *mbp = mb; nfsm_srvpostopattr(nfsd, after_ret, after_vap, mbp, bposp); } void nfsm_srvpostopattr(struct nfsrv_descript *nfsd, int after_ret, struct vattr *after_vap, struct mbuf **mbp, char **bposp) { struct mbuf *mb = *mbp; char *bpos = *bposp; u_int32_t *tl; struct nfs_fattr *fp; if (after_ret) { tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED); *tl = nfsrv_nfs_false; } else { tl = nfsm_build(u_int32_t *, NFSX_UNSIGNED + NFSX_V3FATTR); *tl++ = nfsrv_nfs_true; fp = (struct nfs_fattr *)tl; nfsm_srvfattr(nfsd, after_vap, fp); } *mbp = mb; *bposp = bpos; } void nfsm_srvfattr(struct nfsrv_descript *nfsd, struct vattr *vap, struct nfs_fattr *fp) { fp->fa_nlink = txdr_unsigned(vap->va_nlink); fp->fa_uid = txdr_unsigned(vap->va_uid); fp->fa_gid = txdr_unsigned(vap->va_gid); if (nfsd->nd_flag & ND_NFSV3) { fp->fa_type = vtonfsv3_type(vap->va_type); fp->fa_mode = vtonfsv3_mode(vap->va_mode); txdr_hyper(vap->va_size, &fp->fa3_size); txdr_hyper(vap->va_bytes, &fp->fa3_used); - fp->fa3_rdev.specdata1 = txdr_unsigned(umajor(vap->va_rdev)); - fp->fa3_rdev.specdata2 = txdr_unsigned(uminor(vap->va_rdev)); + fp->fa3_rdev.specdata1 = txdr_unsigned(major(vap->va_rdev)); + fp->fa3_rdev.specdata2 = txdr_unsigned(minor(vap->va_rdev)); fp->fa3_fsid.nfsuquad[0] = 0; fp->fa3_fsid.nfsuquad[1] = txdr_unsigned(vap->va_fsid); fp->fa3_fileid.nfsuquad[0] = 0; fp->fa3_fileid.nfsuquad[1] = txdr_unsigned(vap->va_fileid); txdr_nfsv3time(&vap->va_atime, &fp->fa3_atime); txdr_nfsv3time(&vap->va_mtime, &fp->fa3_mtime); txdr_nfsv3time(&vap->va_ctime, &fp->fa3_ctime); } else { fp->fa_type = vtonfsv2_type(vap->va_type); fp->fa_mode = vtonfsv2_mode(vap->va_type, vap->va_mode); fp->fa2_size = txdr_unsigned(vap->va_size); fp->fa2_blocksize = txdr_unsigned(vap->va_blocksize); if (vap->va_type == VFIFO) fp->fa2_rdev = 0xffffffff; else fp->fa2_rdev = txdr_unsigned(vap->va_rdev); fp->fa2_blocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); fp->fa2_fsid = txdr_unsigned(vap->va_fsid); fp->fa2_fileid = 
txdr_unsigned(vap->va_fileid); txdr_nfsv2time(&vap->va_atime, &fp->fa2_atime); txdr_nfsv2time(&vap->va_mtime, &fp->fa2_mtime); txdr_nfsv2time(&vap->va_ctime, &fp->fa2_ctime); } } /* * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked) * - look up fsid in mount list (if not found, return an error) * - get vp and export rights by calling VFS_FHTOVP() * - if cred->cr_uid == 0 or MNT_EXPORTANON is set, set it to credanon * - if not lockflag unlock it with VOP_UNLOCK() */ int nfsrv_fhtovp(fhandle_t *fhp, int lockflag, struct vnode **vpp, int *vfslockedp, struct nfsrv_descript *nfsd, struct nfssvc_sock *slp, struct sockaddr *nam, int *rdonlyp, int pubflag) { struct mount *mp; int i; struct ucred *cred, *credanon; int error, exflags; #ifdef MNT_EXNORESPORT /* XXX needs mountd and /etc/exports help yet */ struct sockaddr_in *saddr; #endif int credflavor; int vfslocked; int numsecflavors, *secflavors; int authsys; int v3 = nfsd->nd_flag & ND_NFSV3; int mountreq; *vfslockedp = 0; *vpp = NULL; if (nfs_ispublicfh(fhp)) { if (!pubflag || !nfs_pub.np_valid) return (ESTALE); fhp = &nfs_pub.np_handle; } mp = vfs_busyfs(&fhp->fh_fsid); if (!mp) return (ESTALE); vfslocked = VFS_LOCK_GIANT(mp); error = VFS_CHECKEXP(mp, nam, &exflags, &credanon, &numsecflavors, &secflavors); if (error) { vfs_unbusy(mp); goto out; } if (numsecflavors == 0) { /* * This can happen if the system is running with an * old mountd that doesn't pass in a secflavor list. */ numsecflavors = 1; authsys = RPCAUTH_UNIX; secflavors = &authsys; } credflavor = nfsd->nd_credflavor; for (i = 0; i < numsecflavors; i++) { if (secflavors[i] == credflavor) break; } if (i == numsecflavors) { /* * RFC 2623 section 2.3.2 - allow certain procedures * used at NFS client mount time even if they have * weak authentication. */ mountreq = FALSE; if (v3) { if (nfsd->nd_procnum == NFSPROC_FSINFO || nfsd->nd_procnum == NFSPROC_GETATTR) mountreq = TRUE; } else { if (nfsd->nd_procnum == NFSPROC_FSSTAT || nfsd->nd_procnum == NFSPROC_GETATTR) mountreq = TRUE; } if (!mountreq) { error = NFSERR_AUTHERR | AUTH_TOOWEAK; vfs_unbusy(mp); goto out; } } error = VFS_FHTOVP(mp, &fhp->fh_fid, vpp); vfs_unbusy(mp); if (error) goto out; #ifdef MNT_EXNORESPORT if (!(exflags & (MNT_EXNORESPORT|MNT_EXPUBLIC))) { saddr = (struct sockaddr_in *)nam; if ((saddr->sin_family == AF_INET || saddr->sin_family == AF_INET6) && /* same code for INET and INET6: sin*_port at same offset */ ntohs(saddr->sin_port) >= IPPORT_RESERVED) { vput(*vpp); *vpp = NULL; error = NFSERR_AUTHERR | AUTH_TOOWEAK; } } #endif /* * Check/setup credentials. */ cred = nfsd->nd_cr; if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) { cred->cr_uid = credanon->cr_uid; for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++) cred->cr_groups[i] = credanon->cr_groups[i]; cred->cr_ngroups = i; } if (exflags & MNT_EXRDONLY) *rdonlyp = 1; else *rdonlyp = 0; if (!lockflag) VOP_UNLOCK(*vpp, 0); out: if (error) { VFS_UNLOCK_GIANT(vfslocked); } else *vfslockedp = vfslocked; return (error); } /* * WebNFS: check if a filehandle is a public filehandle. For v3, this * means a length of 0, for v2 it means all zeroes. nfsm_srvmtofh has * transformed this to all zeroes in both cases, so check for it. */ int nfs_ispublicfh(fhandle_t *fhp) { char *cp = (char *)fhp; int i; NFSD_LOCK_DONTCARE(); for (i = 0; i < NFSX_V3FH; i++) if (*cp++ != 0) return (FALSE); return (TRUE); } #ifdef NFS_LEGACYRPC /* * This function compares two net addresses by family and returns TRUE * if they are the same host. * If there is any doubt, return FALSE.
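(The legacy RPC code uses it, for instance, to match cached request * entries against the address of an incoming call.)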
* The AF_INET family is handled as a special case so that address mbufs * don't need to be saved to store "struct in_addr", which is only 4 bytes. */ int netaddr_match(int family, union nethostaddr *haddr, struct sockaddr *nam) { struct sockaddr_in *inetaddr; NFSD_LOCK_DONTCARE(); switch (family) { case AF_INET: inetaddr = (struct sockaddr_in *)nam; if (inetaddr->sin_family == AF_INET && inetaddr->sin_addr.s_addr == haddr->had_inetaddr) return (1); break; #ifdef INET6 case AF_INET6: { register struct sockaddr_in6 *inet6addr1, *inet6addr2; inet6addr1 = (struct sockaddr_in6 *)nam; inet6addr2 = (struct sockaddr_in6 *)haddr->had_nam; /* XXX - should test sin6_scope_id ? */ if (inet6addr1->sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&inet6addr1->sin6_addr, &inet6addr2->sin6_addr)) return (1); break; } #endif default: break; }; return (0); } #endif /* * Map errnos to NFS error numbers. For Version 3 also filter out error * numbers not specified for the associated procedure. */ int nfsrv_errmap(struct nfsrv_descript *nd, int err) { const short *defaulterrp, *errp; int e; if (nd->nd_flag & ND_NFSV3) { if (nd->nd_procnum <= NFSPROC_COMMIT) { errp = defaulterrp = nfsrv_v3errmap[nd->nd_procnum]; while (*++errp) { if (*errp == err) return (err); else if (*errp > err) break; } return ((int)*defaulterrp); } else return (err & 0xffff); } e = 0; if (err <= ELAST) e = nfsrv_v2errmap[err - 1]; if (e != 0) return (e); return (NFSERR_IO); } /* * Sort the group list in increasing numerical order. * (Insertion sort by Chris Torek, who was grossed out by the bubble sort * that used to be here.) */ void nfsrvw_sort(gid_t *list, int num) { int i, j; gid_t v; /* Insertion sort. */ for (i = 1; i < num; i++) { v = list[i]; /* find correct slot for value v, moving others up */ for (j = i; --j >= 0 && v < list[j];) list[j + 1] = list[j]; list[j + 1] = v; } } /* * Helper functions for macros. 
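Each _xx helper below stands in for one of the nfsm_* request-parsing * or reply-building macros: the dissect-style ones pull XDR items off * the mbuf chain with nfsm_dissect_xx_nonblock() and return 0 on * success, EBADRPC for a short or malformed request, or an NFS error * such as NFSERR_NAMETOL.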
*/ void nfsm_srvfhtom_xx(fhandle_t *f, int v3, struct mbuf **mb, caddr_t *bpos) { u_int32_t *tl; if (v3) { tl = nfsm_build_xx(NFSX_UNSIGNED + NFSX_V3FH, mb, bpos); *tl++ = txdr_unsigned(NFSX_V3FH); bcopy(f, tl, NFSX_V3FH); } else { tl = nfsm_build_xx(NFSX_V2FH, mb, bpos); bcopy(f, tl, NFSX_V2FH); } } void nfsm_srvpostop_fh_xx(fhandle_t *f, struct mbuf **mb, caddr_t *bpos) { u_int32_t *tl; tl = nfsm_build_xx(2 * NFSX_UNSIGNED + NFSX_V3FH, mb, bpos); *tl++ = nfsrv_nfs_true; *tl++ = txdr_unsigned(NFSX_V3FH); bcopy(f, tl, NFSX_V3FH); } int nfsm_srvstrsiz_xx(int *s, int m, struct mbuf **md, caddr_t *dpos) { u_int32_t *tl; tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; *s = fxdr_unsigned(int32_t, *tl); if (*s > m || *s <= 0) return EBADRPC; return 0; } int nfsm_srvnamesiz_xx(int *s, int m, struct mbuf **md, caddr_t *dpos) { u_int32_t *tl; NFSD_LOCK_DONTCARE(); tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; *s = fxdr_unsigned(int32_t, *tl); if (*s > m) return NFSERR_NAMETOL; if (*s <= 0) return EBADRPC; return 0; } int nfsm_srvnamesiz0_xx(int *s, int m, struct mbuf **md, caddr_t *dpos) { u_int32_t *tl; tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; *s = fxdr_unsigned(int32_t, *tl); if (*s > m) return NFSERR_NAMETOL; if (*s < 0) return EBADRPC; return 0; } void nfsm_clget_xx(u_int32_t **tl, struct mbuf *mb, struct mbuf **mp, char **bp, char **be, caddr_t bpos) { struct mbuf *nmp; NFSD_UNLOCK_ASSERT(); if (*bp >= *be) { if (*mp == mb) (*mp)->m_len += *bp - bpos; MGET(nmp, M_WAIT, MT_DATA); MCLGET(nmp, M_WAIT); nmp->m_len = NFSMSIZ(nmp); (*mp)->m_next = nmp; *mp = nmp; *bp = mtod(*mp, caddr_t); *be = *bp + (*mp)->m_len; } *tl = (u_int32_t *)*bp; } int nfsm_srvmtofh_xx(fhandle_t *f, int v3, struct mbuf **md, caddr_t *dpos) { u_int32_t *tl; int fhlen; if (v3) { tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; fhlen = fxdr_unsigned(int, *tl); if (fhlen != 0 && fhlen != NFSX_V3FH) return EBADRPC; } else { fhlen = NFSX_V2FH; } if (fhlen != 0) { tl = nfsm_dissect_xx_nonblock(fhlen, md, dpos); if (tl == NULL) return EBADRPC; bcopy((caddr_t)tl, (caddr_t)(f), fhlen); } else { bzero((caddr_t)(f), NFSX_V3FH); } return 0; } int nfsm_srvsattr_xx(struct vattr *a, struct mbuf **md, caddr_t *dpos) { u_int32_t *tl; int toclient = 0; tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; if (*tl == nfsrv_nfs_true) { tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; (a)->va_mode = nfstov_mode(*tl); } tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; if (*tl == nfsrv_nfs_true) { tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; (a)->va_uid = fxdr_unsigned(uid_t, *tl); } tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; if (*tl == nfsrv_nfs_true) { tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; (a)->va_gid = fxdr_unsigned(gid_t, *tl); } tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; if (*tl == nfsrv_nfs_true) { tl = nfsm_dissect_xx_nonblock(2 * NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; (a)->va_size = fxdr_hyper(tl); } tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: tl = 
nfsm_dissect_xx_nonblock(2 * NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; fxdr_nfsv3time(tl, &(a)->va_atime); toclient = 1; break; case NFSV3SATTRTIME_TOSERVER: getnanotime(&(a)->va_atime); a->va_vaflags |= VA_UTIMES_NULL; break; } tl = nfsm_dissect_xx_nonblock(NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; switch (fxdr_unsigned(int, *tl)) { case NFSV3SATTRTIME_TOCLIENT: tl = nfsm_dissect_xx_nonblock(2 * NFSX_UNSIGNED, md, dpos); if (tl == NULL) return EBADRPC; fxdr_nfsv3time(tl, &(a)->va_mtime); a->va_vaflags &= ~VA_UTIMES_NULL; break; case NFSV3SATTRTIME_TOSERVER: getnanotime(&(a)->va_mtime); if (toclient == 0) a->va_vaflags |= VA_UTIMES_NULL; break; } return 0; } diff --git a/sys/sys/conf.h b/sys/sys/conf.h index 3fc0777583b7..043c9df8397a 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -1,326 +1,323 @@ /*- * Copyright (c) 1990, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 2000 * Poul-Henning Kamp. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)conf.h 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #ifndef _SYS_CONF_H_ #define _SYS_CONF_H_ #ifdef _KERNEL #include #else #include #endif struct snapdata; struct devfs_dirent; struct cdevsw; struct file; struct cdev { void *__si_reserved; u_int si_flags; #define SI_ALIAS 0x0002 /* carrier of alias name */ #define SI_NAMED 0x0004 /* make_dev{_alias} has been called */ #define SI_CHEAPCLONE 0x0008 /* can be removed_dev'ed when vnode reclaims */ #define SI_CHILD 0x0010 /* child of another struct cdev */ #define SI_DEVOPEN 0x0020 /* opened by device */ #define SI_CONSOPEN 0x0040 /* opened by console */ #define SI_DUMPDEV 0x0080 /* is kernel dumpdev */ #define SI_CANDELETE 0x0100 /* can do BIO_DELETE */ #define SI_CLONELIST 0x0200 /* on a clone list */ struct timespec si_atime; struct timespec si_ctime; struct timespec si_mtime; uid_t si_uid; gid_t si_gid; mode_t si_mode; struct ucred *si_cred; /* cached clone-time credential */ int si_drv0; int si_refcount; LIST_ENTRY(cdev) si_list; LIST_ENTRY(cdev) si_clone; LIST_HEAD(, cdev) si_children; LIST_ENTRY(cdev) si_siblings; struct cdev *si_parent; char *si_name; void *si_drv1, *si_drv2; struct cdevsw *si_devsw; int si_iosize_max; /* maximum I/O size (for physio &al) */ u_long si_usecount; u_long si_threadcount; union { struct snapdata *__sid_snapdata; } __si_u; char __si_namebuf[SPECNAMELEN + 1]; }; #define si_snapdata __si_u.__sid_snapdata #ifdef _KERNEL /* * Definitions of device driver entry switches */ struct bio; struct buf; struct thread; struct uio; struct knote; struct clonedevs; struct vnode; /* * Note: d_thread_t is provided as a transition aid for those drivers * that treat struct proc/struct thread as an opaque data type and * exist in substantially the same form in both 4.x and 5.x. Writers * of drivers that dip into the d_thread_t structure should use * struct thread or struct proc as appropriate for the version of the * OS they are using. It is provided in lieu of each device driver * inventing its own way of doing this. While it does violate style(9) * in a number of ways, this violation is deemed to be less * important than the benefits that a uniform API between releases * gives. * * Users of struct thread/struct proc that aren't device drivers should * not use d_thread_t. */ typedef struct thread d_thread_t; typedef int d_open_t(struct cdev *dev, int oflags, int devtype, struct thread *td); typedef int d_fdopen_t(struct cdev *dev, int oflags, struct thread *td, struct file *fp); typedef int d_close_t(struct cdev *dev, int fflag, int devtype, struct thread *td); typedef void d_strategy_t(struct bio *bp); typedef int d_ioctl_t(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td); typedef int d_read_t(struct cdev *dev, struct uio *uio, int ioflag); typedef int d_write_t(struct cdev *dev, struct uio *uio, int ioflag); typedef int d_poll_t(struct cdev *dev, int events, struct thread *td); typedef int d_kqfilter_t(struct cdev *dev, struct knote *kn); typedef int d_mmap_t(struct cdev *dev, vm_offset_t offset, vm_paddr_t *paddr, int nprot); typedef void d_purge_t(struct cdev *dev); typedef int d_spare2_t(struct cdev *dev); typedef int dumper_t( void *priv, /* Private to the driver. */ void *virtual, /* Virtual (mapped) address. */ vm_offset_t physical, /* Physical address of virtual. */ off_t offset, /* Byte-offset to write at. */ size_t length); /* Number of bytes to dump. */ #endif /* _KERNEL */ /* * Types for d_flags.
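The four device-type bits below live in the low D_TYPEMASK portion * of d_flags; the driver-settable D_* flags further down occupy the * upper bits.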
*/ #define D_TAPE 0x0001 #define D_DISK 0x0002 #define D_TTY 0x0004 #define D_MEM 0x0008 #ifdef _KERNEL #define D_TYPEMASK 0xffff /* * Flags for d_flags which the drivers can set. */ #define D_TRACKCLOSE 0x00080000 /* track all closes */ #define D_MMAP_ANON 0x00100000 /* special treatment in vm_mmap.c */ #define D_PSEUDO 0x00200000 /* make_dev() can return NULL */ #define D_NEEDGIANT 0x00400000 /* driver wants Giant */ #define D_NEEDMINOR 0x00800000 /* driver uses clone_create() */ /* * Version numbers. */ #define D_VERSION_00 0x20011966 #define D_VERSION_01 0x17032005 /* Add d_uid,gid,mode & kind */ #define D_VERSION D_VERSION_01 /* * Flags used for internal housekeeping */ #define D_INIT 0x80000000 /* cdevsw initialized */ /* * Character device switch table */ struct cdevsw { int d_version; u_int d_flags; const char *d_name; d_open_t *d_open; d_fdopen_t *d_fdopen; d_close_t *d_close; d_read_t *d_read; d_write_t *d_write; d_ioctl_t *d_ioctl; d_poll_t *d_poll; d_mmap_t *d_mmap; d_strategy_t *d_strategy; dumper_t *d_dump; d_kqfilter_t *d_kqfilter; d_purge_t *d_purge; d_spare2_t *d_spare2; uid_t d_uid; gid_t d_gid; mode_t d_mode; const char *d_kind; /* These fields should not be messed with by drivers */ LIST_ENTRY(cdevsw) d_list; LIST_HEAD(, cdev) d_devs; int d_spare3; union { struct cdevsw *gianttrick; SLIST_ENTRY(cdevsw) postfree_list; } __d_giant; }; #define d_gianttrick __d_giant.gianttrick #define d_postfree_list __d_giant.postfree_list #define NUMCDEVSW 256 struct module; struct devsw_module_data { int (*chainevh)(struct module *, int, void *); /* next handler */ void *chainarg; /* arg for next event handler */ /* Do not initialize fields hereafter */ }; #define DEV_MODULE(name, evh, arg) \ static moduledata_t name##_mod = { \ #name, \ evh, \ arg \ }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE) void clone_setup(struct clonedevs **cdp); void clone_cleanup(struct clonedevs **); #define CLONE_UNITMASK 0xfffff #define CLONE_FLAG0 (CLONE_UNITMASK + 1) int clone_create(struct clonedevs **, struct cdevsw *, int *unit, struct cdev **dev, int extra); int count_dev(struct cdev *_dev); void destroy_dev(struct cdev *_dev); int destroy_dev_sched(struct cdev *dev); int destroy_dev_sched_cb(struct cdev *dev, void (*cb)(void *), void *arg); void destroy_dev_drain(struct cdevsw *csw); void drain_dev_clone_events(void); struct cdevsw *dev_refthread(struct cdev *_dev); struct cdevsw *devvn_refthread(struct vnode *vp, struct cdev **devp); void dev_relthread(struct cdev *_dev); void dev_depends(struct cdev *_pdev, struct cdev *_cdev); void dev_ref(struct cdev *dev); void dev_refl(struct cdev *dev); void dev_rel(struct cdev *dev); void dev_strategy(struct cdev *dev, struct buf *bp); struct cdev *make_dev(struct cdevsw *_devsw, int _unit, uid_t _uid, gid_t _gid, int _perms, const char *_fmt, ...) __printflike(6, 7); struct cdev *make_dev_cred(struct cdevsw *_devsw, int _unit, struct ucred *_cr, uid_t _uid, gid_t _gid, int _perms, const char *_fmt, ...) __printflike(7, 8); #define MAKEDEV_REF 0x1 #define MAKEDEV_WHTOUT 0x2 struct cdev *make_dev_credf(int _flags, struct cdevsw *_devsw, int _unit, struct ucred *_cr, uid_t _uid, gid_t _gid, int _mode, const char *_fmt, ...) __printflike(8, 9); struct cdev *make_dev_alias(struct cdev *_pdev, const char *_fmt, ...) __printflike(2, 3); void dev_lock(void); void dev_unlock(void); void setconf(void); -#define dev2unit(d) ((d) ? (d)->si_drv0 : NODEV) -#define minor(d) ((d) ?
(d)->si_drv0 : NODEV) -#define unit2minor(u) (u) -#define minor2unit(m) (m) +#define dev2unit(d) ((d)->si_drv0) typedef void (*cdevpriv_dtr_t)(void *data); int devfs_get_cdevpriv(void **datap); int devfs_set_cdevpriv(void *priv, cdevpriv_dtr_t dtr); void devfs_clear_cdevpriv(void); void devfs_fpdrop(struct file *fp); /* XXX This is not public KPI */ #define UID_ROOT 0 #define UID_BIN 3 #define UID_UUCP 66 #define UID_NOBODY 65534 #define GID_WHEEL 0 #define GID_KMEM 2 #define GID_TTY 4 #define GID_OPERATOR 5 #define GID_BIN 7 #define GID_GAMES 13 #define GID_DIALER 68 #define GID_NOBODY 65534 typedef void (*dev_clone_fn)(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **result); int dev_stdclone(char *_name, char **_namep, const char *_stem, int *_unit); EVENTHANDLER_DECLARE(dev_clone, dev_clone_fn); /* Stuff relating to kernel-dump */ struct dumperinfo { dumper_t *dumper; /* Dumping function. */ void *priv; /* Private parts. */ u_int blocksize; /* Size of block in bytes. */ u_int maxiosize; /* Max size allowed for an individual I/O */ off_t mediaoffset; /* Initial offset in bytes. */ off_t mediasize; /* Space available in bytes. */ }; int set_dumper(struct dumperinfo *); int dump_write(struct dumperinfo *, void *, vm_offset_t, off_t, size_t); void dumpsys(struct dumperinfo *); extern int dumping; /* system is dumping */ #endif /* _KERNEL */ #endif /* !_SYS_CONF_H_ */ diff --git a/sys/sys/param.h b/sys/sys/param.h index 770067e32a68..6a8e4b081363 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -1,318 +1,318 @@ /*- * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)param.h 8.3 (Berkeley) 4/4/95 * $FreeBSD$ */ #ifndef _SYS_PARAM_H_ #define _SYS_PARAM_H_ #include #define BSD 199506 /* System version (year & month). */ #define BSD4_3 1 #define BSD4_4 1 /* * __FreeBSD_version numbers are documented in the Porter's Handbook. * If you bump the version for any reason, you should update the documentation * there. * Currently this lives here: * * doc/en_US.ISO8859-1/books/porters-handbook/book.sgml * * scheme is: <major><two digit minor>Rxx * 'R' is 0 if release branch or x.0-CURRENT before RELENG_*_0 * is created, otherwise 1. */ #undef __FreeBSD_version -#define __FreeBSD_version 800061 /* Master, propagated to newvers */ +#define __FreeBSD_version 800062 /* Master, propagated to newvers */ #ifndef LOCORE #include #endif /* * Machine-independent constants (some used in following include files). * Redefined constants are from POSIX 1003.1 limits file. * * MAXCOMLEN should be >= sizeof(ac_comm) (see <acct.h>) * MAXLOGNAME should be == UT_NAMESIZE+1 (see <utmp.h>) */ #include #define MAXCOMLEN 19 /* max command name remembered */ #define MAXINTERP 32 /* max interpreter file name length */ #define MAXLOGNAME 17 /* max login name length (incl. NUL) */ #define MAXUPRC CHILD_MAX /* max simultaneous processes */ #define NCARGS ARG_MAX /* max bytes for an exec function */ #define NGROUPS NGROUPS_MAX /* max number of groups */ #define NOFILE OPEN_MAX /* max open files per process */ #define NOGROUP 65535 /* marker for empty group set member */ #define MAXHOSTNAMELEN 256 /* max hostname size */ #define SPECNAMELEN 63 /* max length of devicename */ /* More types and definitions used throughout the kernel. */ #ifdef _KERNEL #include #include #ifndef LOCORE #include #include #endif #ifndef FALSE #define FALSE 0 #endif #ifndef TRUE #define TRUE 1 #endif #endif #ifndef _KERNEL /* Signals. */ #include #endif /* Machine type dependent parameters. */ #include #ifndef _KERNEL #include #endif #ifndef _NO_NAMESPACE_POLLUTION #ifndef DEV_BSHIFT #define DEV_BSHIFT 9 /* log2(DEV_BSIZE) */ #endif #define DEV_BSIZE (1<<DEV_BSHIFT) #ifndef BLKDEV_IOSIZE #define BLKDEV_IOSIZE PAGE_SIZE /* default block device I/O size */ #endif #ifndef DFLTPHYS #define DFLTPHYS (64 * 1024) /* default max raw I/O transfer size */ #endif #ifndef MAXPHYS #define MAXPHYS (128 * 1024) /* max raw I/O transfer size */ #endif #define MAXDUMPPGS (DFLTPHYS/PAGE_SIZE) /* pages ("clicks") to bytes */ #ifndef ctob #define ctob(x) ((x)<<PAGE_SHIFT) #endif /* bytes to pages */ #ifndef btoc #define btoc(x) (((vm_offset_t)(x)+PAGE_MASK)>>PAGE_SHIFT) #endif /* * btodb() is messy and perhaps slow because `bytes' may be an off_t. We * want to shift an unsigned type to avoid sign extension and we don't * want to widen `bytes' unnecessarily. Assume that the result fits in * a daddr_t. */ #ifndef btodb #define btodb(bytes) /* calculates (bytes / DEV_BSIZE) */ \ (sizeof (bytes) > sizeof(long) \ ? (daddr_t)((unsigned long long)(bytes) >> DEV_BSHIFT) \ : (daddr_t)((unsigned long)(bytes) >> DEV_BSHIFT)) #endif #ifndef dbtob #define dbtob(db) /* calculates (db * DEV_BSIZE) */ \ ((off_t)(db) << DEV_BSHIFT) #endif #endif /* _NO_NAMESPACE_POLLUTION */ #define PRIMASK 0x0ff #define PCATCH 0x100 /* OR'd with pri for tsleep to check signals */ #define PDROP 0x200 /* OR'd with pri to stop re-entry of interlock mutex */ #define NZERO 0 /* default "nice" */ #define NBBY 8 /* number of bits in a byte */ #define NBPW sizeof(int) /* number of bytes per word (integer) */ #define CMASK 022 /* default file mask: S_IWGRP|S_IWOTH */ #define NODEV (dev_t)(-1) /* non-existent device */ #define CBLOCK 128 /* Clist block size, must be a power of 2. */ /* Data chars/clist. */ #define CBSIZE (CBLOCK - sizeof(struct cblock *)) #define CROUND (CBLOCK - 1) /* Clist rounding. */ /* * File system parameters and macros. * * MAXBSIZE - Filesystems are made out of blocks of at most MAXBSIZE bytes * per block.
MAXBSIZE may be made larger without affecting * any existing filesystems as long as it does not exceed MAXPHYS, * and may be made smaller at the risk of not being able to use * filesystems which require a block size exceeding MAXBSIZE. * * BKVASIZE - Nominal buffer space per buffer, in bytes. BKVASIZE is the * minimum KVM memory reservation the kernel is willing to make. * Filesystems can of course request smaller chunks. Actual * backing memory uses a chunk size of a page (PAGE_SIZE). * * If you make BKVASIZE too small you risk seriously fragmenting * the buffer KVM map which may slow things down a bit. If you * make it too big the kernel will not be able to optimally use * the KVM memory reserved for the buffer cache and will wind * up with too-few buffers. * * The default is 16384, roughly 2x the block size used by a * normal UFS filesystem. */ #define MAXBSIZE 65536 /* must be power of 2 */ #define BKVASIZE 16384 /* must be power of 2 */ #define BKVAMASK (BKVASIZE-1) /* * MAXPATHLEN defines the longest permissible path length after expanding * symbolic links. It is used to allocate a temporary buffer from the buffer * pool in which to do the name expansion, hence should be a power of two, * and must be less than or equal to MAXBSIZE. MAXSYMLINKS defines the * maximum number of symbolic links that may be expanded in a path name. * It should be set high enough to allow all legitimate uses, but halt * infinite loops reasonably quickly. */ #define MAXPATHLEN PATH_MAX #define MAXSYMLINKS 32 /* Bit map related macros. */ #define setbit(a,i) (((unsigned char *)(a))[(i)/NBBY] |= 1<<((i)%NBBY)) #define clrbit(a,i) (((unsigned char *)(a))[(i)/NBBY] &= ~(1<<((i)%NBBY))) #define isset(a,i) \ (((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) #define isclr(a,i) \ ((((const unsigned char *)(a))[(i)/NBBY] & (1<<((i)%NBBY))) == 0) /* Macros for counting and rounding. */ #ifndef howmany #define howmany(x, y) (((x)+((y)-1))/(y)) #endif #define rounddown(x, y) (((x)/(y))*(y)) #define roundup(x, y) ((((x)+((y)-1))/(y))*(y)) /* to any y */ #define roundup2(x, y) (((x)+((y)-1))&(~((y)-1))) /* if y is a power of two */ #define powerof2(x) ((((x)-1)&(x))==0) /* Macros for min/max. */ #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) #ifdef _KERNEL /* * Basic byte order function prototypes for non-inline functions. */ #ifndef LOCORE #ifndef _BYTEORDER_PROTOTYPED #define _BYTEORDER_PROTOTYPED __BEGIN_DECLS __uint32_t htonl(__uint32_t); __uint16_t htons(__uint16_t); __uint32_t ntohl(__uint32_t); __uint16_t ntohs(__uint16_t); __END_DECLS #endif #endif #ifndef lint #ifndef _BYTEORDER_FUNC_DEFINED #define _BYTEORDER_FUNC_DEFINED #define htonl(x) __htonl(x) #define htons(x) __htons(x) #define ntohl(x) __ntohl(x) #define ntohs(x) __ntohs(x) #endif /* !_BYTEORDER_FUNC_DEFINED */ #endif /* lint */ #endif /* _KERNEL */ /* * Scale factor for scaled integers used to count %cpu time and load avgs. * * The number of CPU `tick's that map to a unique `%age' can be expressed * by the formula (1 / (2 ^ (FSHIFT - 11))). The maximum load average that * can be calculated (assuming 32 bits) can be closely approximated using * the formula (2 ^ (2 * (16 - FSHIFT))) for (FSHIFT < 15). * * For the scheduler to maintain a 1:1 mapping of CPU `tick' to `%age', * FSHIFT must be at least 11; this gives us a maximum load avg of ~1024.
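For example, with FSHIFT == 11, FSCALE is 2048, so a load average of * 2.25 is stored as the scaled integer 2.25 * FSCALE == 4608 and turned * back into a fraction by dividing by FSCALE.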
*/ #define FSHIFT 11 /* bits to right of fixed binary point */ #define FSCALE (1<<FSHIFT) #define dbtoc(db) /* calculates devblks to pages */ ((db + (ctodb(1) - 1)) >> (PAGE_SHIFT - DEV_BSHIFT)) #define ctodb(db) /* calculates pages to devblks */ ((db) << (PAGE_SHIFT - DEV_BSHIFT)) /* * Given the pointer x to the member m of the struct s, return * a pointer to the containing structure. */ #define member2struct(s, m, x) \ ((struct s *)(void *)((char *)(x) - offsetof(struct s, m))) #endif /* _SYS_PARAM_H_ */ diff --git a/sys/sys/types.h b/sys/sys/types.h index cf9264a599b7..66be699d6e07 100644 --- a/sys/sys/types.h +++ b/sys/sys/types.h @@ -1,361 +1,353 @@ /*- * Copyright (c) 1982, 1986, 1991, 1993, 1994 * The Regents of the University of California. All rights reserved. * (c) UNIX System Laboratories, Inc. * All or some portions of this file are derived from material licensed * to the University of California by American Telephone and Telegraph * Co. or Unix System Laboratories, Inc. and are reproduced herein with * the permission of UNIX System Laboratories, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)types.h 8.6 (Berkeley) 2/19/95 * $FreeBSD$ */ #ifndef _SYS_TYPES_H_ #define _SYS_TYPES_H_ #include /* Machine type dependent parameters. */ #include #include #include #if __BSD_VISIBLE typedef unsigned char u_char; typedef unsigned short u_short; typedef unsigned int u_int; typedef unsigned long u_long; #ifndef _KERNEL typedef unsigned short ushort; /* Sys V compatibility */ typedef unsigned int uint; /* Sys V compatibility */ #endif #endif /* * XXX POSIX sized integrals that should appear only in <sys/stdint.h>.
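Each typedef is wrapped in a _DECLARED guard so that this header and * the fixed-width headers can both be included, in either order, without * redefining the same type.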
*/ #ifndef _INT8_T_DECLARED typedef __int8_t int8_t; #define _INT8_T_DECLARED #endif #ifndef _INT16_T_DECLARED typedef __int16_t int16_t; #define _INT16_T_DECLARED #endif #ifndef _INT32_T_DECLARED typedef __int32_t int32_t; #define _INT32_T_DECLARED #endif #ifndef _INT64_T_DECLARED typedef __int64_t int64_t; #define _INT64_T_DECLARED #endif #ifndef _UINT8_T_DECLARED typedef __uint8_t uint8_t; #define _UINT8_T_DECLARED #endif #ifndef _UINT16_T_DECLARED typedef __uint16_t uint16_t; #define _UINT16_T_DECLARED #endif #ifndef _UINT32_T_DECLARED typedef __uint32_t uint32_t; #define _UINT32_T_DECLARED #endif #ifndef _UINT64_T_DECLARED typedef __uint64_t uint64_t; #define _UINT64_T_DECLARED #endif #ifndef _INTPTR_T_DECLARED typedef __intptr_t intptr_t; typedef __uintptr_t uintptr_t; #define _INTPTR_T_DECLARED #endif typedef __uint8_t u_int8_t; /* unsigned integrals (deprecated) */ typedef __uint16_t u_int16_t; typedef __uint32_t u_int32_t; typedef __uint64_t u_int64_t; typedef __uint64_t u_quad_t; /* quads (deprecated) */ typedef __int64_t quad_t; typedef quad_t * qaddr_t; typedef char * caddr_t; /* core address */ typedef __const char * c_caddr_t; /* core address, pointer to const */ typedef __volatile char *v_caddr_t; /* core address, pointer to volatile */ #ifndef _BLKSIZE_T_DECLARED typedef __blksize_t blksize_t; #define _BLKSIZE_T_DECLARED #endif typedef __cpuwhich_t cpuwhich_t; typedef __cpulevel_t cpulevel_t; typedef __cpusetid_t cpusetid_t; #ifndef _BLKCNT_T_DECLARED typedef __blkcnt_t blkcnt_t; #define _BLKCNT_T_DECLARED #endif #ifndef _CLOCK_T_DECLARED typedef __clock_t clock_t; #define _CLOCK_T_DECLARED #endif #ifndef _CLOCKID_T_DECLARED typedef __clockid_t clockid_t; #define _CLOCKID_T_DECLARED #endif typedef __cpumask_t cpumask_t; typedef __critical_t critical_t; /* Critical section value */ typedef __int64_t daddr_t; /* disk address */ #ifndef _DEV_T_DECLARED typedef __dev_t dev_t; /* device number or struct cdev */ #define _DEV_T_DECLARED #endif #ifndef _FFLAGS_T_DECLARED typedef __fflags_t fflags_t; /* file flags */ #define _FFLAGS_T_DECLARED #endif typedef __fixpt_t fixpt_t; /* fixed point number */ #ifndef _FSBLKCNT_T_DECLARED /* for statvfs() */ typedef __fsblkcnt_t fsblkcnt_t; typedef __fsfilcnt_t fsfilcnt_t; #define _FSBLKCNT_T_DECLARED #endif #ifndef _GID_T_DECLARED typedef __gid_t gid_t; /* group id */ #define _GID_T_DECLARED #endif #ifndef _IN_ADDR_T_DECLARED typedef __uint32_t in_addr_t; /* base type for internet address */ #define _IN_ADDR_T_DECLARED #endif #ifndef _IN_PORT_T_DECLARED typedef __uint16_t in_port_t; #define _IN_PORT_T_DECLARED #endif #ifndef _ID_T_DECLARED typedef __id_t id_t; /* can hold a uid_t or pid_t */ #define _ID_T_DECLARED #endif #ifndef _INO_T_DECLARED typedef __ino_t ino_t; /* inode number */ #define _INO_T_DECLARED #endif #ifndef _KEY_T_DECLARED typedef __key_t key_t; /* IPC key (for Sys V IPC) */ #define _KEY_T_DECLARED #endif #ifndef _LWPID_T_DECLARED typedef __lwpid_t lwpid_t; /* Thread ID (a.k.a. 
LWP) */ #define _LWPID_T_DECLARED #endif #ifndef _MODE_T_DECLARED typedef __mode_t mode_t; /* permissions */ #define _MODE_T_DECLARED #endif #ifndef _ACCMODE_T_DECLARED typedef __accmode_t accmode_t; /* access permissions */ #define _ACCMODE_T_DECLARED #endif #ifndef _NLINK_T_DECLARED typedef __nlink_t nlink_t; /* link count */ #define _NLINK_T_DECLARED #endif #ifndef _OFF_T_DECLARED typedef __off_t off_t; /* file offset */ #define _OFF_T_DECLARED #endif #ifndef _PID_T_DECLARED typedef __pid_t pid_t; /* process id */ #define _PID_T_DECLARED #endif typedef __register_t register_t; #ifndef _RLIM_T_DECLARED typedef __rlim_t rlim_t; /* resource limit */ #define _RLIM_T_DECLARED #endif typedef __segsz_t segsz_t; /* segment size (in pages) */ #ifndef _SIZE_T_DECLARED typedef __size_t size_t; #define _SIZE_T_DECLARED #endif #ifndef _SSIZE_T_DECLARED typedef __ssize_t ssize_t; #define _SSIZE_T_DECLARED #endif #ifndef _SUSECONDS_T_DECLARED typedef __suseconds_t suseconds_t; /* microseconds (signed) */ #define _SUSECONDS_T_DECLARED #endif #ifndef _TIME_T_DECLARED typedef __time_t time_t; #define _TIME_T_DECLARED #endif #ifndef _TIMER_T_DECLARED typedef __timer_t timer_t; #define _TIMER_T_DECLARED #endif #ifndef _MQD_T_DECLARED typedef __mqd_t mqd_t; #define _MQD_T_DECLARED #endif typedef __u_register_t u_register_t; #ifndef _UID_T_DECLARED typedef __uid_t uid_t; /* user id */ #define _UID_T_DECLARED #endif #ifndef _USECONDS_T_DECLARED typedef __useconds_t useconds_t; /* microseconds (unsigned) */ #define _USECONDS_T_DECLARED #endif typedef __vm_offset_t vm_offset_t; typedef __vm_ooffset_t vm_ooffset_t; typedef __vm_paddr_t vm_paddr_t; typedef __vm_pindex_t vm_pindex_t; typedef __vm_size_t vm_size_t; #ifdef _KERNEL typedef int boolean_t; typedef struct device *device_t; typedef __intfptr_t intfptr_t; /*- * XXX this is fixed width for historical reasons. It should have had type * __int_fast32_t. Fixed-width types should not be used unless binary * compatibility is essential. Least-width types should be used even less * since they provide smaller benefits. * XXX should be MD. * XXX this is bogus in -current, but still used for spl*(). */ typedef __uint32_t intrmask_t; /* Interrupt mask (spl, xxx_imask...) */ typedef __uintfptr_t uintfptr_t; typedef __uint64_t uoff_t; typedef struct vm_page *vm_page_t; #define offsetof(type, field) __offsetof(type, field) #endif /* !_KERNEL */ /* * The following are all things that really shouldn't exist in this header, * since its purpose is to provide typedefs, not miscellaneous doodads. */ #if __BSD_VISIBLE #include /* * minor() gives a cookie instead of an index since we don't want to * change the meanings of bits 0-15 or waste time and space shifting * bits 16-31 for devices that don't use them. - * - * XXX: In the kernel we must name it umajor() and uminor(), because - * minor() is still in use by . */ -#ifdef _KERNEL -#define umajor(x) ((int)(((u_int)(x) >> 8)&0xff)) /* major number */ -#define uminor(x) ((int)((x)&0xffff00ff)) /* minor number */ -#else /* !_KERNEL */ -#define major(x) ((int)(((u_int)(x) >> 8)&0xff)) /* major number */ +#define major(x) ((int)(((u_int)(x) >> 8)&0xff)) /* major number */ #define minor(x) ((int)((x)&0xffff00ff)) /* minor number */ -#endif /* _KERNEL */ #define makedev(x,y) ((dev_t)(((x) << 8) | (y))) /* create dev_t */ /* * These declarations belong elsewhere, but are repeated here and in * to give broken programs a better chance of working with * 64-bit off_t's. 
*/ #ifndef _KERNEL __BEGIN_DECLS #ifndef _FTRUNCATE_DECLARED #define _FTRUNCATE_DECLARED int ftruncate(int, off_t); #endif #ifndef _LSEEK_DECLARED #define _LSEEK_DECLARED off_t lseek(int, off_t, int); #endif #ifndef _MMAP_DECLARED #define _MMAP_DECLARED void * mmap(void *, size_t, int, int, int, off_t); #endif #ifndef _TRUNCATE_DECLARED #define _TRUNCATE_DECLARED int truncate(const char *, off_t); #endif __END_DECLS #endif /* !_KERNEL */ #endif /* __BSD_VISIBLE */ #endif /* !_SYS_TYPES_H_ */
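As a quick illustration of the dev_t encoding behind the major(), minor() and makedev() macros that this change also exposes to the kernel (a minimal userland sketch under the definitions above; the device numbers are made up):

#include <stdio.h>
#include <sys/types.h>

int
main(void)
{
	/*
	 * Per the macros above, the major number lives in bits 8-15 and
	 * the minor number is a cookie spread over bits 0-7 and 16-31,
	 * which is why minor() masks with 0xffff00ff instead of shifting.
	 */
	dev_t dev = makedev(13, 0x20005);

	printf("major=%d minor=%#x\n", major(dev), minor(dev));
	/* Prints: major=13 minor=0x20005 */
	return (0);
}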