Index: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h =================================================================== --- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h (revision 324004) +++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/zfs_znode.h (revision 324005) @@ -1,368 +1,368 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. 
* Copyright (c) 2014 Integros [integros.com] */ #ifndef _SYS_FS_ZFS_ZNODE_H #define _SYS_FS_ZFS_ZNODE_H #ifdef _KERNEL #include #include #include #include #include #include #include #endif #include #include #ifdef __cplusplus extern "C" { #endif /* * Additional file level attributes, that are stored * in the upper half of zp_flags */ #define ZFS_READONLY 0x0000000100000000 #define ZFS_HIDDEN 0x0000000200000000 #define ZFS_SYSTEM 0x0000000400000000 #define ZFS_ARCHIVE 0x0000000800000000 #define ZFS_IMMUTABLE 0x0000001000000000 #define ZFS_NOUNLINK 0x0000002000000000 #define ZFS_APPENDONLY 0x0000004000000000 #define ZFS_NODUMP 0x0000008000000000 #define ZFS_OPAQUE 0x0000010000000000 #define ZFS_AV_QUARANTINED 0x0000020000000000 #define ZFS_AV_MODIFIED 0x0000040000000000 #define ZFS_REPARSE 0x0000080000000000 #define ZFS_OFFLINE 0x0000100000000000 #define ZFS_SPARSE 0x0000200000000000 #define ZFS_ATTR_SET(zp, attr, value, pflags, tx) \ { \ if (value) \ pflags |= attr; \ else \ pflags &= ~attr; \ VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_FLAGS(zp->z_zfsvfs), \ &pflags, sizeof (pflags), tx)); \ } /* * Define special zfs pflags */ #define ZFS_XATTR 0x1 /* is an extended attribute */ #define ZFS_INHERIT_ACE 0x2 /* ace has inheritable ACEs */ #define ZFS_ACL_TRIVIAL 0x4 /* files ACL is trivial */ #define ZFS_ACL_OBJ_ACE 0x8 /* ACL has CMPLX Object ACE */ #define ZFS_ACL_PROTECTED 0x10 /* ACL protected */ #define ZFS_ACL_DEFAULTED 0x20 /* ACL should be defaulted */ #define ZFS_ACL_AUTO_INHERIT 0x40 /* ACL should be inherited */ #define ZFS_BONUS_SCANSTAMP 0x80 /* Scanstamp in bonus area */ #define ZFS_NO_EXECS_DENIED 0x100 /* exec was given to everyone */ #define SA_ZPL_ATIME(z) z->z_attr_table[ZPL_ATIME] #define SA_ZPL_MTIME(z) z->z_attr_table[ZPL_MTIME] #define SA_ZPL_CTIME(z) z->z_attr_table[ZPL_CTIME] #define SA_ZPL_CRTIME(z) z->z_attr_table[ZPL_CRTIME] #define SA_ZPL_GEN(z) z->z_attr_table[ZPL_GEN] #define SA_ZPL_DACL_ACES(z) z->z_attr_table[ZPL_DACL_ACES] #define 
SA_ZPL_XATTR(z) z->z_attr_table[ZPL_XATTR] #define SA_ZPL_SYMLINK(z) z->z_attr_table[ZPL_SYMLINK] #define SA_ZPL_RDEV(z) z->z_attr_table[ZPL_RDEV] #define SA_ZPL_SCANSTAMP(z) z->z_attr_table[ZPL_SCANSTAMP] #define SA_ZPL_UID(z) z->z_attr_table[ZPL_UID] #define SA_ZPL_GID(z) z->z_attr_table[ZPL_GID] #define SA_ZPL_PARENT(z) z->z_attr_table[ZPL_PARENT] #define SA_ZPL_LINKS(z) z->z_attr_table[ZPL_LINKS] #define SA_ZPL_MODE(z) z->z_attr_table[ZPL_MODE] #define SA_ZPL_DACL_COUNT(z) z->z_attr_table[ZPL_DACL_COUNT] #define SA_ZPL_FLAGS(z) z->z_attr_table[ZPL_FLAGS] #define SA_ZPL_SIZE(z) z->z_attr_table[ZPL_SIZE] #define SA_ZPL_ZNODE_ACL(z) z->z_attr_table[ZPL_ZNODE_ACL] #define SA_ZPL_PAD(z) z->z_attr_table[ZPL_PAD] /* * Is ID ephemeral? */ #define IS_EPHEMERAL(x) (x > MAXUID) /* * Should we use FUIDs? */ #define USE_FUIDS(version, os) (version >= ZPL_VERSION_FUID && \ spa_version(dmu_objset_spa(os)) >= SPA_VERSION_FUID) #define USE_SA(version, os) (version >= ZPL_VERSION_SA && \ spa_version(dmu_objset_spa(os)) >= SPA_VERSION_SA) #define MASTER_NODE_OBJ 1 /* * Special attributes for master node. * "userquota@" and "groupquota@" are also valid (from * zfs_userquota_prop_prefixes[]). */ #define ZFS_FSID "FSID" #define ZFS_UNLINKED_SET "DELETE_QUEUE" #define ZFS_ROOT_OBJ "ROOT" #define ZPL_VERSION_STR "VERSION" #define ZFS_FUID_TABLES "FUID" #define ZFS_SHARES_DIR "SHARES" #define ZFS_SA_ATTRS "SA_ATTRS" /* * Convert mode bits (zp_mode) to BSD-style DT_* values for storing in * the directory entries. */ #ifndef IFTODT #define IFTODT(mode) (((mode) & S_IFMT) >> 12) #endif /* * The directory entry has the type (currently unused on Solaris) in the * top 4 bits, and the object number in the low 48 bits. The "middle" * 12 bits are unused. */ #define ZFS_DIRENT_TYPE(de) BF64_GET(de, 60, 4) #define ZFS_DIRENT_OBJ(de) BF64_GET(de, 0, 48) /* * Directory entry locks control access to directory entries. * They are used to protect creates, deletes, and renames. 
* Each directory znode has a mutex and a list of locked names.
 */
#ifdef _KERNEL
/*
 * One lock record for a single directory entry name.  Records are chained
 * through dl_next (see the z_dirlocks member of struct znode below) and
 * point back at their directory through dl_dzp.
 */
typedef struct zfs_dirlock {
	char		*dl_name;	/* directory entry being locked */
	uint32_t	dl_sharecnt;	/* 0 if exclusive, > 0 if shared */
	uint8_t		dl_namelock;	/* 1 if z_name_lock is NOT held */
	uint16_t	dl_namesize;	/* set if dl_name was allocated */
	kcondvar_t	dl_cv;		/* wait for entry to be unlocked */
	struct znode	*dl_dzp;	/* directory znode */
	struct zfs_dirlock *dl_next;	/* next in z_dirlocks list */
} zfs_dirlock_t;

/*
 * In-core state of one ZFS file system object.  Links the object to its
 * file system (z_zfsvfs), its vnode (z_vnode) and its system-attribute
 * handle (z_sa_hdl); the members marked "(cached)" hold copies of
 * attribute values.  The z_lock, z_parent_lock, z_name_lock and
 * z_dirlocks members exist only when built with `illumos' defined.
 */
typedef struct znode {
	struct zfsvfs	*z_zfsvfs;
	vnode_t		*z_vnode;
	uint64_t	z_id;		/* object ID for this znode */
#ifdef illumos
	kmutex_t	z_lock;		/* znode modification lock */
	krwlock_t	z_parent_lock;	/* parent lock for directories */
	krwlock_t	z_name_lock;	/* "master" lock for dirent locks */
	zfs_dirlock_t	*z_dirlocks;	/* directory entry lock list */
#endif
	kmutex_t	z_range_lock;	/* protects changes to z_range_avl */
	avl_tree_t	z_range_avl;	/* avl tree of file range locks */
	uint8_t		z_unlinked;	/* file has been unlinked */
	uint8_t		z_atime_dirty;	/* atime needs to be synced */
	uint8_t		z_zn_prefetch;	/* Prefetch znodes? */
	uint8_t		z_moved;	/* Has this znode been moved? */
	uint_t		z_blksz;	/* block size in bytes */
	uint_t		z_seq;		/* modification sequence number */
	uint64_t	z_mapcnt;	/* number of pages mapped to file */
	uint64_t	z_gen;		/* generation (cached) */
	uint64_t	z_size;		/* file size (cached) */
	uint64_t	z_atime[2];	/* atime (cached) */
	uint64_t	z_links;	/* file links (cached) */
	uint64_t	z_pflags;	/* pflags (cached) */
	uint64_t	z_uid;		/* uid fuid (cached) */
	uint64_t	z_gid;		/* gid fuid (cached) */
	mode_t		z_mode;		/* mode (cached) */
	uint32_t	z_sync_cnt;	/* synchronous open count */
	kmutex_t	z_acl_lock;	/* acl data lock */
	zfs_acl_t	*z_acl_cached;	/* cached acl */
	list_node_t	z_link_node;	/* all znodes in fs link */
	sa_handle_t	*z_sa_hdl;	/* handle to sa data */
	boolean_t	z_is_sa;	/* are we native sa? */
} znode_t;


/*
 * Range locking rules
 * --------------------
 * 1.
When truncating a file (zfs_create, zfs_setattr, zfs_space) the whole
 *    file range needs to be locked as RL_WRITER. Only then can the pages be
 *    freed etc and zp_size reset. zp_size must be set within range lock.
 * 2. For writes and punching holes (zfs_write & zfs_space) just the range
 *    being written or freed needs to be locked as RL_WRITER.
 *    Multiple writes at the end of the file must coordinate zp_size updates
 *    to ensure data isn't lost. A compare and swap loop is currently used
 *    to ensure the file size is at least the offset last written.
 * 3. For reads (zfs_read, zfs_get_data & zfs_putapage) just the range being
 *    read needs to be locked as RL_READER. A check against zp_size can then
 *    be made for reading beyond end of file.
 */

/*
 * Convert between znode pointers and vnode pointers
 *
 * With DEBUG defined the conversions are inline functions that assert the
 * znode and vnode point at each other; otherwise they are plain member
 * accesses.  This text is a diff: in each function the line prefixed `-'
 * is the assertion being removed and the line prefixed `+' is its
 * replacement.  The replacement no longer accepts a NULL pointer or a
 * NULL back-pointer.
 */
#ifdef DEBUG
static __inline vnode_t *
ZTOV(znode_t *zp)
{
	vnode_t *vp = zp->z_vnode;

-	ASSERT(vp == NULL || vp->v_data == NULL || vp->v_data == zp);
+	ASSERT(vp != NULL && vp->v_data == zp);
	return (vp);
}
static __inline znode_t *
VTOZ(vnode_t *vp)
{
	znode_t *zp = (znode_t *)vp->v_data;

-	ASSERT(zp == NULL || zp->z_vnode == NULL || zp->z_vnode == vp);
+	ASSERT(zp != NULL && zp->z_vnode == vp);
	return (zp);
}
#else
#define	ZTOV(ZP)	((ZP)->z_vnode)
#define	VTOZ(VP)	((znode_t *)(VP)->v_data)
#endif

/*
 * Called on entry to each ZFS vnode and vfs operation
 *
 * Takes z_teardown_lock for reading.  If the file system is already marked
 * z_unmounted the lock is dropped again and the *calling function* returns
 * EIO, so this macro may only be used in functions returning an int error.
 */
#define	ZFS_ENTER(zfsvfs) \
	{ \
		rrm_enter_read(&(zfsvfs)->z_teardown_lock, FTAG); \
		if ((zfsvfs)->z_unmounted) { \
			ZFS_EXIT(zfsvfs); \
			return (EIO); \
		} \
	}

/* Must be called before exiting the vop */
#define	ZFS_EXIT(zfsvfs) rrm_exit(&(zfsvfs)->z_teardown_lock, FTAG)

/*
 * Verifies the znode is valid
 *
 * A znode without an SA handle makes the calling function drop the
 * teardown lock and return EIO.
 * NOTE(review): this expands to a bare `if' statement, so it pairs with a
 * following `else' at the call site; the final line of the macro also ends
 * in a continuation backslash and relies on the blank line after it.
 */
#define	ZFS_VERIFY_ZP(zp) \
	if ((zp)->z_sa_hdl == NULL) { \
		ZFS_EXIT((zp)->z_zfsvfs); \
		return (EIO); \
	} \

/*
 * Macros for dealing with dmu_buf_hold
 *
 * An object number is mapped to one entry of the z_hold_mtx array by
 * masking with ZFS_OBJ_MTX_SZ - 1; the HOLD_* macros enter, try to enter,
 * or exit that mutex.
 */
#define	ZFS_OBJ_HASH(obj_num)	((obj_num) & (ZFS_OBJ_MTX_SZ - 1))
#define	ZFS_OBJ_MUTEX(zfsvfs, obj_num)	\
	(&(zfsvfs)->z_hold_mtx[ZFS_OBJ_HASH(obj_num)])
#define	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num) \
	mutex_enter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
#define	ZFS_OBJ_HOLD_TRYENTER(zfsvfs, obj_num) \
	mutex_tryenter(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))
#define	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num) \
	mutex_exit(ZFS_OBJ_MUTEX((zfsvfs), (obj_num)))

/*
 * Encode ZFS stored time values from a struct timespec
 * (stmp[0] receives tv_sec, stmp[1] receives tv_nsec).
 */
#define	ZFS_TIME_ENCODE(tp, stmp)		\
{						\
	(stmp)[0] = (uint64_t)(tp)->tv_sec;	\
	(stmp)[1] = (uint64_t)(tp)->tv_nsec;	\
}

/*
 * Decode ZFS stored time values to a struct timespec
 * (the inverse of ZFS_TIME_ENCODE).
 */
#define	ZFS_TIME_DECODE(tp, stmp)		\
{						\
	(tp)->tv_sec = (time_t)(stmp)[0];		\
	(tp)->tv_nsec = (long)(stmp)[1];		\
}

/*
 * Timestamp defines
 */
#define	ACCESSED		(AT_ATIME)
#define	STATE_CHANGED		(AT_CTIME)
#define	CONTENT_MODIFIED	(AT_MTIME | AT_CTIME)

/*
 * Request an access-time update for zp when atime is enabled on the file
 * system and its vfs is not flagged VFS_RDONLY.
 * NOTE(review): expands to a bare `if' that already ends in `;' -- avoid
 * using it as the body of an unbraced if/else.
 */
#define	ZFS_ACCESSTIME_STAMP(zfsvfs, zp) \
	if ((zfsvfs)->z_atime && !((zfsvfs)->z_vfs->vfs_flag & VFS_RDONLY)) \
		zfs_tstamp_update_setup(zp, ACCESSED, NULL, NULL, B_FALSE);

/*
 * Function prototypes.  The implementations are not part of this header.
 * NOTE(review): zfs_remove_op_tables() and zfs_create_op_tables() are
 * declared with empty parentheses, i.e. without a prototype; `(void)'
 * would let the compiler check their callers.
 */
extern int	zfs_init_fs(zfsvfs_t *, znode_t **);
extern void	zfs_set_dataprop(objset_t *);
extern void	zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *,
    dmu_tx_t *tx);
extern void	zfs_tstamp_update_setup(znode_t *, uint_t, uint64_t [2],
    uint64_t [2], boolean_t);
extern void	zfs_grow_blocksize(znode_t *, uint64_t, dmu_tx_t *);
extern int	zfs_freesp(znode_t *, uint64_t, uint64_t, int, boolean_t);
extern void	zfs_znode_init(void);
extern void	zfs_znode_fini(void);
extern int	zfs_zget(zfsvfs_t *, uint64_t, znode_t **);
extern int	zfs_rezget(znode_t *);
extern void	zfs_zinactive(znode_t *);
extern void	zfs_znode_delete(znode_t *, dmu_tx_t *);
extern void	zfs_znode_free(znode_t *);
extern void	zfs_remove_op_tables();
extern int	zfs_create_op_tables();
extern dev_t	zfs_cmpldev(uint64_t);
extern int	zfs_get_zplprop(objset_t *os, zfs_prop_t prop, uint64_t *value);
extern int	zfs_get_stats(objset_t *os, nvlist_t *nv);
extern void	zfs_znode_dmu_fini(znode_t *);

/* zfs_log_*: each takes the zilog and the dmu transaction of the change. */
extern void zfs_log_create(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, znode_t *zp, char *name, vsecattr_t *, zfs_fuid_info_t *,
    vattr_t *vap);
extern int zfs_log_create_txtype(zil_create_t, vsecattr_t *vsecp,
    vattr_t *vap);
extern void zfs_log_remove(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, char *name, uint64_t foid);
#define	ZFS_NO_OBJECT	0	/* no object id */
extern void zfs_log_link(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, znode_t *zp, char *name);
extern void zfs_log_symlink(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *dzp, znode_t *zp, char *name, char *link);
extern void zfs_log_rename(zilog_t *zilog, dmu_tx_t *tx, uint64_t txtype,
    znode_t *sdzp, char *sname, znode_t *tdzp, char *dname, znode_t *szp);
extern void zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, offset_t off, ssize_t len, int ioflag);
extern void zfs_log_truncate(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, uint64_t off, uint64_t len);
extern void zfs_log_setattr(zilog_t *zilog, dmu_tx_t *tx, int txtype,
    znode_t *zp, vattr_t *vap, uint_t mask_applied, zfs_fuid_info_t *fuidp);
#ifndef ZFS_NO_ACL
extern void zfs_log_acl(zilog_t *zilog, dmu_tx_t *tx, znode_t *zp,
    vsecattr_t *vsecp, zfs_fuid_info_t *fuidp);
#endif
extern void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx);
extern void zfs_upgrade(zfsvfs_t *zfsvfs, dmu_tx_t *tx);
extern int zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx);

extern zil_get_data_t zfs_get_data;
extern zil_replay_func_t *zfs_replay_vector[TX_MAX_TYPE];
extern int zfsfstype;

extern int zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf);

#endif /* _KERNEL */

/* The only declaration in this part of the header outside _KERNEL. */
extern int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len);

#ifdef	__cplusplus
}
#endif

#endif	/* _SYS_FS_ZFS_ZNODE_H */
Index: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c
===================================================================
--- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c (revision 324004)
+++
stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_acl.c (revision 324005) @@ -1,2710 +1,2713 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define ALLOW ACE_ACCESS_ALLOWED_ACE_TYPE #define DENY ACE_ACCESS_DENIED_ACE_TYPE #define MAX_ACE_TYPE ACE_SYSTEM_ALARM_CALLBACK_OBJECT_ACE_TYPE #define MIN_ACE_TYPE ALLOW #define OWNING_GROUP (ACE_GROUP|ACE_IDENTIFIER_GROUP) #define EVERYONE_ALLOW_MASK (ACE_READ_ACL|ACE_READ_ATTRIBUTES | \ ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE) #define EVERYONE_DENY_MASK (ACE_WRITE_ACL|ACE_WRITE_OWNER | \ ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS) #define OWNER_ALLOW_MASK (ACE_WRITE_ACL | ACE_WRITE_OWNER | \ ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS) #define ZFS_CHECKED_MASKS (ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_DATA| \ ACE_READ_NAMED_ATTRS|ACE_WRITE_DATA|ACE_WRITE_ATTRIBUTES| \ ACE_WRITE_NAMED_ATTRS|ACE_APPEND_DATA|ACE_EXECUTE|ACE_WRITE_OWNER| \ ACE_WRITE_ACL|ACE_DELETE|ACE_DELETE_CHILD|ACE_SYNCHRONIZE) #define WRITE_MASK_DATA (ACE_WRITE_DATA|ACE_APPEND_DATA|ACE_WRITE_NAMED_ATTRS) #define WRITE_MASK_ATTRS (ACE_WRITE_ACL|ACE_WRITE_OWNER|ACE_WRITE_ATTRIBUTES| \ ACE_DELETE|ACE_DELETE_CHILD) #define WRITE_MASK (WRITE_MASK_DATA|WRITE_MASK_ATTRS) #define OGE_CLEAR (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE) #define OKAY_MASK_BITS (ACE_READ_DATA|ACE_LIST_DIRECTORY|ACE_WRITE_DATA| \ ACE_ADD_FILE|ACE_APPEND_DATA|ACE_ADD_SUBDIRECTORY|ACE_EXECUTE) #define ALL_INHERIT (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE | \ ACE_NO_PROPAGATE_INHERIT_ACE|ACE_INHERIT_ONLY_ACE|ACE_INHERITED_ACE) #define RESTRICTED_CLEAR (ACE_WRITE_ACL|ACE_WRITE_OWNER) #define V4_ACL_WIDE_FLAGS (ZFS_ACL_AUTO_INHERIT|ZFS_ACL_DEFAULTED|\ ZFS_ACL_PROTECTED) #define ZFS_ACL_WIDE_FLAGS (V4_ACL_WIDE_FLAGS|ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|\ ZFS_ACL_OBJ_ACE) #define ALL_MODE_EXECS (S_IXUSR | S_IXGRP | 
S_IXOTH) static uint16_t zfs_ace_v0_get_type(void *acep) { return (((zfs_oldace_t *)acep)->z_type); } static uint16_t zfs_ace_v0_get_flags(void *acep) { return (((zfs_oldace_t *)acep)->z_flags); } static uint32_t zfs_ace_v0_get_mask(void *acep) { return (((zfs_oldace_t *)acep)->z_access_mask); } static uint64_t zfs_ace_v0_get_who(void *acep) { return (((zfs_oldace_t *)acep)->z_fuid); } static void zfs_ace_v0_set_type(void *acep, uint16_t type) { ((zfs_oldace_t *)acep)->z_type = type; } static void zfs_ace_v0_set_flags(void *acep, uint16_t flags) { ((zfs_oldace_t *)acep)->z_flags = flags; } static void zfs_ace_v0_set_mask(void *acep, uint32_t mask) { ((zfs_oldace_t *)acep)->z_access_mask = mask; } static void zfs_ace_v0_set_who(void *acep, uint64_t who) { ((zfs_oldace_t *)acep)->z_fuid = who; } /*ARGSUSED*/ static size_t zfs_ace_v0_size(void *acep) { return (sizeof (zfs_oldace_t)); } static size_t zfs_ace_v0_abstract_size(void) { return (sizeof (zfs_oldace_t)); } static int zfs_ace_v0_mask_off(void) { return (offsetof(zfs_oldace_t, z_access_mask)); } /*ARGSUSED*/ static int zfs_ace_v0_data(void *acep, void **datap) { *datap = NULL; return (0); } static acl_ops_t zfs_acl_v0_ops = { zfs_ace_v0_get_mask, zfs_ace_v0_set_mask, zfs_ace_v0_get_flags, zfs_ace_v0_set_flags, zfs_ace_v0_get_type, zfs_ace_v0_set_type, zfs_ace_v0_get_who, zfs_ace_v0_set_who, zfs_ace_v0_size, zfs_ace_v0_abstract_size, zfs_ace_v0_mask_off, zfs_ace_v0_data }; static uint16_t zfs_ace_fuid_get_type(void *acep) { return (((zfs_ace_hdr_t *)acep)->z_type); } static uint16_t zfs_ace_fuid_get_flags(void *acep) { return (((zfs_ace_hdr_t *)acep)->z_flags); } static uint32_t zfs_ace_fuid_get_mask(void *acep) { return (((zfs_ace_hdr_t *)acep)->z_access_mask); } static uint64_t zfs_ace_fuid_get_who(void *args) { uint16_t entry_type; zfs_ace_t *acep = args; entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS; if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP || entry_type == ACE_EVERYONE) return (-1); 
return (((zfs_ace_t *)acep)->z_fuid); } static void zfs_ace_fuid_set_type(void *acep, uint16_t type) { ((zfs_ace_hdr_t *)acep)->z_type = type; } static void zfs_ace_fuid_set_flags(void *acep, uint16_t flags) { ((zfs_ace_hdr_t *)acep)->z_flags = flags; } static void zfs_ace_fuid_set_mask(void *acep, uint32_t mask) { ((zfs_ace_hdr_t *)acep)->z_access_mask = mask; } static void zfs_ace_fuid_set_who(void *arg, uint64_t who) { zfs_ace_t *acep = arg; uint16_t entry_type = acep->z_hdr.z_flags & ACE_TYPE_FLAGS; if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP || entry_type == ACE_EVERYONE) return; acep->z_fuid = who; } static size_t zfs_ace_fuid_size(void *acep) { zfs_ace_hdr_t *zacep = acep; uint16_t entry_type; switch (zacep->z_type) { case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: return (sizeof (zfs_object_ace_t)); case ALLOW: case DENY: entry_type = (((zfs_ace_hdr_t *)acep)->z_flags & ACE_TYPE_FLAGS); if (entry_type == ACE_OWNER || entry_type == OWNING_GROUP || entry_type == ACE_EVERYONE) return (sizeof (zfs_ace_hdr_t)); /*FALLTHROUGH*/ default: return (sizeof (zfs_ace_t)); } } static size_t zfs_ace_fuid_abstract_size(void) { return (sizeof (zfs_ace_hdr_t)); } static int zfs_ace_fuid_mask_off(void) { return (offsetof(zfs_ace_hdr_t, z_access_mask)); } static int zfs_ace_fuid_data(void *acep, void **datap) { zfs_ace_t *zacep = acep; zfs_object_ace_t *zobjp; switch (zacep->z_hdr.z_type) { case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: zobjp = acep; *datap = (caddr_t)zobjp + sizeof (zfs_ace_t); return (sizeof (zfs_object_ace_t) - sizeof (zfs_ace_t)); default: *datap = NULL; return (0); } } static acl_ops_t zfs_acl_fuid_ops = { zfs_ace_fuid_get_mask, zfs_ace_fuid_set_mask, zfs_ace_fuid_get_flags, zfs_ace_fuid_set_flags, zfs_ace_fuid_get_type, 
zfs_ace_fuid_set_type, zfs_ace_fuid_get_who, zfs_ace_fuid_set_who, zfs_ace_fuid_size, zfs_ace_fuid_abstract_size, zfs_ace_fuid_mask_off, zfs_ace_fuid_data }; /* * The following three functions are provided for compatibility with * older ZPL version in order to determine if the file use to have * an external ACL and what version of ACL previously existed on the * file. Would really be nice to not need this, sigh. */ uint64_t zfs_external_acl(znode_t *zp) { zfs_acl_phys_t acl_phys; int error; if (zp->z_is_sa) return (0); /* * Need to deal with a potential * race where zfs_sa_upgrade could cause * z_isa_sa to change. * * If the lookup fails then the state of z_is_sa should have * changed. */ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs), &acl_phys, sizeof (acl_phys))) == 0) return (acl_phys.z_acl_extern_obj); else { /* * after upgrade the SA_ZPL_ZNODE_ACL should have been * removed */ VERIFY(zp->z_is_sa && error == ENOENT); return (0); } } /* * Determine size of ACL in bytes * * This is more complicated than it should be since we have to deal * with old external ACLs. 
*/ static int zfs_acl_znode_info(znode_t *zp, int *aclsize, int *aclcount, zfs_acl_phys_t *aclphys) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; uint64_t acl_count; int size; int error; ASSERT(MUTEX_HELD(&zp->z_acl_lock)); if (zp->z_is_sa) { if ((error = sa_size(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zfsvfs), &size)) != 0) return (error); *aclsize = size; if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_COUNT(zfsvfs), &acl_count, sizeof (acl_count))) != 0) return (error); *aclcount = acl_count; } else { if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs), aclphys, sizeof (*aclphys))) != 0) return (error); if (aclphys->z_acl_version == ZFS_ACL_VERSION_INITIAL) { *aclsize = ZFS_ACL_SIZE(aclphys->z_acl_size); *aclcount = aclphys->z_acl_size; } else { *aclsize = aclphys->z_acl_size; *aclcount = aclphys->z_acl_count; } } return (0); } int zfs_znode_acl_version(znode_t *zp) { zfs_acl_phys_t acl_phys; if (zp->z_is_sa) return (ZFS_ACL_VERSION_FUID); else { int error; /* * Need to deal with a potential * race where zfs_sa_upgrade could cause * z_isa_sa to change. * * If the lookup fails then the state of z_is_sa should have * changed. */ if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zp->z_zfsvfs), &acl_phys, sizeof (acl_phys))) == 0) return (acl_phys.z_acl_version); else { /* * After upgrade SA_ZPL_ZNODE_ACL should have * been removed. 
*/ VERIFY(zp->z_is_sa && error == ENOENT); return (ZFS_ACL_VERSION_FUID); } } } static int zfs_acl_version(int version) { if (version < ZPL_VERSION_FUID) return (ZFS_ACL_VERSION_INITIAL); else return (ZFS_ACL_VERSION_FUID); } static int zfs_acl_version_zp(znode_t *zp) { return (zfs_acl_version(zp->z_zfsvfs->z_version)); } zfs_acl_t * zfs_acl_alloc(int vers) { zfs_acl_t *aclp; aclp = kmem_zalloc(sizeof (zfs_acl_t), KM_SLEEP); list_create(&aclp->z_acl, sizeof (zfs_acl_node_t), offsetof(zfs_acl_node_t, z_next)); aclp->z_version = vers; if (vers == ZFS_ACL_VERSION_FUID) aclp->z_ops = zfs_acl_fuid_ops; else aclp->z_ops = zfs_acl_v0_ops; return (aclp); } zfs_acl_node_t * zfs_acl_node_alloc(size_t bytes) { zfs_acl_node_t *aclnode; aclnode = kmem_zalloc(sizeof (zfs_acl_node_t), KM_SLEEP); if (bytes) { aclnode->z_acldata = kmem_alloc(bytes, KM_SLEEP); aclnode->z_allocdata = aclnode->z_acldata; aclnode->z_allocsize = bytes; aclnode->z_size = bytes; } return (aclnode); } static void zfs_acl_node_free(zfs_acl_node_t *aclnode) { if (aclnode->z_allocsize) kmem_free(aclnode->z_allocdata, aclnode->z_allocsize); kmem_free(aclnode, sizeof (zfs_acl_node_t)); } static void zfs_acl_release_nodes(zfs_acl_t *aclp) { zfs_acl_node_t *aclnode; while (aclnode = list_head(&aclp->z_acl)) { list_remove(&aclp->z_acl, aclnode); zfs_acl_node_free(aclnode); } aclp->z_acl_count = 0; aclp->z_acl_bytes = 0; } void zfs_acl_free(zfs_acl_t *aclp) { zfs_acl_release_nodes(aclp); list_destroy(&aclp->z_acl); kmem_free(aclp, sizeof (zfs_acl_t)); } static boolean_t zfs_acl_valid_ace_type(uint_t type, uint_t flags) { uint16_t entry_type; switch (type) { case ALLOW: case DENY: case ACE_SYSTEM_AUDIT_ACE_TYPE: case ACE_SYSTEM_ALARM_ACE_TYPE: entry_type = flags & ACE_TYPE_FLAGS; return (entry_type == ACE_OWNER || entry_type == OWNING_GROUP || entry_type == ACE_EVERYONE || entry_type == 0 || entry_type == ACE_IDENTIFIER_GROUP); default: if (type >= MIN_ACE_TYPE && type <= MAX_ACE_TYPE) return (B_TRUE); } return 
(B_FALSE); } static boolean_t zfs_ace_valid(vtype_t obj_type, zfs_acl_t *aclp, uint16_t type, uint16_t iflags) { /* * first check type of entry */ if (!zfs_acl_valid_ace_type(type, iflags)) return (B_FALSE); switch (type) { case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: if (aclp->z_version < ZFS_ACL_VERSION_FUID) return (B_FALSE); aclp->z_hints |= ZFS_ACL_OBJ_ACE; } /* * next check inheritance level flags */ if (obj_type == VDIR && (iflags & (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE))) aclp->z_hints |= ZFS_INHERIT_ACE; if (iflags & (ACE_INHERIT_ONLY_ACE|ACE_NO_PROPAGATE_INHERIT_ACE)) { if ((iflags & (ACE_FILE_INHERIT_ACE| ACE_DIRECTORY_INHERIT_ACE)) == 0) { return (B_FALSE); } } return (B_TRUE); } static void * zfs_acl_next_ace(zfs_acl_t *aclp, void *start, uint64_t *who, uint32_t *access_mask, uint16_t *iflags, uint16_t *type) { zfs_acl_node_t *aclnode; ASSERT(aclp); if (start == NULL) { aclnode = list_head(&aclp->z_acl); if (aclnode == NULL) return (NULL); aclp->z_next_ace = aclnode->z_acldata; aclp->z_curr_node = aclnode; aclnode->z_ace_idx = 0; } aclnode = aclp->z_curr_node; if (aclnode == NULL) return (NULL); if (aclnode->z_ace_idx >= aclnode->z_ace_count) { aclnode = list_next(&aclp->z_acl, aclnode); if (aclnode == NULL) return (NULL); else { aclp->z_curr_node = aclnode; aclnode->z_ace_idx = 0; aclp->z_next_ace = aclnode->z_acldata; } } if (aclnode->z_ace_idx < aclnode->z_ace_count) { void *acep = aclp->z_next_ace; size_t ace_size; /* * Make sure we don't overstep our bounds */ ace_size = aclp->z_ops.ace_size(acep); if (((caddr_t)acep + ace_size) > ((caddr_t)aclnode->z_acldata + aclnode->z_size)) { return (NULL); } *iflags = aclp->z_ops.ace_flags_get(acep); *type = aclp->z_ops.ace_type_get(acep); *access_mask = aclp->z_ops.ace_mask_get(acep); *who = aclp->z_ops.ace_who_get(acep); aclp->z_next_ace = (caddr_t)aclp->z_next_ace + ace_size; 
aclnode->z_ace_idx++; return ((void *)acep); } return (NULL); } /*ARGSUSED*/ static uint64_t zfs_ace_walk(void *datap, uint64_t cookie, int aclcnt, uint16_t *flags, uint16_t *type, uint32_t *mask) { zfs_acl_t *aclp = datap; zfs_ace_hdr_t *acep = (zfs_ace_hdr_t *)(uintptr_t)cookie; uint64_t who; acep = zfs_acl_next_ace(aclp, acep, &who, mask, flags, type); return ((uint64_t)(uintptr_t)acep); } static zfs_acl_node_t * zfs_acl_curr_node(zfs_acl_t *aclp) { ASSERT(aclp->z_curr_node); return (aclp->z_curr_node); } /* * Copy ACE to internal ZFS format. * While processing the ACL each ACE will be validated for correctness. * ACE FUIDs will be created later. */ int zfs_copy_ace_2_fuid(zfsvfs_t *zfsvfs, vtype_t obj_type, zfs_acl_t *aclp, void *datap, zfs_ace_t *z_acl, uint64_t aclcnt, size_t *size, zfs_fuid_info_t **fuidp, cred_t *cr) { int i; uint16_t entry_type; zfs_ace_t *aceptr = z_acl; ace_t *acep = datap; zfs_object_ace_t *zobjacep; ace_object_t *aceobjp; for (i = 0; i != aclcnt; i++) { aceptr->z_hdr.z_access_mask = acep->a_access_mask; aceptr->z_hdr.z_flags = acep->a_flags; aceptr->z_hdr.z_type = acep->a_type; entry_type = aceptr->z_hdr.z_flags & ACE_TYPE_FLAGS; if (entry_type != ACE_OWNER && entry_type != OWNING_GROUP && entry_type != ACE_EVERYONE) { aceptr->z_fuid = zfs_fuid_create(zfsvfs, acep->a_who, cr, (entry_type == 0) ? 
ZFS_ACE_USER : ZFS_ACE_GROUP, fuidp); } /* * Make sure ACE is valid */ if (zfs_ace_valid(obj_type, aclp, aceptr->z_hdr.z_type, aceptr->z_hdr.z_flags) != B_TRUE) return (SET_ERROR(EINVAL)); switch (acep->a_type) { case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: zobjacep = (zfs_object_ace_t *)aceptr; aceobjp = (ace_object_t *)acep; bcopy(aceobjp->a_obj_type, zobjacep->z_object_type, sizeof (aceobjp->a_obj_type)); bcopy(aceobjp->a_inherit_obj_type, zobjacep->z_inherit_type, sizeof (aceobjp->a_inherit_obj_type)); acep = (ace_t *)((caddr_t)acep + sizeof (ace_object_t)); break; default: acep = (ace_t *)((caddr_t)acep + sizeof (ace_t)); } aceptr = (zfs_ace_t *)((caddr_t)aceptr + aclp->z_ops.ace_size(aceptr)); } *size = (caddr_t)aceptr - (caddr_t)z_acl; return (0); } /* * Copy ZFS ACEs to fixed size ace_t layout */ static void zfs_copy_fuid_2_ace(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, cred_t *cr, void *datap, int filter) { uint64_t who; uint32_t access_mask; uint16_t iflags, type; zfs_ace_hdr_t *zacep = NULL; ace_t *acep = datap; ace_object_t *objacep; zfs_object_ace_t *zobjacep; size_t ace_size; uint16_t entry_type; while (zacep = zfs_acl_next_ace(aclp, zacep, &who, &access_mask, &iflags, &type)) { switch (type) { case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: if (filter) { continue; } zobjacep = (zfs_object_ace_t *)zacep; objacep = (ace_object_t *)acep; bcopy(zobjacep->z_object_type, objacep->a_obj_type, sizeof (zobjacep->z_object_type)); bcopy(zobjacep->z_inherit_type, objacep->a_inherit_obj_type, sizeof (zobjacep->z_inherit_type)); ace_size = sizeof (ace_object_t); break; default: ace_size = sizeof (ace_t); break; } entry_type = (iflags & ACE_TYPE_FLAGS); if ((entry_type != ACE_OWNER && entry_type != OWNING_GROUP && entry_type != ACE_EVERYONE)) { 
acep->a_who = zfs_fuid_map_id(zfsvfs, who, cr, (entry_type & ACE_IDENTIFIER_GROUP) ? ZFS_ACE_GROUP : ZFS_ACE_USER); } else { acep->a_who = (uid_t)(int64_t)who; } acep->a_access_mask = access_mask; acep->a_flags = iflags; acep->a_type = type; acep = (ace_t *)((caddr_t)acep + ace_size); } } static int zfs_copy_ace_2_oldace(vtype_t obj_type, zfs_acl_t *aclp, ace_t *acep, zfs_oldace_t *z_acl, int aclcnt, size_t *size) { int i; zfs_oldace_t *aceptr = z_acl; for (i = 0; i != aclcnt; i++, aceptr++) { aceptr->z_access_mask = acep[i].a_access_mask; aceptr->z_type = acep[i].a_type; aceptr->z_flags = acep[i].a_flags; aceptr->z_fuid = acep[i].a_who; /* * Make sure ACE is valid */ if (zfs_ace_valid(obj_type, aclp, aceptr->z_type, aceptr->z_flags) != B_TRUE) return (SET_ERROR(EINVAL)); } *size = (caddr_t)aceptr - (caddr_t)z_acl; return (0); } /* * convert old ACL format to new */ void zfs_acl_xform(znode_t *zp, zfs_acl_t *aclp, cred_t *cr) { zfs_oldace_t *oldaclp; int i; uint16_t type, iflags; uint32_t access_mask; uint64_t who; void *cookie = NULL; zfs_acl_node_t *newaclnode; ASSERT(aclp->z_version == ZFS_ACL_VERSION_INITIAL); /* * First create the ACE in a contiguous piece of memory * for zfs_copy_ace_2_fuid(). * * We only convert an ACL once, so this won't happen * everytime. 
*/ oldaclp = kmem_alloc(sizeof (zfs_oldace_t) * aclp->z_acl_count, KM_SLEEP); i = 0; while (cookie = zfs_acl_next_ace(aclp, cookie, &who, &access_mask, &iflags, &type)) { oldaclp[i].z_flags = iflags; oldaclp[i].z_type = type; oldaclp[i].z_fuid = who; oldaclp[i++].z_access_mask = access_mask; } newaclnode = zfs_acl_node_alloc(aclp->z_acl_count * sizeof (zfs_object_ace_t)); aclp->z_ops = zfs_acl_fuid_ops; VERIFY(zfs_copy_ace_2_fuid(zp->z_zfsvfs, ZTOV(zp)->v_type, aclp, oldaclp, newaclnode->z_acldata, aclp->z_acl_count, &newaclnode->z_size, NULL, cr) == 0); newaclnode->z_ace_count = aclp->z_acl_count; aclp->z_version = ZFS_ACL_VERSION; kmem_free(oldaclp, aclp->z_acl_count * sizeof (zfs_oldace_t)); /* * Release all previous ACL nodes */ zfs_acl_release_nodes(aclp); list_insert_head(&aclp->z_acl, newaclnode); aclp->z_acl_bytes = newaclnode->z_size; aclp->z_acl_count = newaclnode->z_ace_count; } /* * Convert unix access mask to v4 access mask */ static uint32_t zfs_unix_to_v4(uint32_t access_mask) { uint32_t new_mask = 0; if (access_mask & S_IXOTH) new_mask |= ACE_EXECUTE; if (access_mask & S_IWOTH) new_mask |= ACE_WRITE_DATA; if (access_mask & S_IROTH) new_mask |= ACE_READ_DATA; return (new_mask); } static void zfs_set_ace(zfs_acl_t *aclp, void *acep, uint32_t access_mask, uint16_t access_type, uint64_t fuid, uint16_t entry_type) { uint16_t type = entry_type & ACE_TYPE_FLAGS; aclp->z_ops.ace_mask_set(acep, access_mask); aclp->z_ops.ace_type_set(acep, access_type); aclp->z_ops.ace_flags_set(acep, entry_type); if ((type != ACE_OWNER && type != OWNING_GROUP && type != ACE_EVERYONE)) aclp->z_ops.ace_who_set(acep, fuid); } /* * Determine mode of file based on ACL. 
* Also, create FUIDs for any User/Group ACEs */ uint64_t zfs_mode_compute(uint64_t fmode, zfs_acl_t *aclp, uint64_t *pflags, uint64_t fuid, uint64_t fgid) { int entry_type; mode_t mode; mode_t seen = 0; zfs_ace_hdr_t *acep = NULL; uint64_t who; uint16_t iflags, type; uint32_t access_mask; boolean_t an_exec_denied = B_FALSE; mode = (fmode & (S_IFMT | S_ISUID | S_ISGID | S_ISVTX)); while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask, &iflags, &type)) { if (!zfs_acl_valid_ace_type(type, iflags)) continue; entry_type = (iflags & ACE_TYPE_FLAGS); /* * Skip over owner@, group@ or everyone@ inherit only ACEs */ if ((iflags & ACE_INHERIT_ONLY_ACE) && (entry_type == ACE_OWNER || entry_type == ACE_EVERYONE || entry_type == OWNING_GROUP)) continue; if (entry_type == ACE_OWNER || (entry_type == 0 && who == fuid)) { if ((access_mask & ACE_READ_DATA) && (!(seen & S_IRUSR))) { seen |= S_IRUSR; if (type == ALLOW) { mode |= S_IRUSR; } } if ((access_mask & ACE_WRITE_DATA) && (!(seen & S_IWUSR))) { seen |= S_IWUSR; if (type == ALLOW) { mode |= S_IWUSR; } } if ((access_mask & ACE_EXECUTE) && (!(seen & S_IXUSR))) { seen |= S_IXUSR; if (type == ALLOW) { mode |= S_IXUSR; } } } else if (entry_type == OWNING_GROUP || (entry_type == ACE_IDENTIFIER_GROUP && who == fgid)) { if ((access_mask & ACE_READ_DATA) && (!(seen & S_IRGRP))) { seen |= S_IRGRP; if (type == ALLOW) { mode |= S_IRGRP; } } if ((access_mask & ACE_WRITE_DATA) && (!(seen & S_IWGRP))) { seen |= S_IWGRP; if (type == ALLOW) { mode |= S_IWGRP; } } if ((access_mask & ACE_EXECUTE) && (!(seen & S_IXGRP))) { seen |= S_IXGRP; if (type == ALLOW) { mode |= S_IXGRP; } } } else if (entry_type == ACE_EVERYONE) { if ((access_mask & ACE_READ_DATA)) { if (!(seen & S_IRUSR)) { seen |= S_IRUSR; if (type == ALLOW) { mode |= S_IRUSR; } } if (!(seen & S_IRGRP)) { seen |= S_IRGRP; if (type == ALLOW) { mode |= S_IRGRP; } } if (!(seen & S_IROTH)) { seen |= S_IROTH; if (type == ALLOW) { mode |= S_IROTH; } } } if ((access_mask & 
ACE_WRITE_DATA)) { if (!(seen & S_IWUSR)) { seen |= S_IWUSR; if (type == ALLOW) { mode |= S_IWUSR; } } if (!(seen & S_IWGRP)) { seen |= S_IWGRP; if (type == ALLOW) { mode |= S_IWGRP; } } if (!(seen & S_IWOTH)) { seen |= S_IWOTH; if (type == ALLOW) { mode |= S_IWOTH; } } } if ((access_mask & ACE_EXECUTE)) { if (!(seen & S_IXUSR)) { seen |= S_IXUSR; if (type == ALLOW) { mode |= S_IXUSR; } } if (!(seen & S_IXGRP)) { seen |= S_IXGRP; if (type == ALLOW) { mode |= S_IXGRP; } } if (!(seen & S_IXOTH)) { seen |= S_IXOTH; if (type == ALLOW) { mode |= S_IXOTH; } } } } else { /* * Only care if this IDENTIFIER_GROUP or * USER ACE denies execute access to someone, * mode is not affected */ if ((access_mask & ACE_EXECUTE) && type == DENY) an_exec_denied = B_TRUE; } } /* * Failure to allow is effectively a deny, so execute permission * is denied if it was never mentioned or if we explicitly * weren't allowed it. */ if (!an_exec_denied && ((seen & ALL_MODE_EXECS) != ALL_MODE_EXECS || (mode & ALL_MODE_EXECS) != ALL_MODE_EXECS)) an_exec_denied = B_TRUE; if (an_exec_denied) *pflags &= ~ZFS_NO_EXECS_DENIED; else *pflags |= ZFS_NO_EXECS_DENIED; return (mode); } /* * Read an external acl object. If the intent is to modify, always * create a new acl and leave any cached acl in place. 
*/
/*
 * The caller must hold zp->z_acl_lock and the vnode lock (both asserted).
 *
 * When a cached ACL exists and will_modify is false, the cached ACL is
 * returned as is.  Otherwise a new zfs_acl_t with a single node is built
 * from the znode: from the external ACL object, from the ACE data embedded
 * in the znode's ACL structure, or from the SA DACL attribute.  A freshly
 * built ACL becomes the cached one only when will_modify is false.
 *
 * Returns 0 and stores the ACL in *aclpp, or an error (ECKSUM is reported
 * as EIO), in which case *aclpp is not written.
 */
static int
zfs_acl_node_read(znode_t *zp, zfs_acl_t **aclpp, boolean_t will_modify)
{
	zfs_acl_t	*aclp;
	int		aclsize;
	int		acl_count;
	zfs_acl_node_t	*aclnode;
	zfs_acl_phys_t	znode_acl;
	int		version;
	int		error;

	ASSERT(MUTEX_HELD(&zp->z_acl_lock));
	ASSERT_VOP_LOCKED(ZTOV(zp), __func__);

	/* Read-only callers can share the cached copy. */
	if (zp->z_acl_cached && !will_modify) {
		*aclpp = zp->z_acl_cached;
		return (0);
	}

	version = zfs_znode_acl_version(zp);

	if ((error = zfs_acl_znode_info(zp, &aclsize,
	    &acl_count, &znode_acl)) != 0) {
		goto done;
	}

	aclp = zfs_acl_alloc(version);

	aclp->z_acl_count = acl_count;
	aclp->z_acl_bytes = aclsize;

	/* The whole ACL is read into one node of aclsize bytes. */
	aclnode = zfs_acl_node_alloc(aclsize);
	aclnode->z_ace_count = aclp->z_acl_count;
	aclnode->z_size = aclsize;

	if (!zp->z_is_sa) {
		if (znode_acl.z_acl_extern_obj) {
			error = dmu_read(zp->z_zfsvfs->z_os,
			    znode_acl.z_acl_extern_obj, 0, aclnode->z_size,
			    aclnode->z_acldata, DMU_READ_PREFETCH);
		} else {
			bcopy(znode_acl.z_ace_data, aclnode->z_acldata,
			    aclnode->z_size);
		}
	} else {
		error = sa_lookup(zp->z_sa_hdl, SA_ZPL_DACL_ACES(zp->z_zfsvfs),
		    aclnode->z_acldata, aclnode->z_size);
	}

	if (error != 0) {
		/* The node is not linked into aclp yet, so free both. */
		zfs_acl_free(aclp);
		zfs_acl_node_free(aclnode);
		/* convert checksum errors into IO errors */
		if (error == ECKSUM)
			error = SET_ERROR(EIO);
		goto done;
	}

	list_insert_head(&aclp->z_acl, aclnode);

	*aclpp = aclp;
	if (!will_modify)
		zp->z_acl_cached = aclp;
done:
	return (error);
}

/*
 * SA data locator for the DACL attribute (see zfs_aclset_common()).
 *
 * userdata is a zfs_acl_locator_cb_t.  With start set the callback selects
 * the first node of the ACL, otherwise it advances to the next one, and it
 * then reports that node's data pointer and size.  buflen is not used.
 *
 * NOTE(review): the selected node is dereferenced without a NULL check;
 * presumably the caller never asks for more chunks than there are nodes --
 * confirm.
 */
/*ARGSUSED*/
void
zfs_acl_data_locator(void **dataptr, uint32_t *length, uint32_t buflen,
    boolean_t start, void *userdata)
{
	zfs_acl_locator_cb_t *cb = (zfs_acl_locator_cb_t *)userdata;

	if (start) {
		cb->cb_acl_node = list_head(&cb->cb_aclp->z_acl);
	} else {
		cb->cb_acl_node = list_next(&cb->cb_aclp->z_acl,
		    cb->cb_acl_node);
	}

	*dataptr = cb->cb_acl_node->z_acldata;
	*length = cb->cb_acl_node->z_size;
}

/*
 * Recompute zp->z_mode (and the exec hint in zp->z_pflags) from the file's
 * ACL using the znode's current z_uid and z_gid.
 *
 * The caller must hold the vnode exclusively and own zp->z_acl_lock (both
 * asserted).  Returns the result of reading the ACL; the mode is only
 * updated when that read succeeds.
 */
int
zfs_acl_chown_setattr(znode_t *zp)
{
	int error;
	zfs_acl_t *aclp;

	ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
	ASSERT(MUTEX_HELD(&zp->z_acl_lock));

	if ((error = zfs_acl_node_read(zp, &aclp, B_FALSE)) == 0)
		zp->z_mode = zfs_mode_compute(zp->z_mode, aclp,
		    &zp->z_pflags, zp->z_uid, zp->z_gid);
	return (error);
}

/*
 * common code for setting ACLs.
 *
 * This function is called from zfs_mode_update, zfs_perm_init, and zfs_setacl.
 *
 * NOTE(review): an earlier version of this comment said that zfs_setacl
 * passes a non-NULL inherit pointer (ihp); the function has no such
 * parameter, so that sentence appears to be stale.
 *
 * What the code below does, in order:
 *  - recomputes the mode from aclp and stores it in zp->z_mode;
 *  - drops any cached ACL on the znode;
 *  - converts aclp to the FUID format when the file system uses FUIDs and
 *    aclp is still at the initial version;
 *  - queues the ACL either as SA attributes or, for non-SA znodes, as a
 *    zfs_acl_phys_t plus an optional external ACL object;
 *  - replaces the ACL-wide bits of zp->z_pflags with aclp->z_hints and
 *    writes everything with one sa_bulk_update().
 *
 * Returns 0 or an error from sa_lookup(), dmu_object_free() or
 * sa_bulk_update().  The early error returns happen after zp->z_mode has
 * been updated and the cached ACL has been freed.
 */
int
zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
{
	int			error;
	zfsvfs_t		*zfsvfs = zp->z_zfsvfs;
	dmu_object_type_t	otype;
	zfs_acl_locator_cb_t	locate = { 0 };
	uint64_t		mode;
	sa_bulk_attr_t		bulk[5];
	uint64_t		ctime[2];
	int			count = 0;

	mode = zp->z_mode;

	mode = zfs_mode_compute(mode, aclp, &zp->z_pflags,
	    zp->z_uid, zp->z_gid);

	zp->z_mode = mode;
	/*
	 * The flags and ctime entries are registered by address here;
	 * z_pflags and ctime are filled in further down, before the
	 * sa_bulk_update() call.
	 */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL,
	    &mode, sizeof (mode));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, sizeof (zp->z_pflags));
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL,
	    &ctime, sizeof (ctime));

	if (zp->z_acl_cached) {
		zfs_acl_free(zp->z_acl_cached);
		zp->z_acl_cached = NULL;
	}

	/*
	 * Upgrade needed?
	 */
	if (!zfsvfs->z_use_fuids) {
		otype = DMU_OT_OLDACL;
	} else {
		if ((aclp->z_version == ZFS_ACL_VERSION_INITIAL) &&
		    (zfsvfs->z_version >= ZPL_VERSION_FUID))
			zfs_acl_xform(zp, aclp, cr);
		ASSERT(aclp->z_version >= ZFS_ACL_VERSION_FUID);
		otype = DMU_OT_ACL;
	}

	/*
	 * Arrgh, we have to handle old on disk format
	 * as well as newer (preferred) SA format.
	 */

	if (zp->z_is_sa) { /* the easy case, just update the ACL attribute */
		locate.cb_aclp = aclp;
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_ACES(zfsvfs),
		    zfs_acl_data_locator, &locate, aclp->z_acl_bytes);
		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_DACL_COUNT(zfsvfs),
		    NULL, &aclp->z_acl_count, sizeof (uint64_t));
	} else { /* Painful legacy way */
		zfs_acl_node_t *aclnode;
		uint64_t off = 0;
		zfs_acl_phys_t acl_phys;
		uint64_t aoid;

		if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_ZNODE_ACL(zfsvfs),
		    &acl_phys, sizeof (acl_phys))) != 0)
			return (error);

		aoid = acl_phys.z_acl_extern_obj;

		if (aclp->z_acl_bytes > ZFS_ACE_SPACE) {
			/*
			 * If ACL was previously external and we are now
			 * converting to new ACL format then release old
			 * ACL object and create a new one.
			 */
			if (aoid &&
			    aclp->z_version != acl_phys.z_acl_version) {
				error = dmu_object_free(zfsvfs->z_os, aoid, tx);
				if (error)
					return (error);
				aoid = 0;
			}
			if (aoid == 0) {
				aoid = dmu_object_alloc(zfsvfs->z_os,
				    otype, aclp->z_acl_bytes,
				    otype == DMU_OT_ACL ?
				    DMU_OT_SYSACL : DMU_OT_NONE,
				    otype == DMU_OT_ACL ?
				    DN_MAX_BONUSLEN : 0, tx);
			} else {
				(void) dmu_object_set_blocksize(zfsvfs->z_os,
				    aoid, aclp->z_acl_bytes, 0, tx);
			}
			acl_phys.z_acl_extern_obj = aoid;
			/* Write the non-empty nodes back to back. */
			for (aclnode = list_head(&aclp->z_acl); aclnode;
			    aclnode = list_next(&aclp->z_acl, aclnode)) {
				if (aclnode->z_ace_count == 0)
					continue;
				dmu_write(zfsvfs->z_os, aoid, off,
				    aclnode->z_size, aclnode->z_acldata, tx);
				off += aclnode->z_size;
			}
		} else {
			void *start = acl_phys.z_ace_data;
			/*
			 * Migrating back embedded?
			 */
			if (acl_phys.z_acl_extern_obj) {
				error = dmu_object_free(zfsvfs->z_os,
				    acl_phys.z_acl_extern_obj, tx);
				if (error)
					return (error);
				acl_phys.z_acl_extern_obj = 0;
			}

			/* Pack the non-empty nodes into the embedded area. */
			for (aclnode = list_head(&aclp->z_acl); aclnode;
			    aclnode = list_next(&aclp->z_acl, aclnode)) {
				if (aclnode->z_ace_count == 0)
					continue;
				bcopy(aclnode->z_acldata, start,
				    aclnode->z_size);
				start = (caddr_t)start + aclnode->z_size;
			}
		}
		/*
		 * If Old version then swap count/bytes to match old
		 * layout of znode_acl_phys_t.
		 */
		if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) {
			acl_phys.z_acl_size = aclp->z_acl_count;
			acl_phys.z_acl_count = aclp->z_acl_bytes;
		} else {
			acl_phys.z_acl_size = aclp->z_acl_bytes;
			acl_phys.z_acl_count = aclp->z_acl_count;
		}
		acl_phys.z_acl_version = aclp->z_version;

		SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
		    &acl_phys, sizeof (acl_phys));
	}

	/*
	 * Replace ACL wide bits, but first clear them.
	 */
	zp->z_pflags &= ~ZFS_ACL_WIDE_FLAGS;

	zp->z_pflags |= aclp->z_hints;

	if (ace_trivial_common(aclp, 0, zfs_ace_walk) == 0)
		zp->z_pflags |= ZFS_ACL_TRIVIAL;

	zfs_tstamp_update_setup(zp, STATE_CHANGED, NULL, ctime, B_TRUE);
	return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
}

/*
 * Rebuild aclp so that it matches the permission bits in mode.
 *
 * A new node is laid out as follows:
 *  - up to three leading abstract ACEs (allow0, deny1, deny2) taken from
 *    acl_trivial_access_masks(), each emitted only when its mask is
 *    non-zero;
 *  - every existing ACE except the non-inherit-only owner@, group@ and
 *    everyone@ entries; when trim is set, plain ALLOW entries are limited
 *    to the group mask;
 *  - three trailing owner@, group@ and everyone@ ACEs of type 0 carrying
 *    masks.owner, masks.group and masks.everyone.
 *
 * The new node then replaces all nodes previously attached to aclp, and
 * aclp's count and byte totals are updated.  z_hints may gain
 * ZFS_INHERIT_ACE and ZFS_ACL_OBJ_ACE along the way.
 */
static void
zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp)
{
	void		*acep = NULL;
	uint64_t	who;
	int		new_count, new_bytes;
	int		ace_size;
	int		entry_type;
	uint16_t	iflags, type;
	uint32_t	access_mask;
	zfs_acl_node_t	*newnode;
	size_t		abstract_size = aclp->z_ops.ace_abstract_size();
	void		*zacep;
	boolean_t	isdir;
	trivial_acl_t	masks;

	new_count = new_bytes = 0;

	isdir = (vtype == VDIR);

	acl_trivial_access_masks((mode_t)mode, isdir, &masks);

	/* Room for up to six abstract ACEs plus everything already there. */
	newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);

	zacep = newnode->z_acldata;
	if (masks.allow0) {
		zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER);
		zacep = (void *)((uintptr_t)zacep + abstract_size);
		new_count++;
		new_bytes += abstract_size;
	}
	if (masks.deny1) {
		zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER);
		zacep = (void *)((uintptr_t)zacep + abstract_size);
		new_count++;
		new_bytes += abstract_size;
	}
	if (masks.deny2) {
		zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP);
		zacep = (void *)((uintptr_t)zacep + abstract_size);
		new_count++;
		new_bytes += abstract_size;
	}

	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
	    &iflags, &type)) {
		uint16_t inherit_flags;

		entry_type = (iflags & ACE_TYPE_FLAGS);
		inherit_flags = (iflags & ALL_INHERIT);

		/*
		 * Effective owner@/group@/everyone@ entries are dropped;
		 * they are regenerated from the mode after the loop.
		 */
		if ((entry_type == ACE_OWNER || entry_type == ACE_EVERYONE ||
		    (entry_type == OWNING_GROUP)) &&
		    ((inherit_flags & ACE_INHERIT_ONLY_ACE) == 0)) {
			continue;
		}

		/*
		 * If this ACL has any inheritable ACEs, mark that in
		 * the hints (which are later masked into the pflags)
		 * so create knows to do inheritance.
		 */
		if (isdir && (inherit_flags &
		    (ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
			aclp->z_hints |= ZFS_INHERIT_ACE;

		if ((type != ALLOW && type != DENY) ||
		    (inherit_flags & ACE_INHERIT_ONLY_ACE)) {
			switch (type) {
			case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
			case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
			case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE:
			case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE:
				aclp->z_hints |= ZFS_ACL_OBJ_ACE;
				break;
			}
		} else {
			/*
			 * Limit permissions to be no greater than
			 * group permissions.
			 * The "aclinherit" and "aclmode" properties
			 * affect policy for create and chmod(2),
			 * respectively.
			 */
			if ((type == ALLOW) && trim)
				access_mask &= masks.group;
		}
		zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
		/* Advance by the size of the source ACE. */
		ace_size = aclp->z_ops.ace_size(acep);
		zacep = (void *)((uintptr_t)zacep + ace_size);
		new_count++;
		new_bytes += ace_size;
	}
	zfs_set_ace(aclp, zacep, masks.owner, 0, -1, ACE_OWNER);
	zacep = (void *)((uintptr_t)zacep + abstract_size);
	zfs_set_ace(aclp, zacep, masks.group, 0, -1, OWNING_GROUP);
	zacep = (void *)((uintptr_t)zacep + abstract_size);
	zfs_set_ace(aclp, zacep, masks.everyone, 0, -1, ACE_EVERYONE);

	new_count += 3;
	new_bytes += abstract_size * 3;
	/* Swap the old nodes for the single rebuilt one. */
	zfs_acl_release_nodes(aclp);
	aclp->z_acl_count = new_count;
	aclp->z_acl_bytes = new_bytes;
	newnode->z_ace_count = new_count;
	newnode->z_size = new_bytes;
	list_insert_tail(&aclp->z_acl, newnode);
}

/*
 * Produce, in *aclp, the ACL the file should have after a chmod to mode.
 *
 * With the ZFS_ACL_DISCARD acl mode an empty ACL is started; otherwise a
 * private (will_modify) copy of the file's ACL is read.  The result is run
 * through zfs_acl_chmod(), trimming ALLOW entries only for the
 * ZFS_ACL_GROUPMASK acl mode.  zp->z_acl_lock is taken and released here.
 * Returns 0 or the error from reading the ACL.
 */
int
zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
{
	int error = 0;

	mutex_enter(&zp->z_acl_lock);
	ASSERT_VOP_ELOCKED(ZTOV(zp), __func__);
	if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_DISCARD)
		*aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
	else
		error = zfs_acl_node_read(zp, aclp, B_TRUE);

	if (error == 0) {
		(*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
		zfs_acl_chmod(ZTOV(zp)->v_type, mode,
		    (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp);
	}
	mutex_exit(&zp->z_acl_lock);

	return (error);
}

/*
 * strip off write_owner and write_acl
 *
 * Applies only when the aclinherit setting is ZFS_ACL_RESTRICTED and the
 * ACE is an ALLOW entry; the bits in RESTRICTED_CLEAR are removed from its
 * mask.
 */
static void
zfs_restricted_update(zfsvfs_t *zfsvfs, zfs_acl_t *aclp, void *acep)
{
	uint32_t mask = aclp->z_ops.ace_mask_get(acep);

	if ((zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) &&
	    (aclp->z_ops.ace_type_get(acep) == ALLOW)) {
		mask &= ~RESTRICTED_CLEAR;
		aclp->z_ops.ace_mask_set(acep, mask);
	}
}

/*
 * Should ACE be inherited?
 *
 * Returns non-zero when an ACE with flags acep_flags applies to a new
 * object of type vtype:
 *  - a directory inherits any ACE carrying the directory-inherit flag;
 *  - otherwise the ACE must carry the file-inherit flag, and a directory
 *    additionally must not see the no-propagate flag.
 * Only the low four flag bits are considered.
 */
static int
zfs_ace_can_use(vtype_t vtype, uint16_t acep_flags)
{
	int	iflags = (acep_flags & 0xf);

	if ((vtype == VDIR) && (iflags & ACE_DIRECTORY_INHERIT_ACE))
		return (1);
	else if (iflags & ACE_FILE_INHERIT_ACE)
		return (!((vtype == VDIR) &&
		    (iflags & ACE_NO_PROPAGATE_INHERIT_ACE)));
	return (0);
}

/*
 * inherit inheritable ACEs from parent
 *
 * Builds and returns a new ACL (same version as paclp) for an object of
 * type vtype being created with the given mode.  Each inherited ACE gets
 * its own node.  *need_chmod starts out B_TRUE and is cleared when, under
 * a passthrough aclinherit setting, an owner@, group@ or everyone@ ACE is
 * inherited by a regular file or, with the directory-inherit flag, by a
 * directory.  Nothing is inherited for symlinks or when aclinherit is
 * ZFS_ACL_DISCARD; the returned ACL is then empty.
 */
static zfs_acl_t *
zfs_acl_inherit(zfsvfs_t *zfsvfs, vtype_t vtype, zfs_acl_t *paclp,
    uint64_t mode, boolean_t *need_chmod)
{
	void		*pacep;
	void		*acep;
	zfs_acl_node_t  *aclnode;
	zfs_acl_t	*aclp = NULL;
	uint64_t	who;
	uint32_t	access_mask;
	uint16_t	iflags, newflags, type;
	size_t		ace_size;
	void		*data1, *data2;
	size_t		data1sz, data2sz;
	boolean_t	vdir = vtype == VDIR;
	boolean_t	vreg = vtype == VREG;
	boolean_t	passthrough, passthrough_x, noallow;

	passthrough_x =
	    zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH_X;
	passthrough = passthrough_x ||
	    zfsvfs->z_acl_inherit == ZFS_ACL_PASSTHROUGH;
	noallow =
	    zfsvfs->z_acl_inherit == ZFS_ACL_NOALLOW;

	*need_chmod = B_TRUE;
	pacep = NULL;
	aclp = zfs_acl_alloc(paclp->z_version);
	if (zfsvfs->z_acl_inherit == ZFS_ACL_DISCARD || vtype == VLNK)
		return (aclp);
	while (pacep = zfs_acl_next_ace(paclp, pacep, &who,
	    &access_mask, &iflags, &type)) {

		/*
		 * don't inherit bogus ACEs
		 */
		if (!zfs_acl_valid_ace_type(type, iflags))
			continue;

		if (noallow && type == ALLOW)
			continue;

		ace_size = aclp->z_ops.ace_size(pacep);

		if (!zfs_ace_can_use(vtype, iflags))
			continue;

		/*
		 * If owner@, group@, or everyone@ inheritable
		 * then zfs_acl_chmod() isn't needed.
		 */
		if (passthrough &&
		    ((iflags & (ACE_OWNER|ACE_EVERYONE)) ||
		    ((iflags & OWNING_GROUP) ==
		    OWNING_GROUP)) && (vreg || (vdir && (iflags &
		    ACE_DIRECTORY_INHERIT_ACE)))) {
			*need_chmod = B_FALSE;
		}

		/*
		 * passthrough-x: a non-directory created without any
		 * execute bit in its mode does not inherit execute.
		 */
		if (!vdir && passthrough_x &&
		    ((mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) {
			access_mask &= ~ACE_EXECUTE;
		}

		aclnode = zfs_acl_node_alloc(ace_size);
		list_insert_tail(&aclp->z_acl, aclnode);
		acep = aclnode->z_acldata;

		zfs_set_ace(aclp, acep, access_mask, type,
		    who, iflags|ACE_INHERITED_ACE);

		/*
		 * Copy special opaque data if any
		 */
		if ((data1sz = paclp->z_ops.ace_data(pacep, &data1)) != 0) {
			VERIFY((data2sz = aclp->z_ops.ace_data(acep,
			    &data2)) == data1sz);
			bcopy(data1, data2, data2sz);
		}

		aclp->z_acl_count++;
		aclnode->z_ace_count++;
		aclp->z_acl_bytes += aclnode->z_size;
		newflags = aclp->z_ops.ace_flags_get(acep);

		if (vdir)
			aclp->z_hints |= ZFS_INHERIT_ACE;

		/*
		 * Non-directories, and ACEs marked no-propagate, keep no
		 * inheritance flags at all.
		 */
		if ((iflags & ACE_NO_PROPAGATE_INHERIT_ACE) || !vdir) {
			newflags &= ~ALL_INHERIT;
			aclp->z_ops.ace_flags_set(acep,
			    newflags|ACE_INHERITED_ACE);
			zfs_restricted_update(zfsvfs, aclp, acep);
			continue;
		}

		ASSERT(vdir);

		/*
		 * If only FILE_INHERIT is set then turn on
		 * inherit_only
		 */
		if ((iflags & (ACE_FILE_INHERIT_ACE |
		    ACE_DIRECTORY_INHERIT_ACE)) == ACE_FILE_INHERIT_ACE) {
			newflags |= ACE_INHERIT_ONLY_ACE;
			aclp->z_ops.ace_flags_set(acep,
			    newflags|ACE_INHERITED_ACE);
		} else {
			newflags &= ~ACE_INHERIT_ONLY_ACE;
			aclp->z_ops.ace_flags_set(acep,
			    newflags|ACE_INHERITED_ACE);
		}
	}
	return (aclp);
}

/*
 * Create file system object initial permissions
 * including inheritable ACEs.
*/ int zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr, vsecattr_t *vsecp, zfs_acl_ids_t *acl_ids) { int error; zfsvfs_t *zfsvfs = dzp->z_zfsvfs; zfs_acl_t *paclp; gid_t gid; boolean_t need_chmod = B_TRUE; boolean_t inherited = B_FALSE; - ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__); + if ((flag & IS_ROOT_NODE) == 0) + ASSERT_VOP_ELOCKED(ZTOV(dzp), __func__); + else + ASSERT(dzp->z_vnode == NULL); bzero(acl_ids, sizeof (zfs_acl_ids_t)); acl_ids->z_mode = MAKEIMODE(vap->va_type, vap->va_mode); if (vsecp) if ((error = zfs_vsec_2_aclp(zfsvfs, vap->va_type, vsecp, cr, &acl_ids->z_fuidp, &acl_ids->z_aclp)) != 0) return (error); /* * Determine uid and gid. */ if ((flag & IS_ROOT_NODE) || zfsvfs->z_replay || ((flag & IS_XATTR) && (vap->va_type == VDIR))) { acl_ids->z_fuid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_uid, cr, ZFS_OWNER, &acl_ids->z_fuidp); acl_ids->z_fgid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, cr, ZFS_GROUP, &acl_ids->z_fuidp); gid = vap->va_gid; } else { acl_ids->z_fuid = zfs_fuid_create_cred(zfsvfs, ZFS_OWNER, cr, &acl_ids->z_fuidp); acl_ids->z_fgid = 0; if (vap->va_mask & AT_GID) { acl_ids->z_fgid = zfs_fuid_create(zfsvfs, (uint64_t)vap->va_gid, cr, ZFS_GROUP, &acl_ids->z_fuidp); gid = vap->va_gid; if (acl_ids->z_fgid != dzp->z_gid && !groupmember(vap->va_gid, cr) && secpolicy_vnode_create_gid(cr) != 0) acl_ids->z_fgid = 0; } if (acl_ids->z_fgid == 0) { if (dzp->z_mode & S_ISGID) { char *domain; uint32_t rid; acl_ids->z_fgid = dzp->z_gid; gid = zfs_fuid_map_id(zfsvfs, acl_ids->z_fgid, cr, ZFS_GROUP); if (zfsvfs->z_use_fuids && IS_EPHEMERAL(acl_ids->z_fgid)) { domain = zfs_fuid_idx_domain( &zfsvfs->z_fuid_idx, FUID_INDEX(acl_ids->z_fgid)); rid = FUID_RID(acl_ids->z_fgid); zfs_fuid_node_add(&acl_ids->z_fuidp, domain, rid, FUID_INDEX(acl_ids->z_fgid), acl_ids->z_fgid, ZFS_GROUP); } } else { acl_ids->z_fgid = zfs_fuid_create_cred(zfsvfs, ZFS_GROUP, cr, &acl_ids->z_fuidp); #ifdef __FreeBSD_kernel__ gid = acl_ids->z_fgid = dzp->z_gid; 
#else gid = crgetgid(cr); #endif } } } /* * If we're creating a directory, and the parent directory has the * set-GID bit set, set in on the new directory. * Otherwise, if the user is neither privileged nor a member of the * file's new group, clear the file's set-GID bit. */ if (!(flag & IS_ROOT_NODE) && (dzp->z_mode & S_ISGID) && (vap->va_type == VDIR)) { acl_ids->z_mode |= S_ISGID; } else { if ((acl_ids->z_mode & S_ISGID) && secpolicy_vnode_setids_setgids(ZTOV(dzp), cr, gid) != 0) acl_ids->z_mode &= ~S_ISGID; } if (acl_ids->z_aclp == NULL) { mutex_enter(&dzp->z_acl_lock); if (!(flag & IS_ROOT_NODE) && (dzp->z_pflags & ZFS_INHERIT_ACE) && !(dzp->z_pflags & ZFS_XATTR)) { VERIFY(0 == zfs_acl_node_read(dzp, &paclp, B_FALSE)); acl_ids->z_aclp = zfs_acl_inherit(zfsvfs, vap->va_type, paclp, acl_ids->z_mode, &need_chmod); inherited = B_TRUE; } else { acl_ids->z_aclp = zfs_acl_alloc(zfs_acl_version_zp(dzp)); acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL; } mutex_exit(&dzp->z_acl_lock); if (need_chmod) { acl_ids->z_aclp->z_hints |= (vap->va_type == VDIR) ? 
ZFS_ACL_AUTO_INHERIT : 0; zfs_acl_chmod(vap->va_type, acl_ids->z_mode, (zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED), acl_ids->z_aclp); } } if (inherited || vsecp) { acl_ids->z_mode = zfs_mode_compute(acl_ids->z_mode, acl_ids->z_aclp, &acl_ids->z_aclp->z_hints, acl_ids->z_fuid, acl_ids->z_fgid); if (ace_trivial_common(acl_ids->z_aclp, 0, zfs_ace_walk) == 0) acl_ids->z_aclp->z_hints |= ZFS_ACL_TRIVIAL; } return (0); } /* * Free ACL and fuid_infop, but not the acl_ids structure */ void zfs_acl_ids_free(zfs_acl_ids_t *acl_ids) { if (acl_ids->z_aclp) zfs_acl_free(acl_ids->z_aclp); if (acl_ids->z_fuidp) zfs_fuid_info_free(acl_ids->z_fuidp); acl_ids->z_aclp = NULL; acl_ids->z_fuidp = NULL; } boolean_t zfs_acl_ids_overquota(zfsvfs_t *zfsvfs, zfs_acl_ids_t *acl_ids) { return (zfs_fuid_overquota(zfsvfs, B_FALSE, acl_ids->z_fuid) || zfs_fuid_overquota(zfsvfs, B_TRUE, acl_ids->z_fgid)); } /* * Retrieve a file's ACL */ int zfs_getacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) { zfs_acl_t *aclp; ulong_t mask; int error; int count = 0; int largeace = 0; mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT | VSA_ACE_ACLFLAGS | VSA_ACE_ALLTYPES); if (mask == 0) return (SET_ERROR(ENOSYS)); if (error = zfs_zaccess(zp, ACE_READ_ACL, 0, skipaclchk, cr)) return (error); mutex_enter(&zp->z_acl_lock); ASSERT_VOP_LOCKED(ZTOV(zp), __func__); error = zfs_acl_node_read(zp, &aclp, B_FALSE); if (error != 0) { mutex_exit(&zp->z_acl_lock); return (error); } /* * Scan ACL to determine number of ACEs */ if ((zp->z_pflags & ZFS_ACL_OBJ_ACE) && !(mask & VSA_ACE_ALLTYPES)) { void *zacep = NULL; uint64_t who; uint32_t access_mask; uint16_t type, iflags; while (zacep = zfs_acl_next_ace(aclp, zacep, &who, &access_mask, &iflags, &type)) { switch (type) { case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE: case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE: case ACE_SYSTEM_AUDIT_OBJECT_ACE_TYPE: case ACE_SYSTEM_ALARM_OBJECT_ACE_TYPE: largeace++; continue; default: count++; } } vsecp->vsa_aclcnt = count; } else 
count = (int)aclp->z_acl_count; if (mask & VSA_ACECNT) { vsecp->vsa_aclcnt = count; } if (mask & VSA_ACE) { size_t aclsz; aclsz = count * sizeof (ace_t) + sizeof (ace_object_t) * largeace; vsecp->vsa_aclentp = kmem_alloc(aclsz, KM_SLEEP); vsecp->vsa_aclentsz = aclsz; if (aclp->z_version == ZFS_ACL_VERSION_FUID) zfs_copy_fuid_2_ace(zp->z_zfsvfs, aclp, cr, vsecp->vsa_aclentp, !(mask & VSA_ACE_ALLTYPES)); else { zfs_acl_node_t *aclnode; void *start = vsecp->vsa_aclentp; for (aclnode = list_head(&aclp->z_acl); aclnode; aclnode = list_next(&aclp->z_acl, aclnode)) { bcopy(aclnode->z_acldata, start, aclnode->z_size); start = (caddr_t)start + aclnode->z_size; } ASSERT((caddr_t)start - (caddr_t)vsecp->vsa_aclentp == aclp->z_acl_bytes); } } if (mask & VSA_ACE_ACLFLAGS) { vsecp->vsa_aclflags = 0; if (zp->z_pflags & ZFS_ACL_DEFAULTED) vsecp->vsa_aclflags |= ACL_DEFAULTED; if (zp->z_pflags & ZFS_ACL_PROTECTED) vsecp->vsa_aclflags |= ACL_PROTECTED; if (zp->z_pflags & ZFS_ACL_AUTO_INHERIT) vsecp->vsa_aclflags |= ACL_AUTO_INHERIT; } mutex_exit(&zp->z_acl_lock); return (0); } int zfs_vsec_2_aclp(zfsvfs_t *zfsvfs, vtype_t obj_type, vsecattr_t *vsecp, cred_t *cr, zfs_fuid_info_t **fuidp, zfs_acl_t **zaclp) { zfs_acl_t *aclp; zfs_acl_node_t *aclnode; int aclcnt = vsecp->vsa_aclcnt; int error; if (vsecp->vsa_aclcnt > MAX_ACL_ENTRIES || vsecp->vsa_aclcnt <= 0) return (SET_ERROR(EINVAL)); aclp = zfs_acl_alloc(zfs_acl_version(zfsvfs->z_version)); aclp->z_hints = 0; aclnode = zfs_acl_node_alloc(aclcnt * sizeof (zfs_object_ace_t)); if (aclp->z_version == ZFS_ACL_VERSION_INITIAL) { if ((error = zfs_copy_ace_2_oldace(obj_type, aclp, (ace_t *)vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt, &aclnode->z_size)) != 0) { zfs_acl_free(aclp); zfs_acl_node_free(aclnode); return (error); } } else { if ((error = zfs_copy_ace_2_fuid(zfsvfs, obj_type, aclp, vsecp->vsa_aclentp, aclnode->z_acldata, aclcnt, &aclnode->z_size, fuidp, cr)) != 0) { zfs_acl_free(aclp); zfs_acl_node_free(aclnode); return (error); 
} } aclp->z_acl_bytes = aclnode->z_size; aclnode->z_ace_count = aclcnt; aclp->z_acl_count = aclcnt; list_insert_head(&aclp->z_acl, aclnode); /* * If flags are being set then add them to z_hints */ if (vsecp->vsa_mask & VSA_ACE_ACLFLAGS) { if (vsecp->vsa_aclflags & ACL_PROTECTED) aclp->z_hints |= ZFS_ACL_PROTECTED; if (vsecp->vsa_aclflags & ACL_DEFAULTED) aclp->z_hints |= ZFS_ACL_DEFAULTED; if (vsecp->vsa_aclflags & ACL_AUTO_INHERIT) aclp->z_hints |= ZFS_ACL_AUTO_INHERIT; } *zaclp = aclp; return (0); } /* * Set a file's ACL */ int zfs_setacl(znode_t *zp, vsecattr_t *vsecp, boolean_t skipaclchk, cred_t *cr) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; zilog_t *zilog = zfsvfs->z_log; ulong_t mask = vsecp->vsa_mask & (VSA_ACE | VSA_ACECNT); dmu_tx_t *tx; int error; zfs_acl_t *aclp; zfs_fuid_info_t *fuidp = NULL; boolean_t fuid_dirtied; uint64_t acl_obj; ASSERT_VOP_ELOCKED(ZTOV(zp), __func__); if (mask == 0) return (SET_ERROR(ENOSYS)); if (zp->z_pflags & ZFS_IMMUTABLE) return (SET_ERROR(EPERM)); if (error = zfs_zaccess(zp, ACE_WRITE_ACL, 0, skipaclchk, cr)) return (error); error = zfs_vsec_2_aclp(zfsvfs, ZTOV(zp)->v_type, vsecp, cr, &fuidp, &aclp); if (error) return (error); /* * If ACL wide flags aren't being set then preserve any * existing flags. 
*/ if (!(vsecp->vsa_mask & VSA_ACE_ACLFLAGS)) { aclp->z_hints |= (zp->z_pflags & V4_ACL_WIDE_FLAGS); } top: mutex_enter(&zp->z_acl_lock); tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_TRUE); fuid_dirtied = zfsvfs->z_fuid_dirty; if (fuid_dirtied) zfs_fuid_txhold(zfsvfs, tx); /* * If old version and ACL won't fit in bonus and we aren't * upgrading then take out necessary DMU holds */ if ((acl_obj = zfs_external_acl(zp)) != 0) { if (zfsvfs->z_version >= ZPL_VERSION_FUID && zfs_znode_acl_version(zp) <= ZFS_ACL_VERSION_INITIAL) { dmu_tx_hold_free(tx, acl_obj, 0, DMU_OBJECT_END); dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes); } else { dmu_tx_hold_write(tx, acl_obj, 0, aclp->z_acl_bytes); } } else if (!zp->z_is_sa && aclp->z_acl_bytes > ZFS_ACE_SPACE) { dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0, aclp->z_acl_bytes); } zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_NOWAIT); if (error) { mutex_exit(&zp->z_acl_lock); if (error == ERESTART) { dmu_tx_wait(tx); dmu_tx_abort(tx); goto top; } dmu_tx_abort(tx); zfs_acl_free(aclp); return (error); } error = zfs_aclset_common(zp, aclp, cr, tx); ASSERT(error == 0); ASSERT(zp->z_acl_cached == NULL); zp->z_acl_cached = aclp; if (fuid_dirtied) zfs_fuid_sync(zfsvfs, tx); zfs_log_acl(zilog, tx, zp, vsecp, fuidp); if (fuidp) zfs_fuid_info_free(fuidp); dmu_tx_commit(tx); mutex_exit(&zp->z_acl_lock); return (error); } /* * Check accesses of interest (AoI) against attributes of the dataset * such as read-only. Returns zero if no AoI conflict with dataset * attributes, otherwise an appropriate errno is returned. */ static int zfs_zaccess_dataset_check(znode_t *zp, uint32_t v4_mode) { if ((v4_mode & WRITE_MASK) && (zp->z_zfsvfs->z_vfs->vfs_flag & VFS_RDONLY) && (!IS_DEVVP(ZTOV(zp)) || (IS_DEVVP(ZTOV(zp)) && (v4_mode & WRITE_MASK_ATTRS)))) { return (SET_ERROR(EROFS)); } /* * Only check for READONLY on non-directories. 
*/ if ((v4_mode & WRITE_MASK_DATA) && (((ZTOV(zp)->v_type != VDIR) && (zp->z_pflags & (ZFS_READONLY | ZFS_IMMUTABLE))) || (ZTOV(zp)->v_type == VDIR && (zp->z_pflags & ZFS_IMMUTABLE)))) { return (SET_ERROR(EPERM)); } #ifdef illumos if ((v4_mode & (ACE_DELETE | ACE_DELETE_CHILD)) && (zp->z_pflags & ZFS_NOUNLINK)) { return (SET_ERROR(EPERM)); } #else /* * In FreeBSD we allow to modify directory's content is ZFS_NOUNLINK * (sunlnk) is set. We just don't allow directory removal, which is * handled in zfs_zaccess_delete(). */ if ((v4_mode & ACE_DELETE) && (zp->z_pflags & ZFS_NOUNLINK)) { return (EPERM); } #endif if (((v4_mode & (ACE_READ_DATA|ACE_EXECUTE)) && (zp->z_pflags & ZFS_AV_QUARANTINED))) { return (SET_ERROR(EACCES)); } return (0); } /* * The primary usage of this function is to loop through all of the * ACEs in the znode, determining what accesses of interest (AoI) to * the caller are allowed or denied. The AoI are expressed as bits in * the working_mode parameter. As each ACE is processed, bits covered * by that ACE are removed from the working_mode. This removal * facilitates two things. The first is that when the working mode is * empty (= 0), we know we've looked at all the AoI. The second is * that the ACE interpretation rules don't allow a later ACE to undo * something granted or denied by an earlier ACE. Removing the * discovered access or denial enforces this rule. At the end of * processing the ACEs, all AoI that were found to be denied are * placed into the working_mode, giving the caller a mask of denied * accesses. Returns: * 0 if all AoI granted * EACCESS if the denied mask is non-zero * other error if abnormal failure (e.g., IO error) * * A secondary usage of the function is to determine if any of the * AoI are granted. If an ACE grants any access in * the working_mode, we immediately short circuit out of the function. * This mode is chosen by setting anyaccess to B_TRUE. 
The working_mode is not a denied access mask upon exit if the function
 * is used in this manner.
 *
 * Besides 0 and the errors above, -1 is returned when no ACE denied
 * anything but some requested bits were never granted.  EIO is returned
 * for an ACE whose entry type is not recognised.  zp->z_acl_lock is taken
 * for the duration of the scan and dropped on every return path.
 */
static int
zfs_zaccess_aces_check(znode_t *zp, uint32_t *working_mode,
    boolean_t anyaccess, cred_t *cr)
{
	zfsvfs_t	*zfsvfs = zp->z_zfsvfs;
	zfs_acl_t	*aclp;
	int		error;
	uid_t		uid = crgetuid(cr);
	uint64_t 	who;
	uint16_t	type, iflags;
	uint16_t	entry_type;
	uint32_t	access_mask;
	uint32_t	deny_mask = 0;
	zfs_ace_hdr_t	*acep = NULL;
	boolean_t	checkit;
	uid_t		gowner;
	uid_t		fowner;

	zfs_fuid_map_ids(zp, cr, &fowner, &gowner);

	mutex_enter(&zp->z_acl_lock);

	ASSERT_VOP_LOCKED(ZTOV(zp), __func__);
	error = zfs_acl_node_read(zp, &aclp, B_FALSE);
	if (error != 0) {
		mutex_exit(&zp->z_acl_lock);
		return (error);
	}

	ASSERT(zp->z_acl_cached);

	while (acep = zfs_acl_next_ace(aclp, acep, &who, &access_mask,
	    &iflags, &type)) {
		uint32_t mask_matched;

		if (!zfs_acl_valid_ace_type(type, iflags))
			continue;

		/* Inherit-only ACEs on a directory do not apply to it. */
		if (ZTOV(zp)->v_type == VDIR && (iflags & ACE_INHERIT_ONLY_ACE))
			continue;

		/* Skip ACE if it does not affect any AoI */
		mask_matched = (access_mask & *working_mode);
		if (!mask_matched)
			continue;

		entry_type = (iflags & ACE_TYPE_FLAGS);

		checkit = B_FALSE;

		/* Decide whether this ACE applies to the caller. */
		switch (entry_type) {
		case ACE_OWNER:
			if (uid == fowner)
				checkit = B_TRUE;
			break;
		case OWNING_GROUP:
			who = gowner;
			/*FALLTHROUGH*/
		case ACE_IDENTIFIER_GROUP:
			checkit = zfs_groupmember(zfsvfs, who, cr);
			break;
		case ACE_EVERYONE:
			checkit = B_TRUE;
			break;

		/* USER Entry */
		default:
			if (entry_type == 0) {
				uid_t newid;

				newid = zfs_fuid_map_id(zfsvfs, who, cr,
				    ZFS_ACE_USER);
				if (newid != IDMAP_WK_CREATOR_OWNER_UID &&
				    uid == newid)
					checkit = B_TRUE;
				break;
			} else {
				mutex_exit(&zp->z_acl_lock);
				return (SET_ERROR(EIO));
			}
		}

		if (checkit) {
			if (type == DENY) {
				DTRACE_PROBE3(zfs__ace__denies,
				    znode_t *, zp,
				    zfs_ace_hdr_t *, acep,
				    uint32_t, mask_matched);
				deny_mask |= mask_matched;
			} else {
				DTRACE_PROBE3(zfs__ace__allows,
				    znode_t *, zp,
				    zfs_ace_hdr_t *, acep,
				    uint32_t, mask_matched);
				if (anyaccess) {
					mutex_exit(&zp->z_acl_lock);
					return (0);
				}
			}
			/* These bits are now decided either way. */
			*working_mode &= ~mask_matched;
		}

		/* Are we done? */
		if (*working_mode == 0)
			break;
	}

	mutex_exit(&zp->z_acl_lock);

	/* Put the found 'denies' back on the working mode */
	if (deny_mask) {
		*working_mode |= deny_mask;
		return (SET_ERROR(EACCES));
	} else if (*working_mode) {
		return (-1);
	}

	return (0);
}

/*
 * Return true if any access whatsoever granted, we don't actually
 * care what access is granted.
 *
 * When the ACL scan does not grant anything, the decision is left to
 * secpolicy_vnode_any_access() for the file's owner.
 */
boolean_t
zfs_has_access(znode_t *zp, cred_t *cr)
{
	uint32_t have = ACE_ALL_PERMS;

	if (zfs_zaccess_aces_check(zp, &have, B_TRUE, cr) != 0) {
		uid_t owner;

		owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
		return (secpolicy_vnode_any_access(cr, ZTOV(zp), owner) == 0);
	}
	return (B_TRUE);
}

/*
 * Common access check: dataset-level restrictions first, then the ACL.
 *
 * On return *working_mode holds the bits still unresolved (or denied) and
 * *check_privs tells the caller whether a privilege check may still
 * override the result; it is B_FALSE only when the dataset check failed.
 * Empty requests, replay, and skipaclchk all succeed immediately with
 * *working_mode set to 0.
 */
static int
zfs_zaccess_common(znode_t *zp, uint32_t v4_mode, uint32_t *working_mode,
    boolean_t *check_privs, boolean_t skipaclchk, cred_t *cr)
{
	zfsvfs_t *zfsvfs = zp->z_zfsvfs;
	int err;

	*working_mode = v4_mode;
	*check_privs = B_TRUE;

	/*
	 * Short circuit empty requests
	 */
	if (v4_mode == 0 || zfsvfs->z_replay) {
		*working_mode = 0;
		return (0);
	}

	if ((err = zfs_zaccess_dataset_check(zp, v4_mode)) != 0) {
		*check_privs = B_FALSE;
		return (err);
	}

	/*
	 * The caller requested that the ACL check be skipped.  This
	 * would only happen if the caller checked VOP_ACCESS() with a
	 * 32 bit ACE mask and already had the appropriate permissions.
	 */
	if (skipaclchk) {
		*working_mode = 0;
		return (0);
	}

	return (zfs_zaccess_aces_check(zp, working_mode, B_FALSE, cr));
}

/*
 * Retry a failed access check as an append.
 *
 * Only applies when the sole outstanding bit is ACE_WRITE_DATA; in that
 * case the check is repeated for ACE_APPEND_DATA.  Anything else is
 * EACCES.
 */
static int
zfs_zaccess_append(znode_t *zp, uint32_t *working_mode, boolean_t *check_privs,
    cred_t *cr)
{
	if (*working_mode != ACE_WRITE_DATA)
		return (SET_ERROR(EACCES));

	return (zfs_zaccess_common(zp, ACE_APPEND_DATA, working_mode,
	    check_privs, B_FALSE, cr));
}

/*
 * Fast path for execute (search) permission on zdp.
 *
 * Quarantined files are refused outright.  Otherwise the answer is taken
 * from ZFS_NO_EXECS_DENIED or from the owner/group/other execute bit of
 * z_mode when that is enough to grant access.  Every other case (xattr
 * directories, FUIDs with a non-zero index, a missing execute bit) falls
 * back to the full zfs_zaccess() check.  z_acl_lock is held only while
 * z_pflags and z_mode are examined.
 */
int
zfs_fastaccesschk_execute(znode_t *zdp, cred_t *cr)
{
	boolean_t owner = B_FALSE;
	boolean_t groupmbr = B_FALSE;
	boolean_t is_attr;
	uid_t uid = crgetuid(cr);
	int error;

	if (zdp->z_pflags & ZFS_AV_QUARANTINED)
		return (SET_ERROR(EACCES));

	is_attr = ((zdp->z_pflags & ZFS_XATTR) &&
	    (ZTOV(zdp)->v_type == VDIR));
	if (is_attr)
		goto slow;

	mutex_enter(&zdp->z_acl_lock);

	if (zdp->z_pflags & ZFS_NO_EXECS_DENIED) {
		mutex_exit(&zdp->z_acl_lock);
		return (0);
	}

	/* z_uid/z_gid are compared directly below, so plain ids only. */
	if (FUID_INDEX(zdp->z_uid) != 0 || FUID_INDEX(zdp->z_gid) != 0) {
		mutex_exit(&zdp->z_acl_lock);
		goto slow;
	}

	if (uid == zdp->z_uid) {
		owner = B_TRUE;
		if (zdp->z_mode & S_IXUSR) {
			mutex_exit(&zdp->z_acl_lock);
			return (0);
		} else {
			mutex_exit(&zdp->z_acl_lock);
			goto slow;
		}
	}
	if (groupmember(zdp->z_gid, cr)) {
		groupmbr = B_TRUE;
		if (zdp->z_mode & S_IXGRP) {
			mutex_exit(&zdp->z_acl_lock);
			return (0);
		} else {
			mutex_exit(&zdp->z_acl_lock);
			goto slow;
		}
	}
	if (!owner && !groupmbr) {
		if (zdp->z_mode & S_IXOTH) {
			mutex_exit(&zdp->z_acl_lock);
			return (0);
		}
	}

	mutex_exit(&zdp->z_acl_lock);

slow:
	DTRACE_PROBE(zfs__fastpath__execute__access__miss);
	ZFS_ENTER(zdp->z_zfsvfs);
	error = zfs_zaccess(zdp, ACE_EXECUTE, 0, B_FALSE, cr);
	ZFS_EXIT(zdp->z_zfsvfs);
	return (error);
}

/*
 * Determine whether Access should be granted/denied.
 *
 * The least priv subsystem is always consulted as a basic privilege
 * can define any form of access.
*/
/*
 *	zp		- znode whose access is being checked
 *	mode		- requested ACE mask
 *	flags		- V_APPEND enables the append retry below
 *	skipaclchk	- passed through to zfs_zaccess_common()
 *	cr		- caller credentials
 *
 * Returns 0 when access is granted, otherwise an errno value.
 */
int
zfs_zaccess(znode_t *zp, int mode, int flags, boolean_t skipaclchk, cred_t *cr)
{
	uint32_t	working_mode;
	int		error;
	int		is_attr;
	boolean_t	check_privs;
	znode_t		*xzp;
	znode_t		*check_zp = zp;
	mode_t		needed_bits;
	uid_t		owner;

	is_attr = ((zp->z_pflags & ZFS_XATTR) && (ZTOV(zp)->v_type == VDIR));

#ifdef __FreeBSD_kernel__
	/*
	 * In FreeBSD, we don't care about permissions of individual ADS.
	 * Note that not checking them is not just an optimization - without
	 * this shortcut, EA operations may bogusly fail with EACCES.
	 */
	if (zp->z_pflags & ZFS_XATTR)
		return (0);
#else
	/*
	 * If attribute then validate against base file
	 */
	if (is_attr) {
		uint64_t	parent;

		if ((error = sa_lookup(zp->z_sa_hdl,
		    SA_ZPL_PARENT(zp->z_zfsvfs), &parent,
		    sizeof (parent))) != 0)
			return (error);

		/* Holds xzp; released by VN_RELE() on every later exit. */
		if ((error = zfs_zget(zp->z_zfsvfs,
		    parent, &xzp)) != 0)	{
			return (error);
		}

		check_zp = xzp;

		/*
		 * fixup mode to map to xattr perms
		 */

		if (mode & (ACE_WRITE_DATA|ACE_APPEND_DATA)) {
			mode &= ~(ACE_WRITE_DATA|ACE_APPEND_DATA);
			mode |= ACE_WRITE_NAMED_ATTRS;
		}

		if (mode & (ACE_READ_DATA|ACE_EXECUTE)) {
			mode &= ~(ACE_READ_DATA|ACE_EXECUTE);
			mode |= ACE_READ_NAMED_ATTRS;
		}
	}
#endif

	owner = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
	/*
	 * Map the bits required to the standard vnode flags VREAD|VWRITE|VEXEC
	 * in needed_bits.  Map the bits mapped by working_mode (currently
	 * missing) in missing_bits.
	 * Call secpolicy_vnode_access2() with (needed_bits & ~checkmode),
	 * needed_bits.
	 */
	needed_bits = 0;

	working_mode = mode;
	/* The owner implicitly holds read_acl/read_attributes. */
	if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES)) &&
	    owner == crgetuid(cr))
		working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);

	if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
	    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
		needed_bits |= VREAD;
	if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
	    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
		needed_bits |= VWRITE;
	if (working_mode & ACE_EXECUTE)
		needed_bits |= VEXEC;

	/* ACL grants everything: still give the policy layer its say. */
	if ((error = zfs_zaccess_common(check_zp, mode, &working_mode,
	    &check_privs, skipaclchk, cr)) == 0) {
		if (is_attr)
			VN_RELE(ZTOV(xzp));
		return (secpolicy_vnode_access2(cr, ZTOV(zp), owner,
		    needed_bits, needed_bits));
	}

	/* Failure that must not be overridden by privileges. */
	if (error && !check_privs) {
		if (is_attr)
			VN_RELE(ZTOV(xzp));
		return (error);
	}

	if (error && (flags & V_APPEND)) {
		error = zfs_zaccess_append(zp, &working_mode, &check_privs, cr);
	}

	if (error && check_privs) {
		mode_t		checkmode = 0;

		/*
		 * First check for implicit owner permission on
		 * read_acl/read_attributes
		 */

		error = 0;
		ASSERT(working_mode != 0);

		if ((working_mode & (ACE_READ_ACL|ACE_READ_ATTRIBUTES) &&
		    owner == crgetuid(cr)))
			working_mode &= ~(ACE_READ_ACL|ACE_READ_ATTRIBUTES);

		/* checkmode collects the rwx classes still outstanding. */
		if (working_mode & (ACE_READ_DATA|ACE_READ_NAMED_ATTRS|
		    ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_SYNCHRONIZE))
			checkmode |= VREAD;
		if (working_mode & (ACE_WRITE_DATA|ACE_WRITE_NAMED_ATTRS|
		    ACE_APPEND_DATA|ACE_WRITE_ATTRIBUTES|ACE_SYNCHRONIZE))
			checkmode |= VWRITE;
		if (working_mode & ACE_EXECUTE)
			checkmode |= VEXEC;

		error = secpolicy_vnode_access2(cr, ZTOV(check_zp), owner,
		    needed_bits & ~checkmode, needed_bits);

		if (error == 0 && (working_mode & ACE_WRITE_OWNER))
			error = secpolicy_vnode_chown(ZTOV(check_zp), cr, owner);
		if (error == 0 && (working_mode & ACE_WRITE_ACL))
			error = secpolicy_vnode_setdac(ZTOV(check_zp), cr, owner);

		if (error == 0 && (working_mode &
		    (ACE_DELETE|ACE_DELETE_CHILD)))
			error = secpolicy_vnode_remove(ZTOV(check_zp), cr);

		if (error == 0 && (working_mode & ACE_SYNCHRONIZE)) {
			error = secpolicy_vnode_chown(ZTOV(check_zp), cr, owner);
		}
		if (error == 0) {
			/*
			 * See if any bits other than those already checked
			 * for are still present.  If so then return EACCES
			 */
			if (working_mode & ~(ZFS_CHECKED_MASKS)) {
				error = SET_ERROR(EACCES);
			}
		}
	} else if (error == 0) {
		/* Reached when the append retry above succeeded. */
		error = secpolicy_vnode_access2(cr, ZTOV(zp), owner,
		    needed_bits, needed_bits);
	}

	if (is_attr)
		VN_RELE(ZTOV(xzp));

	return (error);
}

/*
 * Translate traditional unix VREAD/VWRITE/VEXEC mode into
 * native ACL format and call zfs_zaccess()
 *
 * mode is shifted right by 6 before being handed to zfs_unix_to_v4().
 */
int
zfs_zaccess_rwx(znode_t *zp, mode_t mode, int flags, cred_t *cr)
{
	return (zfs_zaccess(zp, zfs_unix_to_v4(mode >> 6), flags, B_FALSE, cr));
}

/*
 * Access function for secpolicy_vnode_setattr
 *
 * Same translation as zfs_zaccess_rwx(), with flags fixed at 0.
 */
int
zfs_zaccess_unix(znode_t *zp, mode_t mode, cred_t *cr)
{
	int v4_mode = zfs_unix_to_v4(mode >> 6);

	return (zfs_zaccess(zp, v4_mode, 0, B_FALSE, cr));
}

/*
 * Last step of a delete check: ask the policy layer for VWRITE|VEXEC on
 * the directory dzp, crediting the caller with available_perms, and if
 * that passes apply the sticky-bit rule via zfs_sticky_remove_access().
 */
static int
zfs_delete_final_check(znode_t *zp, znode_t *dzp,
    mode_t available_perms, cred_t *cr)
{
	int error;
	uid_t downer;

	downer = zfs_fuid_map_id(dzp->z_zfsvfs, dzp->z_uid, cr, ZFS_OWNER);

	error = secpolicy_vnode_access2(cr, ZTOV(dzp),
	    downer, available_perms, VWRITE|VEXEC);

	if (error == 0)
		error = zfs_sticky_remove_access(dzp, zp, cr);

	return (error);
}

/*
 * Determine whether Access should be granted/deny, without
 * consulting least priv subsystem.
 *
 * The following chart is the recommended NFSv4 enforcement for
 * ability to delete an object.
* * ------------------------------------------------------- * | Parent Dir | Target Object Permissions | * | permissions | | * ------------------------------------------------------- * | | ACL Allows | ACL Denies| Delete | * | | Delete | Delete | unspecified| * ------------------------------------------------------- * | ACL Allows | Permit | Permit | Permit | * | DELETE_CHILD | | * ------------------------------------------------------- * | ACL Denies | Permit | Deny | Deny | * | DELETE_CHILD | | | | * ------------------------------------------------------- * | ACL specifies | | | | * | only allow | Permit | Permit | Permit | * | write and | | | | * | execute | | | | * ------------------------------------------------------- * | ACL denies | | | | * | write and | Permit | Deny | Deny | * | execute | | | | * ------------------------------------------------------- * ^ * | * No search privilege, can't even look up file? * */ int zfs_zaccess_delete(znode_t *dzp, znode_t *zp, cred_t *cr) { uint32_t dzp_working_mode = 0; uint32_t zp_working_mode = 0; int dzp_error, zp_error; mode_t available_perms; boolean_t dzpcheck_privs = B_TRUE; boolean_t zpcheck_privs = B_TRUE; /* * We want specific DELETE permissions to * take precedence over WRITE/EXECUTE. We don't * want an ACL such as this to mess us up. * user:joe:write_data:deny,user:joe:delete:allow * * However, deny permissions may ultimately be overridden * by secpolicy_vnode_access(). * * We will ask for all of the necessary permissions and then * look at the working modes from the directory and target object * to determine what was found. */ if (zp->z_pflags & (ZFS_IMMUTABLE | ZFS_NOUNLINK)) return (SET_ERROR(EPERM)); /* * First row * If the directory permissions allow the delete, we are done. 
*/ if ((dzp_error = zfs_zaccess_common(dzp, ACE_DELETE_CHILD, &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr)) == 0) return (0); /* * If target object has delete permission then we are done */ if ((zp_error = zfs_zaccess_common(zp, ACE_DELETE, &zp_working_mode, &zpcheck_privs, B_FALSE, cr)) == 0) return (0); ASSERT(dzp_error && zp_error); if (!dzpcheck_privs) return (dzp_error); if (!zpcheck_privs) return (zp_error); /* * Second row * * If directory returns EACCES then delete_child was denied * due to deny delete_child. In this case send the request through * secpolicy_vnode_remove(). We don't use zfs_delete_final_check() * since that *could* allow the delete based on write/execute permission * and we want delete permissions to override write/execute. */ if (dzp_error == EACCES) return (secpolicy_vnode_remove(ZTOV(dzp), cr)); /* XXXPJD: s/dzp/zp/ ? */ /* * Third Row * only need to see if we have write/execute on directory. */ dzp_error = zfs_zaccess_common(dzp, ACE_EXECUTE|ACE_WRITE_DATA, &dzp_working_mode, &dzpcheck_privs, B_FALSE, cr); if (dzp_error != 0 && !dzpcheck_privs) return (dzp_error); /* * Fourth row */ available_perms = (dzp_working_mode & ACE_WRITE_DATA) ? 0 : VWRITE; available_perms |= (dzp_working_mode & ACE_EXECUTE) ? 0 : VEXEC; return (zfs_delete_final_check(zp, dzp, available_perms, cr)); } int zfs_zaccess_rename(znode_t *sdzp, znode_t *szp, znode_t *tdzp, znode_t *tzp, cred_t *cr) { int add_perm; int error; if (szp->z_pflags & ZFS_AV_QUARANTINED) return (SET_ERROR(EACCES)); add_perm = (ZTOV(szp)->v_type == VDIR) ? ACE_ADD_SUBDIRECTORY : ACE_ADD_FILE; /* * Rename permissions are combination of delete permission + * add file/subdir permission. * * BSD operating systems also require write permission * on the directory being moved from one parent directory * to another. 
*/ if (ZTOV(szp)->v_type == VDIR && ZTOV(sdzp) != ZTOV(tdzp)) { if (error = zfs_zaccess(szp, ACE_WRITE_DATA, 0, B_FALSE, cr)) return (error); } /* * first make sure we do the delete portion. * * If that succeeds then check for add_file/add_subdir permissions */ if (error = zfs_zaccess_delete(sdzp, szp, cr)) return (error); /* * If we have a tzp, see if we can delete it? */ if (tzp) { if (error = zfs_zaccess_delete(tdzp, tzp, cr)) return (error); } /* * Now check for add permissions */ error = zfs_zaccess(tdzp, add_perm, 0, B_FALSE, cr); return (error); } Index: stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c =================================================================== --- stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c (revision 324004) +++ stable/10/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_znode.c (revision 324005) @@ -1,2218 +1,2217 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2014 by Delphix. All rights reserved. 
* Copyright (c) 2014 Integros [integros.com]
 */

/* Portions Copyright 2007 Jeremy Teo */
/* Portions Copyright 2011 Martin Matuska */

/*
 * NOTE(review): the bare "#include" directives below have lost their
 * header names in this copy of the file; restore them from the
 * repository before building.
 */
#ifdef _KERNEL
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#endif /* _KERNEL */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#include "zfs_prop.h"
#include "zfs_comutil.h"

/* Used by fstat(1). */
SYSCTL_INT(_debug_sizeof, OID_AUTO, znode, CTLFLAG_RD,
    SYSCTL_NULL_INT_PTR, sizeof(znode_t), "sizeof(znode_t)");

/*
 * Define ZNODE_STATS to turn on statistic gathering. By default, it is only
 * turned on when DEBUG is also defined.
 */
#ifdef	DEBUG
#define	ZNODE_STATS
#endif	/* DEBUG */

#ifdef	ZNODE_STATS
#define	ZNODE_STAT_ADD(stat)			((stat)++)
#else
#define	ZNODE_STAT_ADD(stat)			/* nothing */
#endif	/* ZNODE_STATS */

/*
 * Functions needed for userland (ie: libzpool) are not put under
 * #ifdef_KERNEL; the rest of the functions have dependencies
 * (such as VFS logic) that will not compile easily in userland.
 */
#ifdef _KERNEL
/*
 * Needed to close a small window in zfs_znode_move() that allows the zfsvfs to
 * be freed before it can be safely accessed.
 */
krwlock_t zfsvfs_lock;

/* Object cache from which every znode_t in this file is allocated. */
static kmem_cache_t *znode_cache = NULL;

/*
 * Eviction callback that must never fire: it panics, printing the
 * user pointer it was handed.
 */
/*ARGSUSED*/
static void
znode_evict_error(dmu_buf_t *dbuf, void *user_ptr)
{
	/*
	 * We should never drop all dbuf refs without first clearing
	 * the eviction callback.
	 */
	panic("evicting znode %p\n", user_ptr);
}

extern struct vop_vector zfs_vnodeops;
extern struct vop_vector zfs_fifoops;
extern struct vop_vector zfs_shareops;

/*
 * kmem cache constructor for znode_cache.  Marks z_zfsvfs invalid,
 * initialises the list link, both mutexes and the range-lock AVL tree,
 * and clears z_acl_cached, z_vnode and z_moved.  Always returns 0.
 */
static int
zfs_znode_cache_constructor(void *buf, void *arg, int kmflags)
{
	znode_t *zp = buf;

	POINTER_INVALIDATE(&zp->z_zfsvfs);

	list_link_init(&zp->z_link_node);

	mutex_init(&zp->z_acl_lock, NULL, MUTEX_DEFAULT, NULL);

	mutex_init(&zp->z_range_lock, NULL, MUTEX_DEFAULT, NULL);
	avl_create(&zp->z_range_avl, zfs_range_compare,
	    sizeof (rl_t), offsetof(rl_t, r_node));

	zp->z_acl_cached = NULL;
	zp->z_vnode = NULL;
	zp->z_moved = 0;
	return (0);
}

/*
 * kmem cache destructor for znode_cache; tears down what the
 * constructor set up and asserts the znode is no longer in use.
 *
 * NOTE(review): the statements prefixed with '-' and '+' are unified-diff
 * hunk lines (revision 324004 -> 324005), not two consecutive statements;
 * only the '+' line belongs to the new revision.
 */
/*ARGSUSED*/
static void
zfs_znode_cache_destructor(void *buf, void *arg)
{
	znode_t *zp = buf;

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
-	ASSERT(ZTOV(zp) == NULL);
-	vn_free(ZTOV(zp));
+	ASSERT3P(zp->z_vnode, ==, NULL);
	ASSERT(!list_link_active(&zp->z_link_node));
	mutex_destroy(&zp->z_acl_lock);
	avl_destroy(&zp->z_range_avl);
	mutex_destroy(&zp->z_range_lock);

	ASSERT(zp->z_acl_cached == NULL);
}

#ifdef	ZNODE_STATS
/* One counter per early-exit reason in zfs_znode_move(). */
static struct {
	uint64_t zms_zfsvfs_invalid;
	uint64_t zms_zfsvfs_recheck1;
	uint64_t zms_zfsvfs_unmounted;
	uint64_t zms_zfsvfs_recheck2;
	uint64_t zms_obj_held;
	uint64_t zms_vnode_locked;
	uint64_t zms_not_only_dnlc;
} znode_move_stats;
#endif	/* ZNODE_STATS */

#ifdef illumos
/*
 * Transfer the identity of ozp to nzp: swap the vnodes, copy the
 * in-core fields, re-point the SA handle's user pointer at nzp, then
 * invalidate ozp.  Locking is the caller's job (see zfs_znode_move()).
 */
static void
zfs_znode_move_impl(znode_t *ozp, znode_t *nzp)
{
	vnode_t *vp;

	/* Copy fields. */
	nzp->z_zfsvfs = ozp->z_zfsvfs;

	/* Swap vnodes. */
	vp = nzp->z_vnode;
	nzp->z_vnode = ozp->z_vnode;
	ozp->z_vnode = vp; /* let destructor free the overwritten vnode */
	ZTOV(ozp)->v_data = ozp;
	ZTOV(nzp)->v_data = nzp;

	nzp->z_id = ozp->z_id;
	ASSERT(ozp->z_dirlocks == NULL); /* znode not in use */
	ASSERT(avl_numnodes(&ozp->z_range_avl) == 0);
	nzp->z_unlinked = ozp->z_unlinked;
	nzp->z_atime_dirty = ozp->z_atime_dirty;
	nzp->z_zn_prefetch = ozp->z_zn_prefetch;
	nzp->z_blksz = ozp->z_blksz;
	nzp->z_seq = ozp->z_seq;
	nzp->z_mapcnt = ozp->z_mapcnt;
	nzp->z_gen = ozp->z_gen;
	nzp->z_sync_cnt = ozp->z_sync_cnt;
	nzp->z_is_sa = ozp->z_is_sa;
	nzp->z_sa_hdl = ozp->z_sa_hdl;
	bcopy(ozp->z_atime, nzp->z_atime, sizeof (uint64_t) * 2);
	nzp->z_links = ozp->z_links;
	nzp->z_size = ozp->z_size;
	nzp->z_pflags = ozp->z_pflags;
	nzp->z_uid = ozp->z_uid;
	nzp->z_gid = ozp->z_gid;
	nzp->z_mode = ozp->z_mode;

	/*
	 * Since this is just an idle znode and kmem is already dealing with
	 * memory pressure, release any cached ACL.
	 */
	if (ozp->z_acl_cached) {
		zfs_acl_free(ozp->z_acl_cached);
		ozp->z_acl_cached = NULL;
	}

	sa_set_userp(nzp->z_sa_hdl, nzp);

	/*
	 * Invalidate the original znode by clearing fields that provide a
	 * pointer back to the znode. Set the low bit of the vfs pointer to
	 * ensure that zfs_znode_move() recognizes the znode as invalid in any
	 * subsequent callback.
	 */
	ozp->z_sa_hdl = NULL;
	POINTER_INVALIDATE(&ozp->z_zfsvfs);

	/*
	 * Mark the znode.
	 */
	nzp->z_moved = 1;
	ozp->z_moved = (uint8_t)-1;
}

/*
 * kmem move callback for znode_cache.  Returns KMEM_CBRC_DONT_KNOW when
 * the znode or its file system cannot be validated, KMEM_CBRC_LATER when
 * a needed lock is busy or the vnode has references other than the DNLC,
 * and KMEM_CBRC_YES after the znode has been moved into newbuf.
 */
/*ARGSUSED*/
static kmem_cbrc_t
zfs_znode_move(void *buf, void *newbuf, size_t size, void *arg)
{
	znode_t *ozp = buf, *nzp = newbuf;
	zfsvfs_t *zfsvfs;
	vnode_t *vp;

	/*
	 * The znode is on the file system's list of known znodes if the vfs
	 * pointer is valid. We set the low bit of the vfs pointer when freeing
	 * the znode to invalidate it, and the memory patterns written by kmem
	 * (baddcafe and deadbeef) set at least one of the two low bits. A newly
	 * created znode sets the vfs pointer last of all to indicate that the
	 * znode is known and in a valid state to be moved by this function.
	 */
	zfsvfs = ozp->z_zfsvfs;
	if (!POINTER_IS_VALID(zfsvfs)) {
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_invalid);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * Close a small window in which it's possible that the filesystem could
	 * be unmounted and freed, and zfsvfs, though valid in the previous
	 * statement, could point to unrelated memory by the time we try to
	 * prevent the filesystem from being unmounted.
	 */
	rw_enter(&zfsvfs_lock, RW_WRITER);
	if (zfsvfs != ozp->z_zfsvfs) {
		rw_exit(&zfsvfs_lock);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck1);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * If the znode is still valid, then so is the file system. We know that
	 * no valid file system can be freed while we hold zfsvfs_lock, so we
	 * can safely ensure that the filesystem is not and will not be
	 * unmounted. The next statement is equivalent to ZFS_ENTER().
	 */
	rrm_enter(&zfsvfs->z_teardown_lock, RW_READER, FTAG);
	if (zfsvfs->z_unmounted) {
		ZFS_EXIT(zfsvfs);
		rw_exit(&zfsvfs_lock);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_unmounted);
		return (KMEM_CBRC_DONT_KNOW);
	}
	rw_exit(&zfsvfs_lock);

	mutex_enter(&zfsvfs->z_znodes_lock);
	/*
	 * Recheck the vfs pointer in case the znode was removed just before
	 * acquiring the lock.
	 */
	if (zfsvfs != ozp->z_zfsvfs) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_zfsvfs_recheck2);
		return (KMEM_CBRC_DONT_KNOW);
	}

	/*
	 * At this point we know that as long as we hold z_znodes_lock, the
	 * znode cannot be freed and fields within the znode can be safely
	 * accessed. Now, prevent a race with zfs_zget().
	 */
	if (ZFS_OBJ_HOLD_TRYENTER(zfsvfs, ozp->z_id) == 0) {
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_obj_held);
		return (KMEM_CBRC_LATER);
	}

	vp = ZTOV(ozp);
	if (mutex_tryenter(&vp->v_lock) == 0) {
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_vnode_locked);
		return (KMEM_CBRC_LATER);
	}

	/* Only move znodes that are referenced _only_ by the DNLC. */
	if (vp->v_count != 1 || !vn_in_dnlc(vp)) {
		mutex_exit(&vp->v_lock);
		ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);
		mutex_exit(&zfsvfs->z_znodes_lock);
		ZFS_EXIT(zfsvfs);
		ZNODE_STAT_ADD(znode_move_stats.zms_not_only_dnlc);
		return (KMEM_CBRC_LATER);
	}

	/*
	 * The znode is known and in a valid state to move. We're holding the
	 * locks needed to execute the critical section.
	 */
	zfs_znode_move_impl(ozp, nzp);
	mutex_exit(&vp->v_lock);
	ZFS_OBJ_HOLD_EXIT(zfsvfs, ozp->z_id);

	list_link_replace(&ozp->z_link_node, &nzp->z_link_node);
	mutex_exit(&zfsvfs->z_znodes_lock);
	ZFS_EXIT(zfsvfs);

	return (KMEM_CBRC_YES);
}
#endif /* illumos */

/*
 * Module-wide znode setup: initialise zfsvfs_lock and create
 * znode_cache with the constructor/destructor defined above.
 */
void
zfs_znode_init(void)
{
	/*
	 * Initialize zcache
	 */
	rw_init(&zfsvfs_lock, NULL, RW_DEFAULT, NULL);
	ASSERT(znode_cache == NULL);
	znode_cache = kmem_cache_create("zfs_znode_cache",
	    sizeof (znode_t), 0, zfs_znode_cache_constructor,
	    zfs_znode_cache_destructor, NULL, NULL, NULL, 0);
	/*
	 * NOTE(review): zfs_znode_move() is only defined under
	 * "#ifdef illumos" above; presumably kmem_cache_set_move() expands
	 * to nothing elsewhere - confirm.
	 */
	kmem_cache_set_move(znode_cache, zfs_znode_move);
}

/*
 * Undo zfs_znode_init(): destroy znode_cache if it exists and destroy
 * zfsvfs_lock.  On illumos the vfs/vnode op tables are removed first.
 */
void
zfs_znode_fini(void)
{
#ifdef illumos
	/*
	 * Cleanup vfs & vnode ops
	 */
	zfs_remove_op_tables();
#endif

	/*
	 * Cleanup zcache
	 */
	if (znode_cache)
		kmem_cache_destroy(znode_cache);
	znode_cache = NULL;
	rw_destroy(&zfsvfs_lock);
}

#ifdef illumos
struct vnodeops *zfs_dvnodeops;
struct vnodeops *zfs_fvnodeops;
struct vnodeops *zfs_symvnodeops;
struct vnodeops *zfs_xdvnodeops;
struct vnodeops *zfs_evnodeops;
struct vnodeops *zfs_sharevnodeops;

/*
 * Free the vfs ops registered for zfsfstype and each vnode ops vector
 * that is non-NULL, then reset all six vector pointers to NULL.
 */
void
zfs_remove_op_tables()
{
	/*
	 * Remove vfs ops
	 */
	ASSERT(zfsfstype);
	(void) vfs_freevfsops_by_type(zfsfstype);
	zfsfstype = 0;

	/*
	 * Remove vnode ops
	 */
	if (zfs_dvnodeops)
		vn_freevnodeops(zfs_dvnodeops);
	if (zfs_fvnodeops)
		vn_freevnodeops(zfs_fvnodeops);
	if (zfs_symvnodeops)
		vn_freevnodeops(zfs_symvnodeops);
	if (zfs_xdvnodeops)
		vn_freevnodeops(zfs_xdvnodeops);
	if (zfs_evnodeops)
		vn_freevnodeops(zfs_evnodeops);
	if (zfs_sharevnodeops)
		vn_freevnodeops(zfs_sharevnodeops);

	zfs_dvnodeops = NULL;
	zfs_fvnodeops = NULL;
	zfs_symvnodeops = NULL;
	zfs_xdvnodeops = NULL;
	zfs_evnodeops = NULL;
	zfs_sharevnodeops = NULL;
}

extern const fs_operation_def_t zfs_dvnodeops_template[];
extern const fs_operation_def_t zfs_fvnodeops_template[];
extern const fs_operation_def_t zfs_xdvnodeops_template[];
extern const fs_operation_def_t zfs_symvnodeops_template[];
extern const fs_operation_def_t zfs_evnodeops_template[];
extern const fs_operation_def_t zfs_sharevnodeops_template[];

/*
 * Build the six vnode ops vectors from their templates.  Stops at, and
 * returns, the first vn_make_ops() error; vectors created before the
 * failure are left in place.  Returns 0 when zfs_dvnodeops is already
 * set.
 */
int
zfs_create_op_tables()
{
	int error;

	/*
	 * zfs_dvnodeops can be set if mod_remove() calls mod_installfs()
	 * due to a failure to remove the 2nd modlinkage (zfs_modldrv).
	 * In this case we just return as the ops vectors are already set up.
	 */
	if (zfs_dvnodeops)
		return (0);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_dvnodeops_template,
	    &zfs_dvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_fvnodeops_template,
	    &zfs_fvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_symvnodeops_template,
	    &zfs_symvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_xdvnodeops_template,
	    &zfs_xdvnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_evnodeops_template,
	    &zfs_evnodeops);
	if (error)
		return (error);

	error = vn_make_ops(MNTTYPE_ZFS, zfs_sharevnodeops_template,
	    &zfs_sharevnodeops);

	return (error);
}
#endif	/* illumos */

/*
 * Create the on-disk shares directory object inside transaction tx.
 *
 * A temporary znode is taken from znode_cache and used as its own parent
 * (IS_ROOT_NODE) so zfs_mknode() can build the object.  The new object
 * id is stored under ZFS_SHARES_DIR in the master node and in
 * zfsvfs->z_shares_dir, after which the temporary znode and its SA
 * handle are released.  Returns the result of zap_add().
 */
int
zfs_create_share_dir(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
{
	zfs_acl_ids_t acl_ids;
	vattr_t vattr;
	znode_t *sharezp;
	znode_t *zp;
	int error;

	vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE;
	vattr.va_type = VDIR;
	vattr.va_mode = S_IFDIR|0555;
	vattr.va_uid = crgetuid(kcred);
	vattr.va_gid = crgetgid(kcred);

	sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
	ASSERT(!POINTER_IS_VALID(sharezp->z_zfsvfs));
	sharezp->z_moved = 0;
	sharezp->z_unlinked = 0;
	sharezp->z_atime_dirty = 0;
	sharezp->z_zfsvfs = zfsvfs;
	sharezp->z_is_sa = zfsvfs->z_use_sa;

	VERIFY(0 == zfs_acl_ids_create(sharezp, IS_ROOT_NODE, &vattr,
	    kcred, NULL, &acl_ids));
	zfs_mknode(sharezp, &vattr, tx, kcred, IS_ROOT_NODE, &zp, &acl_ids);
	ASSERT3P(zp, ==, sharezp);
	/* Invalidate again before the znode goes back to the cache. */
	POINTER_INVALIDATE(&sharezp->z_zfsvfs);
	error = zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
	    ZFS_SHARES_DIR, 8, 1, &sharezp->z_id, tx);
	zfsvfs->z_shares_dir = sharezp->z_id;

	zfs_acl_ids_free(&acl_ids);
	sa_handle_destroy(sharezp->z_sa_hdl);
	kmem_cache_free(znode_cache, sharezp);

	return (error);
}

/*
 * define a couple of values we need available
 * for both 64 and 32 bit environments.
 */
#ifndef NBITSMINOR64
#define	NBITSMINOR64	32
#endif
#ifndef MAXMAJ64
#define	MAXMAJ64	0xffffffffUL
#endif
#ifndef	MAXMIN64
#define	MAXMIN64	0xffffffffUL
#endif

/*
 * Create special expldev for ZFS private use.
* Can't use standard expldev since it doesn't do * what we want. The standard expldev() takes a * dev32_t in LP64 and expands it to a long dev_t. * We need an interface that takes a dev32_t in ILP32 * and expands it to a long dev_t. */ static uint64_t zfs_expldev(dev_t dev) { return (((uint64_t)major(dev) << NBITSMINOR64) | minor(dev)); } /* * Special cmpldev for ZFS private use. * Can't use standard cmpldev since it takes * a long dev_t and compresses it to dev32_t in * LP64. We need to do a compaction of a long dev_t * to a dev32_t in ILP32. */ dev_t zfs_cmpldev(uint64_t dev) { return (makedev((dev >> NBITSMINOR64), (dev & MAXMIN64))); } static void zfs_znode_sa_init(zfsvfs_t *zfsvfs, znode_t *zp, dmu_buf_t *db, dmu_object_type_t obj_type, sa_handle_t *sa_hdl) { ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs) || (zfsvfs == zp->z_zfsvfs)); ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zfsvfs, zp->z_id))); ASSERT(zp->z_sa_hdl == NULL); ASSERT(zp->z_acl_cached == NULL); if (sa_hdl == NULL) { VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, zp, SA_HDL_SHARED, &zp->z_sa_hdl)); } else { zp->z_sa_hdl = sa_hdl; sa_set_userp(sa_hdl, zp); } zp->z_is_sa = (obj_type == DMU_OT_SA) ? B_TRUE : B_FALSE; /* * Slap on VROOT if we are the root znode unless we are the root * node of a snapshot mounted under .zfs. */ if (zp->z_id == zfsvfs->z_root && zfsvfs->z_parent == zfsvfs) ZTOV(zp)->v_flag |= VROOT; vn_exists(ZTOV(zp)); } void zfs_znode_dmu_fini(znode_t *zp) { ASSERT(MUTEX_HELD(ZFS_OBJ_MUTEX(zp->z_zfsvfs, zp->z_id)) || zp->z_unlinked || RW_WRITE_HELD(&zp->z_zfsvfs->z_teardown_inactive_lock)); sa_handle_destroy(zp->z_sa_hdl); zp->z_sa_hdl = NULL; } static void zfs_vnode_forget(vnode_t *vp) { /* copied from insmntque_stddtr */ vp->v_data = NULL; vp->v_op = &dead_vnodeops; vgone(vp); vput(vp); } /* * Construct a new znode/vnode and intialize. 
*
 * This does not do a call to dmu_set_user() that is
 * up to the caller to do, in case you don't want to
 * return the znode
 */
/*
 *	zfsvfs		- file system the znode will belong to
 *	db		- dbuf of the object; db->db_object becomes z_id
 *	blksz		- stored in z_blksz
 *	obj_type	- DMU_OT_SA or another type; decides z_is_sa
 *	hdl		- existing SA handle to adopt, or NULL to obtain one
 *
 * Returns the new znode with its vnode exclusively locked, or NULL when
 * getnewvnode() fails, the bulk attribute lookup fails, or the
 * generation read back is 0.
 */
static znode_t *
zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
    dmu_object_type_t obj_type, sa_handle_t *hdl)
{
	znode_t	*zp;
	vnode_t *vp;
	uint64_t mode;
	uint64_t parent;
	sa_bulk_attr_t bulk[9];
	int count = 0;
	int error;

	zp = kmem_cache_alloc(znode_cache, KM_SLEEP);

	KASSERT(curthread->td_vp_reserv > 0,
	    ("zfs_znode_alloc: getnewvnode without any vnodes reserved"));
	error = getnewvnode("zfs", zfsvfs->z_parent->z_vfs, &zfs_vnodeops, &vp);
	if (error != 0) {
		kmem_cache_free(znode_cache, zp);
		return (NULL);
	}
	zp->z_vnode = vp;
	vp->v_data = zp;

	ASSERT(!POINTER_IS_VALID(zp->z_zfsvfs));
	zp->z_moved = 0;

	/*
	 * Defer setting z_zfsvfs until the znode is ready to be a candidate for
	 * the zfs_znode_move() callback.
	 */
	zp->z_sa_hdl = NULL;
	zp->z_unlinked = 0;
	zp->z_atime_dirty = 0;
	zp->z_mapcnt = 0;
	zp->z_id = db->db_object;
	zp->z_blksz = blksz;
	zp->z_seq = 0x7A4653;
	zp->z_sync_cnt = 0;

	vp = ZTOV(zp);

	zfs_znode_sa_init(zfsvfs, zp, db, obj_type, hdl);

	/* Nine attributes are fetched; bulk[] is sized to match. */
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &zp->z_gen, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL,
	    &zp->z_size, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL,
	    &zp->z_links, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL,
	    &zp->z_pflags, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_PARENT(zfsvfs), NULL, &parent, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL,
	    &zp->z_atime, 16);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL,
	    &zp->z_uid, 8);
	SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), NULL,
	    &zp->z_gid, 8);

	if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count) != 0 || zp->z_gen == 0) {
		/* Only destroy a handle obtained here, not the caller's. */
		if (hdl == NULL)
			sa_handle_destroy(zp->z_sa_hdl);
		zfs_vnode_forget(vp);
		zp->z_vnode = NULL;
		kmem_cache_free(znode_cache, zp);
		return (NULL);
	}

	zp->z_mode = mode;

	vp->v_type = IFTOVT((mode_t)mode);

	/* Per-type setup; several cases exist only on illumos. */
	switch (vp->v_type) {
	case VDIR:
		zp->z_zn_prefetch = B_TRUE; /* z_prefetch default is enabled */
		break;
#ifdef illumos
	case VBLK:
	case VCHR:
		{
			uint64_t rdev;
			VERIFY(sa_lookup(zp->z_sa_hdl, SA_ZPL_RDEV(zfsvfs),
			    &rdev, sizeof (rdev)) == 0);

			vp->v_rdev = zfs_cmpldev(rdev);
		}
		break;
#endif
	case VFIFO:
#ifdef illumos
	case VSOCK:
	case VDOOR:
#endif
		vp->v_op = &zfs_fifoops;
		break;
	case VREG:
		/* Regular files under the shares directory get share ops. */
		if (parent == zfsvfs->z_shares_dir) {
			ASSERT(zp->z_uid == 0 && zp->z_gid == 0);
			vp->v_op = &zfs_shareops;
		}
		break;
#ifdef illumos
	case VLNK:
		vn_setops(vp, zfs_symvnodeops);
		break;
	default:
		vn_setops(vp, zfs_evnodeops);
		break;
#endif
	}

	mutex_enter(&zfsvfs->z_znodes_lock);
	list_insert_tail(&zfsvfs->z_all_znodes, zp);
	membar_producer();
	/*
	 * Everything else must be valid before assigning z_zfsvfs makes the
	 * znode eligible for zfs_znode_move().
	 */
	zp->z_zfsvfs = zfsvfs;
	mutex_exit(&zfsvfs->z_znodes_lock);

	/*
	 * Acquire vnode lock before making it available to the world.
	 */
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	VN_LOCK_AREC(vp);
	if (vp->v_type != VFIFO)
		VN_LOCK_ASHARE(vp);

#ifdef illumos
	VFS_HOLD(zfsvfs->z_vfs);
#endif
	return (zp);
}

/*
 * Zero-filled placeholders used by zfs_mknode() for attributes that the
 * old znode layout requires.
 */
static uint64_t empty_xattr;
static uint64_t pad[4];
static zfs_acl_phys_t acl_phys;
/*
 * Create a new DMU object to hold a zfs znode.
 *
 *	IN:	dzp	- parent directory for new znode
 *		vap	- file attributes for new znode
 *		tx	- dmu transaction id for zap operations
 *		cr	- credentials of caller
 *		flag	- flags:
 *			  IS_ROOT_NODE	- new object will be root
 *			  IS_XATTR	- new object is an attribute
 *		bonuslen - length of bonus buffer
 *		setaclp  - File/Dir initial ACL
 *		fuidp	 - Tracks fuid allocation.
*
 *	OUT:	zpp	- allocated znode
 *
 */
void
zfs_mknode(znode_t *dzp, vattr_t *vap, dmu_tx_t *tx, cred_t *cr,
    uint_t flag, znode_t **zpp, zfs_acl_ids_t *acl_ids)
{
	uint64_t	crtime[2], atime[2], mtime[2], ctime[2];
	uint64_t	mode, size, links, parent, pflags;
	uint64_t	dzp_pflags = 0;
	uint64_t	rdev = 0;
	zfsvfs_t	*zfsvfs = dzp->z_zfsvfs;
	dmu_buf_t	*db;
	timestruc_t	now;
	uint64_t	gen, obj;
	int		err;
	int		bonuslen;
	sa_handle_t	*sa_hdl;
	dmu_object_type_t obj_type;
	sa_bulk_attr_t	sa_attrs[ZPL_END];
	int		cnt = 0;
	zfs_acl_locator_cb_t locate = { 0 };

	ASSERT(vap && (vap->va_mask & (AT_TYPE|AT_MODE)) == (AT_TYPE|AT_MODE));

	/*
	 * During replay the object id, timestamp and generation come from
	 * vap; otherwise they are generated here.
	 */
	if (zfsvfs->z_replay) {
		obj = vap->va_nodeid;
		now = vap->va_ctime;		/* see zfs_replay_create() */
		gen = vap->va_nblocks;		/* ditto */
	} else {
		obj = 0;
		vfs_timestamp(&now);
		gen = dmu_tx_get_txg(tx);
	}

	obj_type = zfsvfs->z_use_sa ? DMU_OT_SA : DMU_OT_ZNODE;
	bonuslen = (obj_type == DMU_OT_SA) ?
	    DN_MAX_BONUSLEN : ZFS_OLD_ZNODE_PHYS_SIZE;

	/*
	 * Create a new DMU object.
	 */
	/*
	 * There's currently no mechanism for pre-reading the blocks that will
	 * be needed to allocate a new object, so we accept the small chance
	 * that there will be an i/o error and we will fail one of the
	 * assertions below.
	 */
	if (vap->va_type == VDIR) {
		if (zfsvfs->z_replay) {
			VERIFY0(zap_create_claim_norm(zfsvfs->z_os, obj,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx));
		} else {
			obj = zap_create_norm(zfsvfs->z_os,
			    zfsvfs->z_norm, DMU_OT_DIRECTORY_CONTENTS,
			    obj_type, bonuslen, tx);
		}
	} else {
		if (zfsvfs->z_replay) {
			VERIFY0(dmu_object_claim(zfsvfs->z_os, obj,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx));
		} else {
			obj = dmu_object_alloc(zfsvfs->z_os,
			    DMU_OT_PLAIN_FILE_CONTENTS, 0,
			    obj_type, bonuslen, tx);
		}
	}

	/* Held until the matching ZFS_OBJ_HOLD_EXIT() at the very end. */
	ZFS_OBJ_HOLD_ENTER(zfsvfs, obj);
	VERIFY(0 == sa_buf_hold(zfsvfs->z_os, obj, NULL, &db));

	/*
	 * If this is the root, fix up the half-initialized parent pointer
	 * to reference the just-allocated physical data area.
	 */
	if (flag & IS_ROOT_NODE) {
		dzp->z_id = obj;
	} else {
		dzp_pflags = dzp->z_pflags;
	}

	/*
	 * If parent is an xattr, so am I.
	 */
	if (dzp_pflags & ZFS_XATTR) {
		flag |= IS_XATTR;
	}

	if (zfsvfs->z_use_fuids)
		pflags = ZFS_ARCHIVE | ZFS_AV_MODIFIED;
	else
		pflags = 0;

	if (vap->va_type == VDIR) {
		size = 2;		/* contents ("." and "..") */
		links = (flag & (IS_ROOT_NODE | IS_XATTR)) ? 2 : 1;
	} else {
		size = links = 0;
	}

	if (vap->va_type == VBLK || vap->va_type == VCHR) {
		rdev = zfs_expldev(vap->va_rdev);
	}

	parent = dzp->z_id;
	mode = acl_ids->z_mode;
	if (flag & IS_XATTR)
		pflags |= ZFS_XATTR;

	/*
	 * No execs denied will be determined when zfs_mode_compute() is called.
	 */
	pflags |= acl_ids->z_aclp->z_hints &
	    (ZFS_ACL_TRIVIAL|ZFS_INHERIT_ACE|ZFS_ACL_AUTO_INHERIT|
	    ZFS_ACL_DEFAULTED|ZFS_ACL_PROTECTED);

	ZFS_TIME_ENCODE(&now, crtime);
	ZFS_TIME_ENCODE(&now, ctime);

	/* atime/mtime come from vap when supplied, else from "now". */
	if (vap->va_mask & AT_ATIME) {
		ZFS_TIME_ENCODE(&vap->va_atime, atime);
	} else {
		ZFS_TIME_ENCODE(&now, atime);
	}

	if (vap->va_mask & AT_MTIME) {
		ZFS_TIME_ENCODE(&vap->va_mtime, mtime);
	} else {
		ZFS_TIME_ENCODE(&now, mtime);
	}

	/* Now add in all of the "SA" attributes */
	VERIFY(0 == sa_handle_get_from_db(zfsvfs->z_os, db, NULL, SA_HDL_SHARED,
	    &sa_hdl));

	/*
	 * Setup the array of attributes to be replaced/set on the new file
	 *
	 * order for  DMU_OT_ZNODE is critical since it needs to be constructed
	 * in the old znode_phys_t format.  Don't change this ordering
	 */

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
	} else {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MODE(zfsvfs),
		    NULL, &mode, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_SIZE(zfsvfs),
		    NULL, &size, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GEN(zfsvfs),
		    NULL, &gen, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PARENT(zfsvfs),
		    NULL, &parent, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ATIME(zfsvfs),
		    NULL, &atime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_MTIME(zfsvfs),
		    NULL, &mtime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CTIME(zfsvfs),
		    NULL, &ctime, 16);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_CRTIME(zfsvfs),
		    NULL, &crtime, 16);
	}

	SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_LINKS(zfsvfs), NULL, &links, 8);

	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_XATTR(zfsvfs), NULL,
		    &empty_xattr, 8);
	}
	if (obj_type == DMU_OT_ZNODE ||
	    (vap->va_type == VBLK || vap->va_type == VCHR)) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_RDEV(zfsvfs),
		    NULL, &rdev, 8);

	}
	if (obj_type == DMU_OT_ZNODE) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_FLAGS(zfsvfs),
		    NULL, &pflags, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_UID(zfsvfs), NULL,
		    &acl_ids->z_fuid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_GID(zfsvfs), NULL,
		    &acl_ids->z_fgid, 8);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_PAD(zfsvfs), NULL, pad,
		    sizeof (uint64_t) * 4);
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_ZNODE_ACL(zfsvfs), NULL,
		    &acl_phys, sizeof (zfs_acl_phys_t));
	} else if (acl_ids->z_aclp->z_version >= ZFS_ACL_VERSION_FUID) {
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_COUNT(zfsvfs), NULL,
		    &acl_ids->z_aclp->z_acl_count, 8);
		locate.cb_aclp = acl_ids->z_aclp;
		SA_ADD_BULK_ATTR(sa_attrs, cnt, SA_ZPL_DACL_ACES(zfsvfs),
		    zfs_acl_data_locator, &locate,
		    acl_ids->z_aclp->z_acl_bytes);
		/* May also adjust pflags (see the comment further up). */
		mode = zfs_mode_compute(mode, acl_ids->z_aclp, &pflags,
		    acl_ids->z_fuid, acl_ids->z_fgid);
	}

	VERIFY(sa_replace_all_by_template(sa_hdl, sa_attrs, cnt, tx) == 0);

	if (!(flag & IS_ROOT_NODE)) {
		*zpp = zfs_znode_alloc(zfsvfs, db, 0, obj_type, sa_hdl);
		ASSERT(*zpp != NULL);
	} else {
		/*
		 * If we are creating the root node, the "parent" we
		 * passed in is the znode for the root.
		 */
		*zpp = dzp;

		(*zpp)->z_sa_hdl = sa_hdl;
	}

	(*zpp)->z_pflags = pflags;
	(*zpp)->z_mode = mode;

	if (vap->va_mask & AT_XVATTR)
		zfs_xvattr_set(*zpp, (xvattr_t *)vap, tx);

	/* Old-layout objects and pre-FUID ACLs store the ACL separately. */
	if (obj_type == DMU_OT_ZNODE ||
	    acl_ids->z_aclp->z_version < ZFS_ACL_VERSION_FUID) {
		VERIFY0(zfs_aclset_common(*zpp, acl_ids->z_aclp, cr, tx));
	}
	if (!(flag & IS_ROOT_NODE)) {
		vnode_t *vp;

		/* VV_FORCEINSMQ is set only for the insmntque() call. */
		vp = ZTOV(*zpp);
		vp->v_vflag |= VV_FORCEINSMQ;
		err = insmntque(vp, zfsvfs->z_vfs);
		vp->v_vflag &= ~VV_FORCEINSMQ;
		KASSERT(err == 0, ("insmntque() failed: error %d", err));
	}
	ZFS_OBJ_HOLD_EXIT(zfsvfs, obj);
}

/*
 * Update in-core attributes.  It is assumed the caller will be doing an
 * sa_bulk_update to push the changes out.
*/ void zfs_xvattr_set(znode_t *zp, xvattr_t *xvap, dmu_tx_t *tx) { xoptattr_t *xoap; xoap = xva_getxoptattr(xvap); ASSERT(xoap); if (XVA_ISSET_REQ(xvap, XAT_CREATETIME)) { uint64_t times[2]; ZFS_TIME_ENCODE(&xoap->xoa_createtime, times); (void) sa_update(zp->z_sa_hdl, SA_ZPL_CRTIME(zp->z_zfsvfs), ×, sizeof (times), tx); XVA_SET_RTN(xvap, XAT_CREATETIME); } if (XVA_ISSET_REQ(xvap, XAT_READONLY)) { ZFS_ATTR_SET(zp, ZFS_READONLY, xoap->xoa_readonly, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_READONLY); } if (XVA_ISSET_REQ(xvap, XAT_HIDDEN)) { ZFS_ATTR_SET(zp, ZFS_HIDDEN, xoap->xoa_hidden, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_HIDDEN); } if (XVA_ISSET_REQ(xvap, XAT_SYSTEM)) { ZFS_ATTR_SET(zp, ZFS_SYSTEM, xoap->xoa_system, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_SYSTEM); } if (XVA_ISSET_REQ(xvap, XAT_ARCHIVE)) { ZFS_ATTR_SET(zp, ZFS_ARCHIVE, xoap->xoa_archive, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_ARCHIVE); } if (XVA_ISSET_REQ(xvap, XAT_IMMUTABLE)) { ZFS_ATTR_SET(zp, ZFS_IMMUTABLE, xoap->xoa_immutable, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_IMMUTABLE); } if (XVA_ISSET_REQ(xvap, XAT_NOUNLINK)) { ZFS_ATTR_SET(zp, ZFS_NOUNLINK, xoap->xoa_nounlink, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_NOUNLINK); } if (XVA_ISSET_REQ(xvap, XAT_APPENDONLY)) { ZFS_ATTR_SET(zp, ZFS_APPENDONLY, xoap->xoa_appendonly, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_APPENDONLY); } if (XVA_ISSET_REQ(xvap, XAT_NODUMP)) { ZFS_ATTR_SET(zp, ZFS_NODUMP, xoap->xoa_nodump, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_NODUMP); } if (XVA_ISSET_REQ(xvap, XAT_OPAQUE)) { ZFS_ATTR_SET(zp, ZFS_OPAQUE, xoap->xoa_opaque, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_OPAQUE); } if (XVA_ISSET_REQ(xvap, XAT_AV_QUARANTINED)) { ZFS_ATTR_SET(zp, ZFS_AV_QUARANTINED, xoap->xoa_av_quarantined, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_AV_QUARANTINED); } if (XVA_ISSET_REQ(xvap, XAT_AV_MODIFIED)) { ZFS_ATTR_SET(zp, ZFS_AV_MODIFIED, xoap->xoa_av_modified, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_AV_MODIFIED); } if 
(XVA_ISSET_REQ(xvap, XAT_AV_SCANSTAMP)) { zfs_sa_set_scanstamp(zp, xvap, tx); XVA_SET_RTN(xvap, XAT_AV_SCANSTAMP); } if (XVA_ISSET_REQ(xvap, XAT_REPARSE)) { ZFS_ATTR_SET(zp, ZFS_REPARSE, xoap->xoa_reparse, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_REPARSE); } if (XVA_ISSET_REQ(xvap, XAT_OFFLINE)) { ZFS_ATTR_SET(zp, ZFS_OFFLINE, xoap->xoa_offline, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_OFFLINE); } if (XVA_ISSET_REQ(xvap, XAT_SPARSE)) { ZFS_ATTR_SET(zp, ZFS_SPARSE, xoap->xoa_sparse, zp->z_pflags, tx); XVA_SET_RTN(xvap, XAT_SPARSE); } } int zfs_zget(zfsvfs_t *zfsvfs, uint64_t obj_num, znode_t **zpp) { dmu_object_info_t doi; dmu_buf_t *db; znode_t *zp; vnode_t *vp; sa_handle_t *hdl; struct thread *td; int locked; int err; td = curthread; getnewvnode_reserve(1); again: *zpp = NULL; ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); if (err) { ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); getnewvnode_drop_reserve(); return (err); } dmu_object_info_from_db(db, &doi); if (doi.doi_bonus_type != DMU_OT_SA && (doi.doi_bonus_type != DMU_OT_ZNODE || (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); #ifdef __FreeBSD__ getnewvnode_drop_reserve(); #endif return (SET_ERROR(EINVAL)); } hdl = dmu_buf_get_user(db); if (hdl != NULL) { zp = sa_get_userdata(hdl); /* * Since "SA" does immediate eviction we * should never find a sa handle that doesn't * know about the znode. */ ASSERT3P(zp, !=, NULL); ASSERT3U(zp->z_id, ==, obj_num); *zpp = zp; vp = ZTOV(zp); /* Don't let the vnode disappear after ZFS_OBJ_HOLD_EXIT. */ VN_HOLD(vp); sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); locked = VOP_ISLOCKED(vp); VI_LOCK(vp); if ((vp->v_iflag & VI_DOOMED) != 0 && locked != LK_EXCLUSIVE) { /* * The vnode is doomed and this thread doesn't * hold the exclusive lock on it, so the vnode * must be being reclaimed by another thread. 
* Otherwise the doomed vnode is being reclaimed * by this thread and zfs_zget is called from * ZIL internals. */ VI_UNLOCK(vp); /* * XXX vrele() locks the vnode when the last reference * is dropped. Although in this case the vnode is * doomed / dead and so no inactivation is required, * the vnode lock is still acquired. That could result * in a LOR with z_teardown_lock if another thread holds * the vnode's lock and tries to take z_teardown_lock. * But that is only possible if the other thread peforms * a ZFS vnode operation on the vnode. That either * should not happen if the vnode is dead or the thread * should also have a refrence to the vnode and thus * our reference is not last. */ VN_RELE(vp); goto again; } VI_UNLOCK(vp); getnewvnode_drop_reserve(); return (0); } /* * Not found create new znode/vnode * but only if file exists. * * There is a small window where zfs_vget() could * find this object while a file create is still in * progress. This is checked for in zfs_znode_alloc() * * if zfs_znode_alloc() fails it will drop the hold on the * bonus buffer. 
*/ zp = zfs_znode_alloc(zfsvfs, db, doi.doi_data_block_size, doi.doi_bonus_type, NULL); if (zp == NULL) { err = SET_ERROR(ENOENT); } else { *zpp = zp; } if (err == 0) { vnode_t *vp = ZTOV(zp); err = insmntque(vp, zfsvfs->z_vfs); if (err == 0) { vp->v_hash = obj_num; VOP_UNLOCK(vp, 0); } else { zp->z_vnode = NULL; zfs_znode_dmu_fini(zp); zfs_znode_free(zp); *zpp = NULL; } } ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); getnewvnode_drop_reserve(); return (err); } int zfs_rezget(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; dmu_object_info_t doi; dmu_buf_t *db; vnode_t *vp; uint64_t obj_num = zp->z_id; uint64_t mode, size; sa_bulk_attr_t bulk[8]; int err; int count = 0; uint64_t gen; ZFS_OBJ_HOLD_ENTER(zfsvfs, obj_num); mutex_enter(&zp->z_acl_lock); if (zp->z_acl_cached) { zfs_acl_free(zp->z_acl_cached); zp->z_acl_cached = NULL; } mutex_exit(&zp->z_acl_lock); ASSERT(zp->z_sa_hdl == NULL); err = sa_buf_hold(zfsvfs->z_os, obj_num, NULL, &db); if (err) { ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (err); } dmu_object_info_from_db(db, &doi); if (doi.doi_bonus_type != DMU_OT_SA && (doi.doi_bonus_type != DMU_OT_ZNODE || (doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)))) { sa_buf_rele(db, NULL); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (SET_ERROR(EINVAL)); } zfs_znode_sa_init(zfsvfs, zp, db, doi.doi_bonus_type, NULL); size = zp->z_size; /* reload cached values */ SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GEN(zfsvfs), NULL, &gen, sizeof (gen)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, &zp->z_size, sizeof (zp->z_size)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_LINKS(zfsvfs), NULL, &zp->z_links, sizeof (zp->z_links)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, sizeof (zp->z_pflags)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_ATIME(zfsvfs), NULL, &zp->z_atime, sizeof (zp->z_atime)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_UID(zfsvfs), NULL, &zp->z_uid, sizeof (zp->z_uid)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_GID(zfsvfs), 
NULL, &zp->z_gid, sizeof (zp->z_gid)); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MODE(zfsvfs), NULL, &mode, sizeof (mode)); if (sa_bulk_lookup(zp->z_sa_hdl, bulk, count)) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (SET_ERROR(EIO)); } zp->z_mode = mode; if (gen != zp->z_gen) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (SET_ERROR(EIO)); } /* * It is highly improbable but still quite possible that two * objects in different datasets are created with the same * object numbers and in transaction groups with the same * numbers. znodes corresponding to those objects would * have the same z_id and z_gen, but their other attributes * may be different. * zfs recv -F may replace one of such objects with the other. * As a result file properties recorded in the replaced * object's vnode may no longer match the received object's * properties. At present the only cached property is the * files type recorded in v_type. * So, handle this case by leaving the old vnode and znode * disassociated from the actual object. A new vnode and a * znode will be created if the object is accessed * (e.g. via a look-up). The old vnode and znode will be * recycled when the last vnode reference is dropped. 
*/ vp = ZTOV(zp); if (vp->v_type != IFTOVT((mode_t)zp->z_mode)) { zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (EIO); } zp->z_unlinked = (zp->z_links == 0); zp->z_blksz = doi.doi_data_block_size; vn_pages_remove(vp, 0, 0); if (zp->z_size != size) vnode_pager_setsize(vp, zp->z_size); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj_num); return (0); } void zfs_znode_delete(znode_t *zp, dmu_tx_t *tx) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; objset_t *os = zfsvfs->z_os; uint64_t obj = zp->z_id; uint64_t acl_obj = zfs_external_acl(zp); ZFS_OBJ_HOLD_ENTER(zfsvfs, obj); if (acl_obj) { VERIFY(!zp->z_is_sa); VERIFY(0 == dmu_object_free(os, acl_obj, tx)); } VERIFY(0 == dmu_object_free(os, obj, tx)); zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, obj); zfs_znode_free(zp); } void zfs_zinactive(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; uint64_t z_id = zp->z_id; ASSERT(zp->z_sa_hdl); /* * Don't allow a zfs_zget() while were trying to release this znode */ ZFS_OBJ_HOLD_ENTER(zfsvfs, z_id); /* * If this was the last reference to a file with no links, * remove the file from the file system. */ if (zp->z_unlinked) { ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); zfs_rmnode(zp); return; } zfs_znode_dmu_fini(zp); ZFS_OBJ_HOLD_EXIT(zfsvfs, z_id); zfs_znode_free(zp); } void zfs_znode_free(znode_t *zp) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; ASSERT(zp->z_sa_hdl == NULL); zp->z_vnode = NULL; mutex_enter(&zfsvfs->z_znodes_lock); POINTER_INVALIDATE(&zp->z_zfsvfs); list_remove(&zfsvfs->z_all_znodes, zp); mutex_exit(&zfsvfs->z_znodes_lock); if (zp->z_acl_cached) { zfs_acl_free(zp->z_acl_cached); zp->z_acl_cached = NULL; } kmem_cache_free(znode_cache, zp); #ifdef illumos VFS_RELE(zfsvfs->z_vfs); #endif } void zfs_tstamp_update_setup(znode_t *zp, uint_t flag, uint64_t mtime[2], uint64_t ctime[2], boolean_t have_tx) { timestruc_t now; vfs_timestamp(&now); if (have_tx) { /* will sa_bulk_update happen really soon? 
*/ zp->z_atime_dirty = 0; zp->z_seq++; } else { zp->z_atime_dirty = 1; } if (flag & AT_ATIME) { ZFS_TIME_ENCODE(&now, zp->z_atime); } if (flag & AT_MTIME) { ZFS_TIME_ENCODE(&now, mtime); if (zp->z_zfsvfs->z_use_fuids) { zp->z_pflags |= (ZFS_ARCHIVE | ZFS_AV_MODIFIED); } } if (flag & AT_CTIME) { ZFS_TIME_ENCODE(&now, ctime); if (zp->z_zfsvfs->z_use_fuids) zp->z_pflags |= ZFS_ARCHIVE; } } /* * Grow the block size for a file. * * IN: zp - znode of file to free data in. * size - requested block size * tx - open transaction. * * NOTE: this function assumes that the znode is write locked. */ void zfs_grow_blocksize(znode_t *zp, uint64_t size, dmu_tx_t *tx) { int error; u_longlong_t dummy; if (size <= zp->z_blksz) return; /* * If the file size is already greater than the current blocksize, * we will not grow. If there is more than one block in a file, * the blocksize cannot change. */ if (zp->z_blksz && zp->z_size > zp->z_blksz) return; error = dmu_object_set_blocksize(zp->z_zfsvfs->z_os, zp->z_id, size, 0, tx); if (error == ENOTSUP) return; ASSERT0(error); /* What blocksize did we actually get? */ dmu_object_size_from_db(sa_get_db(zp->z_sa_hdl), &zp->z_blksz, &dummy); } #ifdef illumos /* * This is a dummy interface used when pvn_vplist_dirty() should *not* * be calling back into the fs for a putpage(). E.g.: when truncating * a file, the pages being "thrown away* don't need to be written out. */ /* ARGSUSED */ static int zfs_no_putpage(vnode_t *vp, page_t *pp, u_offset_t *offp, size_t *lenp, int flags, cred_t *cr) { ASSERT(0); return (0); } #endif /* * Increase the file length * * IN: zp - znode of file to free data in. * end - new end-of-file * * RETURN: 0 on success, error code on failure */ static int zfs_extend(znode_t *zp, uint64_t end) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; dmu_tx_t *tx; rl_t *rl; uint64_t newblksz; int error; /* * We will change zp_size, lock the whole file. 
*/ rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (end <= zp->z_size) { zfs_range_unlock(rl); return (0); } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); if (end > zp->z_blksz && (!ISP2(zp->z_blksz) || zp->z_blksz < zfsvfs->z_max_blksz)) { /* * We are growing the file past the current block size. */ if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) { /* * File's blocksize is already larger than the * "recordsize" property. Only let it grow to * the next power of 2. */ ASSERT(!ISP2(zp->z_blksz)); newblksz = MIN(end, 1 << highbit64(zp->z_blksz)); } else { newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz); } dmu_tx_hold_write(tx, zp->z_id, 0, newblksz); } else { newblksz = 0; } error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); zfs_range_unlock(rl); return (error); } if (newblksz) zfs_grow_blocksize(zp, newblksz, tx); zp->z_size = end; VERIFY(0 == sa_update(zp->z_sa_hdl, SA_ZPL_SIZE(zp->z_zfsvfs), &zp->z_size, sizeof (zp->z_size), tx)); vnode_pager_setsize(ZTOV(zp), end); zfs_range_unlock(rl); dmu_tx_commit(tx); return (0); } /* * Free space in a file. * * IN: zp - znode of file to free data in. * off - start of section to free. * len - length of section to free. * * RETURN: 0 on success, error code on failure */ static int zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; rl_t *rl; int error; /* * Lock the range being freed. */ rl = zfs_range_lock(zp, off, len, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (off >= zp->z_size) { zfs_range_unlock(rl); return (0); } if (off + len > zp->z_size) len = zp->z_size - off; error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len); if (error == 0) { /* * In FreeBSD we cannot free block in the middle of a file, * but only at the end of a file, so this code path should * never happen. 
*/ vnode_pager_setsize(ZTOV(zp), off); } zfs_range_unlock(rl); return (error); } /* * Truncate a file * * IN: zp - znode of file to free data in. * end - new end-of-file. * * RETURN: 0 on success, error code on failure */ static int zfs_trunc(znode_t *zp, uint64_t end) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; vnode_t *vp = ZTOV(zp); dmu_tx_t *tx; rl_t *rl; int error; sa_bulk_attr_t bulk[2]; int count = 0; /* * We will change zp_size, lock the whole file. */ rl = zfs_range_lock(zp, 0, UINT64_MAX, RL_WRITER); /* * Nothing to do if file already at desired length. */ if (end >= zp->z_size) { zfs_range_unlock(rl); return (0); } error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, end, -1); if (error) { zfs_range_unlock(rl); return (error); } tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); dmu_tx_mark_netfree(tx); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); zfs_range_unlock(rl); return (error); } zp->z_size = end; SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_SIZE(zfsvfs), NULL, &zp->z_size, sizeof (zp->z_size)); if (end == 0) { zp->z_pflags &= ~ZFS_SPARSE; SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, 8); } VERIFY(sa_bulk_update(zp->z_sa_hdl, bulk, count, tx) == 0); dmu_tx_commit(tx); /* * Clear any mapped pages in the truncated region. This has to * happen outside of the transaction to avoid the possibility of * a deadlock with someone trying to push a page that we are * about to invalidate. */ vnode_pager_setsize(vp, end); zfs_range_unlock(rl); return (0); } /* * Free space in a file * * IN: zp - znode of file to free data in. * off - start of range * len - end of range (0 => EOF) * flag - current file open mode flags. 
* log - TRUE if this action should be logged * * RETURN: 0 on success, error code on failure */ int zfs_freesp(znode_t *zp, uint64_t off, uint64_t len, int flag, boolean_t log) { vnode_t *vp = ZTOV(zp); dmu_tx_t *tx; zfsvfs_t *zfsvfs = zp->z_zfsvfs; zilog_t *zilog = zfsvfs->z_log; uint64_t mode; uint64_t mtime[2], ctime[2]; sa_bulk_attr_t bulk[3]; int count = 0; int error; if ((error = sa_lookup(zp->z_sa_hdl, SA_ZPL_MODE(zfsvfs), &mode, sizeof (mode))) != 0) return (error); if (off > zp->z_size) { error = zfs_extend(zp, off+len); if (error == 0 && log) goto log; else return (error); } /* * Check for any locks in the region to be freed. */ if (MANDLOCK(vp, (mode_t)mode)) { uint64_t length = (len ? len : zp->z_size - off); if (error = chklock(vp, FWRITE, off, length, flag, NULL)) return (error); } if (len == 0) { error = zfs_trunc(zp, off); } else { if ((error = zfs_free_range(zp, off, len)) == 0 && off + len > zp->z_size) error = zfs_extend(zp, off+len); } if (error || !log) return (error); log: tx = dmu_tx_create(zfsvfs->z_os); dmu_tx_hold_sa(tx, zp->z_sa_hdl, B_FALSE); zfs_sa_upgrade_txholds(tx, zp); error = dmu_tx_assign(tx, TXG_WAIT); if (error) { dmu_tx_abort(tx); return (error); } SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_MTIME(zfsvfs), NULL, mtime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_CTIME(zfsvfs), NULL, ctime, 16); SA_ADD_BULK_ATTR(bulk, count, SA_ZPL_FLAGS(zfsvfs), NULL, &zp->z_pflags, 8); zfs_tstamp_update_setup(zp, CONTENT_MODIFIED, mtime, ctime, B_TRUE); error = sa_bulk_update(zp->z_sa_hdl, bulk, count, tx); ASSERT(error == 0); zfs_log_truncate(zilog, tx, TX_TRUNCATE, zp, off, len); dmu_tx_commit(tx); return (0); } void zfs_create_fs(objset_t *os, cred_t *cr, nvlist_t *zplprops, dmu_tx_t *tx) { uint64_t moid, obj, sa_obj, version; uint64_t sense = ZFS_CASE_SENSITIVE; uint64_t norm = 0; nvpair_t *elem; int error; int i; znode_t *rootzp = NULL; zfsvfs_t *zfsvfs; vattr_t vattr; znode_t *zp; zfs_acl_ids_t acl_ids; /* * First attempt to create master node. 
*/ /* * In an empty objset, there are no blocks to read and thus * there can be no i/o errors (which we assert below). */ moid = MASTER_NODE_OBJ; error = zap_create_claim(os, moid, DMU_OT_MASTER_NODE, DMU_OT_NONE, 0, tx); ASSERT(error == 0); /* * Set starting attributes. */ version = zfs_zpl_version_map(spa_version(dmu_objset_spa(os))); elem = NULL; while ((elem = nvlist_next_nvpair(zplprops, elem)) != NULL) { /* For the moment we expect all zpl props to be uint64_ts */ uint64_t val; char *name; ASSERT(nvpair_type(elem) == DATA_TYPE_UINT64); VERIFY(nvpair_value_uint64(elem, &val) == 0); name = nvpair_name(elem); if (strcmp(name, zfs_prop_to_name(ZFS_PROP_VERSION)) == 0) { if (val < version) version = val; } else { error = zap_update(os, moid, name, 8, 1, &val, tx); } ASSERT(error == 0); if (strcmp(name, zfs_prop_to_name(ZFS_PROP_NORMALIZE)) == 0) norm = val; else if (strcmp(name, zfs_prop_to_name(ZFS_PROP_CASE)) == 0) sense = val; } ASSERT(version != 0); error = zap_update(os, moid, ZPL_VERSION_STR, 8, 1, &version, tx); /* * Create zap object used for SA attribute registration */ if (version >= ZPL_VERSION_SA) { sa_obj = zap_create(os, DMU_OT_SA_MASTER_NODE, DMU_OT_NONE, 0, tx); error = zap_add(os, moid, ZFS_SA_ATTRS, 8, 1, &sa_obj, tx); ASSERT(error == 0); } else { sa_obj = 0; } /* * Create a delete queue. */ obj = zap_create(os, DMU_OT_UNLINKED_SET, DMU_OT_NONE, 0, tx); error = zap_add(os, moid, ZFS_UNLINKED_SET, 8, 1, &obj, tx); ASSERT(error == 0); /* * Create root znode. Create minimal znode/vnode/zfsvfs * to allow zfs_mknode to work. 
*/ VATTR_NULL(&vattr); vattr.va_mask = AT_MODE|AT_UID|AT_GID|AT_TYPE; vattr.va_type = VDIR; vattr.va_mode = S_IFDIR|0755; vattr.va_uid = crgetuid(cr); vattr.va_gid = crgetgid(cr); zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP); rootzp = kmem_cache_alloc(znode_cache, KM_SLEEP); ASSERT(!POINTER_IS_VALID(rootzp->z_zfsvfs)); rootzp->z_moved = 0; rootzp->z_unlinked = 0; rootzp->z_atime_dirty = 0; rootzp->z_is_sa = USE_SA(version, os); zfsvfs->z_os = os; zfsvfs->z_parent = zfsvfs; zfsvfs->z_version = version; zfsvfs->z_use_fuids = USE_FUIDS(version, os); zfsvfs->z_use_sa = USE_SA(version, os); zfsvfs->z_norm = norm; error = sa_setup(os, sa_obj, zfs_attr_table, ZPL_END, &zfsvfs->z_attr_table); ASSERT(error == 0); /* * Fold case on file systems that are always or sometimes case * insensitive. */ if (sense == ZFS_CASE_INSENSITIVE || sense == ZFS_CASE_MIXED) zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER; mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL); list_create(&zfsvfs->z_all_znodes, sizeof (znode_t), offsetof(znode_t, z_link_node)); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_init(&zfsvfs->z_hold_mtx[i], NULL, MUTEX_DEFAULT, NULL); rootzp->z_zfsvfs = zfsvfs; VERIFY(0 == zfs_acl_ids_create(rootzp, IS_ROOT_NODE, &vattr, cr, NULL, &acl_ids)); zfs_mknode(rootzp, &vattr, tx, cr, IS_ROOT_NODE, &zp, &acl_ids); ASSERT3P(zp, ==, rootzp); error = zap_add(os, moid, ZFS_ROOT_OBJ, 8, 1, &rootzp->z_id, tx); ASSERT(error == 0); zfs_acl_ids_free(&acl_ids); POINTER_INVALIDATE(&rootzp->z_zfsvfs); sa_handle_destroy(rootzp->z_sa_hdl); kmem_cache_free(znode_cache, rootzp); /* * Create shares directory */ error = zfs_create_share_dir(zfsvfs, tx); ASSERT(error == 0); for (i = 0; i != ZFS_OBJ_MTX_SZ; i++) mutex_destroy(&zfsvfs->z_hold_mtx[i]); kmem_free(zfsvfs, sizeof (zfsvfs_t)); } #endif /* _KERNEL */ static int zfs_sa_setup(objset_t *osp, sa_attr_type_t **sa_table) { uint64_t sa_obj = 0; int error; error = zap_lookup(osp, MASTER_NODE_OBJ, ZFS_SA_ATTRS, 8, 1, &sa_obj); if (error != 0 
&& error != ENOENT) return (error); error = sa_setup(osp, sa_obj, zfs_attr_table, ZPL_END, sa_table); return (error); } static int zfs_grab_sa_handle(objset_t *osp, uint64_t obj, sa_handle_t **hdlp, dmu_buf_t **db, void *tag) { dmu_object_info_t doi; int error; if ((error = sa_buf_hold(osp, obj, tag, db)) != 0) return (error); dmu_object_info_from_db(*db, &doi); if ((doi.doi_bonus_type != DMU_OT_SA && doi.doi_bonus_type != DMU_OT_ZNODE) || doi.doi_bonus_type == DMU_OT_ZNODE && doi.doi_bonus_size < sizeof (znode_phys_t)) { sa_buf_rele(*db, tag); return (SET_ERROR(ENOTSUP)); } error = sa_handle_get(osp, obj, NULL, SA_HDL_PRIVATE, hdlp); if (error != 0) { sa_buf_rele(*db, tag); return (error); } return (0); } void zfs_release_sa_handle(sa_handle_t *hdl, dmu_buf_t *db, void *tag) { sa_handle_destroy(hdl); sa_buf_rele(db, tag); } /* * Given an object number, return its parent object number and whether * or not the object is an extended attribute directory. */ static int zfs_obj_to_pobj(objset_t *osp, sa_handle_t *hdl, sa_attr_type_t *sa_table, uint64_t *pobjp, int *is_xattrdir) { uint64_t parent; uint64_t pflags; uint64_t mode; uint64_t parent_mode; sa_bulk_attr_t bulk[3]; sa_handle_t *sa_hdl; dmu_buf_t *sa_db; int count = 0; int error; SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_PARENT], NULL, &parent, sizeof (parent)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_FLAGS], NULL, &pflags, sizeof (pflags)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, &mode, sizeof (mode)); if ((error = sa_bulk_lookup(hdl, bulk, count)) != 0) return (error); /* * When a link is removed its parent pointer is not changed and will * be invalid. There are two cases where a link is removed but the * file stays around, when it goes to the delete queue and when there * are additional links. 
*/ error = zfs_grab_sa_handle(osp, parent, &sa_hdl, &sa_db, FTAG); if (error != 0) return (error); error = sa_lookup(sa_hdl, ZPL_MODE, &parent_mode, sizeof (parent_mode)); zfs_release_sa_handle(sa_hdl, sa_db, FTAG); if (error != 0) return (error); *is_xattrdir = ((pflags & ZFS_XATTR) != 0) && S_ISDIR(mode); /* * Extended attributes can be applied to files, directories, etc. * Otherwise the parent must be a directory. */ if (!*is_xattrdir && !S_ISDIR(parent_mode)) return (SET_ERROR(EINVAL)); *pobjp = parent; return (0); } /* * Given an object number, return some zpl level statistics */ static int zfs_obj_to_stats_impl(sa_handle_t *hdl, sa_attr_type_t *sa_table, zfs_stat_t *sb) { sa_bulk_attr_t bulk[4]; int count = 0; SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_MODE], NULL, &sb->zs_mode, sizeof (sb->zs_mode)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_GEN], NULL, &sb->zs_gen, sizeof (sb->zs_gen)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_LINKS], NULL, &sb->zs_links, sizeof (sb->zs_links)); SA_ADD_BULK_ATTR(bulk, count, sa_table[ZPL_CTIME], NULL, &sb->zs_ctime, sizeof (sb->zs_ctime)); return (sa_bulk_lookup(hdl, bulk, count)); } static int zfs_obj_to_path_impl(objset_t *osp, uint64_t obj, sa_handle_t *hdl, sa_attr_type_t *sa_table, char *buf, int len) { sa_handle_t *sa_hdl; sa_handle_t *prevhdl = NULL; dmu_buf_t *prevdb = NULL; dmu_buf_t *sa_db = NULL; char *path = buf + len - 1; int error; *path = '\0'; sa_hdl = hdl; for (;;) { uint64_t pobj; char component[MAXNAMELEN + 2]; size_t complen; int is_xattrdir; if (prevdb) zfs_release_sa_handle(prevhdl, prevdb, FTAG); if ((error = zfs_obj_to_pobj(osp, sa_hdl, sa_table, &pobj, &is_xattrdir)) != 0) break; if (pobj == obj) { if (path[0] != '/') *--path = '/'; break; } component[0] = '/'; if (is_xattrdir) { (void) sprintf(component + 1, ""); } else { error = zap_value_search(osp, pobj, obj, ZFS_DIRENT_OBJ(-1ULL), component + 1); if (error != 0) break; } complen = strlen(component); path -= complen; ASSERT(path >= buf); 
bcopy(component, path, complen); obj = pobj; if (sa_hdl != hdl) { prevhdl = sa_hdl; prevdb = sa_db; } error = zfs_grab_sa_handle(osp, obj, &sa_hdl, &sa_db, FTAG); if (error != 0) { sa_hdl = prevhdl; sa_db = prevdb; break; } } if (sa_hdl != NULL && sa_hdl != hdl) { ASSERT(sa_db != NULL); zfs_release_sa_handle(sa_hdl, sa_db, FTAG); } if (error == 0) (void) memmove(buf, path, buf + len - path); return (error); } int zfs_obj_to_path(objset_t *osp, uint64_t obj, char *buf, int len) { sa_attr_type_t *sa_table; sa_handle_t *hdl; dmu_buf_t *db; int error; error = zfs_sa_setup(osp, &sa_table); if (error != 0) return (error); error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); if (error != 0) return (error); error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); zfs_release_sa_handle(hdl, db, FTAG); return (error); } int zfs_obj_to_stats(objset_t *osp, uint64_t obj, zfs_stat_t *sb, char *buf, int len) { char *path = buf + len - 1; sa_attr_type_t *sa_table; sa_handle_t *hdl; dmu_buf_t *db; int error; *path = '\0'; error = zfs_sa_setup(osp, &sa_table); if (error != 0) return (error); error = zfs_grab_sa_handle(osp, obj, &hdl, &db, FTAG); if (error != 0) return (error); error = zfs_obj_to_stats_impl(hdl, sa_table, sb); if (error != 0) { zfs_release_sa_handle(hdl, db, FTAG); return (error); } error = zfs_obj_to_path_impl(osp, obj, hdl, sa_table, buf, len); zfs_release_sa_handle(hdl, db, FTAG); return (error); } #ifdef _KERNEL int zfs_znode_parent_and_name(znode_t *zp, znode_t **dzpp, char *buf) { zfsvfs_t *zfsvfs = zp->z_zfsvfs; uint64_t parent; int is_xattrdir; int err; /* Extended attributes should not be visible as regular files. */ if ((zp->z_pflags & ZFS_XATTR) != 0) return (SET_ERROR(EINVAL)); err = zfs_obj_to_pobj(zfsvfs->z_os, zp->z_sa_hdl, zfsvfs->z_attr_table, &parent, &is_xattrdir); if (err != 0) return (err); ASSERT0(is_xattrdir); /* No name as this is a root object. 
*/ if (parent == zp->z_id) return (SET_ERROR(EINVAL)); err = zap_value_search(zfsvfs->z_os, parent, zp->z_id, ZFS_DIRENT_OBJ(-1ULL), buf); if (err != 0) return (err); err = zfs_zget(zfsvfs, parent, dzpp); return (err); } #endif /* _KERNEL */ Index: stable/10 =================================================================== --- stable/10 (revision 324004) +++ stable/10 (revision 324005) Property changes on: stable/10 ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r323479,323491