Differential D7421 Diff 19575 sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c

Changeset View

Standalone View

sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_ctldir.c

Show First 20 Lines • Show All 64 Lines • ▼ Show 20 Lines
* so that it cannot be freed until all snapshots have been unmounted.		* so that it cannot be freed until all snapshots have been unmounted.
*/		*/

#include <sys/zfs_context.h>		#include <sys/zfs_context.h>
#include <sys/zfs_ctldir.h>		#include <sys/zfs_ctldir.h>
#include <sys/zfs_ioctl.h>		#include <sys/zfs_ioctl.h>
#include <sys/zfs_vfsops.h>		#include <sys/zfs_vfsops.h>
#include <sys/namei.h>		#include <sys/namei.h>
#include <sys/gfs.h>
#include <sys/stat.h>		#include <sys/stat.h>
#include <sys/dmu.h>		#include <sys/dmu.h>
		#include <sys/dsl_dataset.h>
#include <sys/dsl_destroy.h>		#include <sys/dsl_destroy.h>
#include <sys/dsl_deleg.h>		#include <sys/dsl_deleg.h>
#include <sys/mount.h>		#include <sys/mount.h>
#include <sys/sunddi.h>		#include <sys/sunddi.h>

#include "zfs_namecheck.h"		#include "zfs_namecheck.h"

typedef struct zfsctl_node {		/*
gfs_dir_t zc_gfs_private;		* "Synthetic" filesystem implementation.
uint64_t zc_id;		*/
timestruc_t zc_cmtime; /* ctime and mtime, always the same */
} zfsctl_node_t;

typedef struct zfsctl_snapdir {		/*
zfsctl_node_t sd_node;		* Assert that A implies B.
kmutex_t sd_lock;		*/
avl_tree_t sd_snaps;		#define KASSERT_IMPLY(A, B, msg) KASSERT(!(A) \|\| (B), (msg));
} zfsctl_snapdir_t;

typedef struct {		static MALLOC_DEFINE(M_SFSNODES, "sfs_nodes", "synthetic-fs nodes");
char *se_name;
vnode_t *se_root;
avl_node_t se_node;
} zfs_snapentry_t;

		/*
		 * Generic part of a synthetic-fs node.  Larger node types embed it as
		 * their first member (see zfsctl_root_t), and sfs_alloc_node() fills in
		 * all three fields.
		 */
		typedef struct sfs_node {
		/* Node name; copied in with strlcpy() by sfs_alloc_node(). */
		char sn_name[MAXNAMELEN];
		/* ID of the parent node; compared together with sn_id. */
		uint64_t sn_parent_id;
		/* ID of this node; also used (truncated to u_int) as the hash key. */
		uint64_t sn_id;
		} sfs_node_t;

		/*
		* Check the parent's ID as well as the node's to account for a chance
		* that IDs originating from different domains (snapshot IDs, artifical
		* IDs, znode IDs) may clash.
		*/
static int		static int
snapentry_compare(const void a, const void b)		sfs_compare_ids(struct vnode vp, void arg)
{		{
const zfs_snapentry_t *sa = a;		sfs_node_t *n1 = vp->v_data;
const zfs_snapentry_t *sb = b;		sfs_node_t *n2 = arg;
int ret = strcmp(sa->se_name, sb->se_name);		bool equal;

if (ret < 0)		equal = n1->sn_id == n2->sn_id &&
return (-1);		n1->sn_parent_id == n2->sn_parent_id;
else if (ret > 0)
return (1);		/* Zero means equality. */
else		return (!equal);
return (0);
}		}

#ifdef illumos		static int
vnodeops_t *zfsctl_ops_root;		sfs_vnode_get(const struct mount *mp, int flags, uint64_t parent_id,
vnodeops_t *zfsctl_ops_snapdir;		uint64_t id, struct vnode **vpp)
vnodeops_t *zfsctl_ops_snapshot;		{
vnodeops_t *zfsctl_ops_shares;		sfs_node_t search;
vnodeops_t *zfsctl_ops_shares_dir;		int err;
		smhUnsubmitted Not Done Inline Actions err seems redundant here, could just return the result from vfs_hash_get? smh: err seems redundant here, could just return the result from vfs_hash_get?
		avgAuthorUnsubmitted Not Done Inline Actions Well, it's just my personal preference. I do not like function calls within return statements. I don't like resulting longer lines too. I guess that style(9) does not prohibit this? avg: Well, it's just my personal preference. I do not like function calls within return statements.

static const fs_operation_def_t zfsctl_tops_root[];		search.sn_id = id;
static const fs_operation_def_t zfsctl_tops_snapdir[];		search.sn_parent_id = parent_id;
static const fs_operation_def_t zfsctl_tops_snapshot[];		err = vfs_hash_get(mp, (u_int)id, flags, curthread, vpp,
static const fs_operation_def_t zfsctl_tops_shares[];		sfs_compare_ids, &search);
#else		return (err);
static struct vop_vector zfsctl_ops_root;		}
static struct vop_vector zfsctl_ops_snapdir;
static struct vop_vector zfsctl_ops_snapshot;
static struct vop_vector zfsctl_ops_shares;
static struct vop_vector zfsctl_ops_shares_dir;
#endif

static vnode_t zfsctl_mknode_snapdir(vnode_t );		static int
static vnode_t zfsctl_mknode_shares(vnode_t );		sfs_vnode_insert(struct vnode *vp, int flags, uint64_t parent_id,
static vnode_t zfsctl_snapshot_mknode(vnode_t , uint64_t objset);		uint64_t id, struct vnode **vpp)
static int zfsctl_unmount_snap(zfs_snapentry_t , int, cred_t );		{
		int err;
		smhUnsubmitted Not Done Inline Actions Redundant? smh: Redundant?
		avgAuthorUnsubmitted Not Done Inline Actions ditto avg: ditto

#ifdef illumos		KASSERT(vp->v_data != NULL, ("sfs_vnode_insert with NULL v_data"));
static gfs_opsvec_t zfsctl_opsvec[] = {		err = vfs_hash_insert(vp, (u_int)id, flags, curthread, vpp,
{ ".zfs", zfsctl_tops_root, &zfsctl_ops_root },		sfs_compare_ids, vp->v_data);
{ ".zfs/snapshot", zfsctl_tops_snapdir, &zfsctl_ops_snapdir },		return (err);
{ ".zfs/snapshot/vnode", zfsctl_tops_snapshot, &zfsctl_ops_snapshot },		}
{ ".zfs/shares", zfsctl_tops_shares, &zfsctl_ops_shares_dir },
{ ".zfs/shares/vnode", zfsctl_tops_shares, &zfsctl_ops_shares },
{ NULL }
};
#endif

		/*
		 * Counterpart of sfs_vnode_insert(): hands the vnode to
		 * vfs_hash_remove().
		 */
		static void
		sfs_vnode_remove(struct vnode *vp)
		{
		vfs_hash_remove(vp);
		}

		/*
		 * Callback type used by sfs_vgetx() to initialize a freshly allocated
		 * vnode.  It is called as setup(vp, arg) after the vnode has been put on
		 * the mount's vnode list and before it is inserted into the hash; arg is
		 * the opaque pointer the sfs_vgetx() caller supplied.
		 */
		typedef void sfs_vnode_setup_fn(vnode_t *vp, void *arg);

		static int
		sfs_vgetx(struct mount *mp, int flags, uint64_t parent_id, uint64_t id,
		const char tag, struct vop_vector vops,
		sfs_vnode_setup_fn setup, void *arg,
		struct vnode **vpp)
		{
		struct vnode *vp;
		int error;

		error = sfs_vnode_get(mp, flags, parent_id, id, vpp);
		if (error != 0 \|\| *vpp != NULL) {
		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
		"sfs vnode with no data");
		return (error);
		}

		/* Allocate a new vnode/inode. */
		error = getnewvnode(tag, mp, vops, &vp);
		if (error != 0) {
		*vpp = NULL;
		return (error);
		}

/*		/*
* Root directory elements. We only have two entries		* Exclusively lock the vnode while it's being constructed.
* snapshot and shares.
*/		*/
static gfs_dirent_t zfsctl_root_entries[] = {		lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
{ "snapshot", zfsctl_mknode_snapdir, GFS_CACHE_VNODE },		error = insmntque(vp, mp);
{ "shares", zfsctl_mknode_shares, GFS_CACHE_VNODE },		if (error != 0) {
{ NULL }		*vpp = NULL;
};		return (error);
		}

/* include . and .. in the calculation */		setup(vp, arg);
#define NROOT_ENTRIES ((sizeof (zfsctl_root_entries) / \
sizeof (gfs_dirent_t)) + 1)

		error = sfs_vnode_insert(vp, flags, parent_id, id, vpp);
		if (error != 0 \|\| *vpp != NULL) {
		KASSERT_IMPLY(error == 0, (*vpp)->v_data != NULL,
		"sfs vnode with no data");
		return (error);
		}

		*vpp = vp;
		return (0);
		}

		/*
		 * Print the generic fields of a synthetic-fs node (name, parent ID and
		 * own ID), one tab-indented "key = value" line per field.
		 */
		static void
		sfs_print_node(sfs_node_t *node)
		{
			const uintmax_t parent_id = (uintmax_t)node->sn_parent_id;
			const uintmax_t own_id = (uintmax_t)node->sn_id;

			printf("\tname = %s\n", node->sn_name);
			printf("\tparent_id = %ju\n", parent_id);
			printf("\tid = %ju\n", own_id);
		}

		/*
		 * Allocate and initialize a synthetic-fs node.
		 *
		 * size      - number of bytes to allocate; asserted to be at least
		 *             sizeof(sfs_node_t), so a caller may request a larger
		 *             structure that starts with an sfs_node_t.
		 * name      - node name; asserted to fit in sn_name including the
		 *             terminating NUL.
		 * parent_id - stored in sn_parent_id.
		 * id        - stored in sn_id.
		 *
		 * The memory comes from the M_SFSNODES malloc type and is zero-filled
		 * (M_ZERO), so any bytes beyond the generic header start out as zero.
		 * The result is not checked for NULL: the allocation is made with
		 * M_WAITOK.  Release the node with sfs_destroy_node().
		 */
		static sfs_node_t *
		sfs_alloc_node(size_t size, const char *name, uint64_t parent_id, uint64_t id)
		{
			struct sfs_node *sn;

			KASSERT(strlen(name) < sizeof(sn->sn_name),
			    ("sfs node name is too long"));
			KASSERT(size >= sizeof(*sn), ("sfs node size is too small"));

			sn = malloc(size, M_SFSNODES, M_WAITOK | M_ZERO);
			sn->sn_parent_id = parent_id;
			sn->sn_id = id;
			/* strlcpy() always NUL-terminates, even without INVARIANTS. */
			strlcpy(sn->sn_name, name, sizeof(sn->sn_name));

			return (sn);
		}

		/*
		 * Free a node allocated by sfs_alloc_node(); both use the M_SFSNODES
		 * malloc type.
		 */
		static void
		sfs_destroy_node(sfs_node_t *node)
		{
		free(node, M_SFSNODES);
		}

		/*
		 * Detach the node data from a vnode that is being reclaimed.
		 *
		 * Removes vp via sfs_vnode_remove(), clears vp->v_data and returns the
		 * pointer that was stored there.  Nothing is freed here; the caller
		 * decides what to do with the returned data.
		 */
		static void *
		sfs_reclaim_vnode(vnode_t *vp)
		{
			void *data;

			sfs_vnode_remove(vp);
			data = vp->v_data;
			vp->v_data = NULL;
			return (data);
		}

		static int
		sfs_readdir_common(uint64_t parent_id, uint64_t id, struct vop_readdir_args *ap,
		uio_t uio, off_t offp)
		{
		struct dirent entry;
		int error;

		/* Reset ncookies for subsequent use of vfs_read_dirent. */
		if (ap->a_ncookies != NULL)
		*ap->a_ncookies = 0;

		if (uio->uio_resid < sizeof(entry))
		return (EINVAL);
		asomersUnsubmitted Done Inline Actions should be return (SET_ERROR(EINVAL)) here and on lines 266 and 281 asomers: should be ``` return (SET_ERROR(EINVAL)) ``` here and on lines 266 and 281

		if (uio->uio_offset < 0)
		return (EINVAL);
		if (uio->uio_offset == 0) {
		entry.d_fileno = id;
		entry.d_type = DT_DIR;
		entry.d_name[0] = '.';
		entry.d_name[1] = '\0';
		entry.d_namlen = 1;
		entry.d_reclen = sizeof(entry);
		asomersUnsubmitted Not Done Inline Actions should be sizeof (entry) (note the space) here and elsewhere in the file. asomers: should be ``` sizeof (entry) ``` (note the space) here and elsewhere in the file.
		avgAuthorUnsubmitted Not Done Inline Actions I prefer to use style(9) in this file. avg: I prefer to use //style(9)// in this file.
		asomersUnsubmitted Not Done Inline Actions Better to have consistent but nonstandard style than inconsistent style. If you're going to change a file's style, you should do it all at once. But we can't change this entire file's style, because that would make merges from OpenZFS harder. So please keep the whole file in Solaris style. asomers: Better to have consistent but nonstandard style than inconsistent style. If you're going to…
		avgAuthorUnsubmitted Not Done Inline Actions I agree in general, but this file is so much rewritten now that it would be extremely hard to merge upstream changes to it anyway. avg: I agree in general, but this file is so much rewritten now that it would be extremely hard to…
		asomersUnsubmitted Done Inline Actions In that case, you should convert the entire file rather than leave it with mixed style. asomers: In that case, you should convert the entire file rather than leave it with mixed style.
		avgAuthorUnsubmitted Not Done Inline Actions Done. avg: Done.
		error = vfs_read_dirent(ap, &entry, uio->uio_offset);
		if (error != 0)
		return (error);
		}

		if (uio->uio_offset < sizeof(entry))
		return (EINVAL);
		if (uio->uio_offset == sizeof(entry)) {
		entry.d_fileno = parent_id;
		entry.d_type = DT_DIR;
		entry.d_name[0] = '.';
		entry.d_name[1] = '.';
		entry.d_name[2] = '\0';
		entry.d_namlen = 2;
		entry.d_reclen = sizeof(entry);
		error = vfs_read_dirent(ap, &entry, uio->uio_offset);
		if (error != 0)
		return (error);
		}

		if (error == 0 && offp != NULL)
		offp = 2 sizeof(entry);
		return (error);
		}


/*		/*
* Initialize the various GFS pieces we'll need to create and manipulate .zfs		* .zfs inode namespace
* directories. This is called from the ZFS init routine, and initializes the		*
* vnode ops vectors that we'll be using.		* We need to generate unique inode numbers for all files and directories
		* within the .zfs pseudo-filesystem. We use the following scheme:
		*
		* ENTRY ZFSCTL_INODE
		* .zfs 1
		* .zfs/snapshot 2
		* .zfs/snapshot/<snap> objectid(snap)
*/		*/
		#define ZFSCTL_INO_SNAP(id) (id)

		static struct vop_vector zfsctl_ops_root;
		static struct vop_vector zfsctl_ops_snapdir;
		static struct vop_vector zfsctl_ops_snapshot;
		static struct vop_vector zfsctl_ops_shares_dir;

void		void
zfsctl_init(void)		zfsctl_init(void)
{		{
#ifdef illumos
VERIFY(gfs_make_opsvec(zfsctl_opsvec) == 0);
#endif
}		}

void		void
zfsctl_fini(void)		zfsctl_fini(void)
{		{
#ifdef illumos
/*
* Remove vfsctl vnode ops
*/
if (zfsctl_ops_root)
vn_freevnodeops(zfsctl_ops_root);
if (zfsctl_ops_snapdir)
vn_freevnodeops(zfsctl_ops_snapdir);
if (zfsctl_ops_snapshot)
vn_freevnodeops(zfsctl_ops_snapshot);
if (zfsctl_ops_shares)
vn_freevnodeops(zfsctl_ops_shares);
if (zfsctl_ops_shares_dir)
vn_freevnodeops(zfsctl_ops_shares_dir);

zfsctl_ops_root = NULL;
zfsctl_ops_snapdir = NULL;
zfsctl_ops_snapshot = NULL;
zfsctl_ops_shares = NULL;
zfsctl_ops_shares_dir = NULL;
#endif /* illumos */
}		}

boolean_t		boolean_t
zfsctl_is_node(vnode_t *vp)		zfsctl_is_node(vnode_t *vp)
{		{
return (vn_matchops(vp, zfsctl_ops_root) \|\|		return (vn_matchops(vp, zfsctl_ops_root) \|\|
vn_matchops(vp, zfsctl_ops_snapdir) \|\|		vn_matchops(vp, zfsctl_ops_snapdir) \|\|
vn_matchops(vp, zfsctl_ops_snapshot) \|\|		vn_matchops(vp, zfsctl_ops_snapshot) \|\|
vn_matchops(vp, zfsctl_ops_shares) \|\|
vn_matchops(vp, zfsctl_ops_shares_dir));		vn_matchops(vp, zfsctl_ops_shares_dir));

}		}

/*		typedef struct zfsctl_root {
* Return the inode number associated with the 'snapshot' or		sfs_node_t node;
* 'shares' directory.		sfs_node_t *snapdir;
*/		timestruc_t cmtime;
/* ARGSUSED */		} zfsctl_root_t;
static ino64_t
zfsctl_root_inode_cb(vnode_t *vp, int index)
{
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;

ASSERT(index < 2);

if (index == 0)
return (ZFSCTL_INO_SNAPDIR);

return (zfsvfs->z_shares_dir);
}

/*		/*
* Create the '.zfs' directory. This directory is cached as part of the VFS		* Create the '.zfs' directory.
* structure. This results in a hold on the vfs_t. The code in zfs_umount()
* therefore checks against a vfs_count of 2 instead of 1. This reference
* is removed when the ctldir is destroyed in the unmount.
*/		*/
void		void
zfsctl_create(zfsvfs_t *zfsvfs)		zfsctl_create(zfsvfs_t *zfsvfs)
{		{
vnode_t vp, rvp;		zfsctl_root_t *dot_zfs;
zfsctl_node_t *zcp;		sfs_node_t *snapdir;
		vnode_t *rvp;
uint64_t crtime[2];		uint64_t crtime[2];

ASSERT(zfsvfs->z_ctldir == NULL);		ASSERT(zfsvfs->z_ctldir == NULL);

vp = gfs_root_create(sizeof (zfsctl_node_t), zfsvfs->z_vfs,		snapdir = sfs_alloc_node(sizeof(*snapdir), "snapshot", ZFSCTL_INO_ROOT,
&zfsctl_ops_root, ZFSCTL_INO_ROOT, zfsctl_root_entries,		ZFSCTL_INO_SNAPDIR);
zfsctl_root_inode_cb, MAXNAMELEN, NULL, NULL);		dot_zfs = (zfsctl_root_t )sfs_alloc_node(sizeof(dot_zfs), ".zfs", 0,
zcp = vp->v_data;		ZFSCTL_INO_ROOT);
zcp->zc_id = ZFSCTL_INO_ROOT;		dot_zfs->snapdir = snapdir;

VERIFY(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp) == 0);		VERIFY(VFS_ROOT(zfsvfs->z_vfs, LK_EXCLUSIVE, &rvp) == 0);
VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),		VERIFY(0 == sa_lookup(VTOZ(rvp)->z_sa_hdl, SA_ZPL_CRTIME(zfsvfs),
&crtime, sizeof (crtime)));		&crtime, sizeof (crtime)));
ZFS_TIME_DECODE(&zcp->zc_cmtime, crtime);		ZFS_TIME_DECODE(&dot_zfs->cmtime, crtime);
VN_URELE(rvp);		vput(rvp);

/*		zfsvfs->z_ctldir = dot_zfs;
* We're only faking the fact that we have a root of a filesystem for
* the sake of the GFS interfaces. Undo the flag manipulation it did
* for us.
*/
vp->v_vflag &= ~VV_ROOT;

zfsvfs->z_ctldir = vp;

VOP_UNLOCK(vp, 0);
}		}

/*		/*
* Destroy the '.zfs' directory. Only called when the filesystem is unmounted.		* Destroy the '.zfs' directory. Only called when the filesystem is unmounted.
* There might still be more references if we were force unmounted, but only		* The nodes must not have any associated vnodes by now as they should be
* new zfs_inactive() calls can occur and they don't reference .zfs		* vflush-ed.
*/		*/
void		void
zfsctl_destroy(zfsvfs_t *zfsvfs)		zfsctl_destroy(zfsvfs_t *zfsvfs)
{		{
VN_RELE(zfsvfs->z_ctldir);		sfs_destroy_node(zfsvfs->z_ctldir->snapdir);
		sfs_destroy_node((sfs_node_t *)zfsvfs->z_ctldir);
zfsvfs->z_ctldir = NULL;		zfsvfs->z_ctldir = NULL;
}		}

/*		static int
* Given a root znode, retrieve the associated .zfs directory.		zfsctl_fs_root_vnode(struct mount mp, void arg __unused, int flags,
* Add a hold to the vnode and return it.		struct vnode **vpp)
*/
vnode_t *
zfsctl_root(znode_t *zp)
{		{
ASSERT(zfs_has_ctldir(zp));		return (VFS_ROOT(mp, flags, vpp));
VN_HOLD(zp->z_zfsvfs->z_ctldir);
return (zp->z_zfsvfs->z_ctldir);
}		}

		/*
		 * Vnode setup callback passed to sfs_vgetx() by zfsctl_root_vnode() and
		 * zfsctl_snapdir_vnode().
		 *
		 * vp  - the new vnode; asserted to be exclusively locked.
		 * arg - node data to attach; stored in vp->v_data.
		 *
		 * Marks the vnode as a directory and enables shared locking on it.
		 */
		static void
		zfsctl_common_vnode_setup(vnode_t *vp, void *arg)
		{
			ASSERT_VOP_ELOCKED(vp, __func__);

			/* We support shared locking. */
			VN_LOCK_ASHARE(vp);
			vp->v_type = VDIR;
			vp->v_data = arg;
		}

static int		static int
zfsctl_common_print(ap)		zfsctl_root_vnode(struct mount mp, void arg __unused, int flags,
struct vop_print_args /* {		struct vnode **vpp)
struct vnode *a_vp;
} / ap;
{		{
vnode_t *vp = ap->a_vp;		void *node;
gfs_file_t *fp = vp->v_data;		int err;

printf(" parent = %p\n", fp->gfs_parent);		node = ((zfsvfs_t*)mp->mnt_data)->z_ctldir;
printf(" type = %d\n", fp->gfs_type);		err = sfs_vgetx(mp, flags, 0, ZFSCTL_INO_ROOT, "zfs", &zfsctl_ops_root,
printf(" index = %d\n", fp->gfs_index);		zfsctl_common_vnode_setup, node, vpp);
printf(" ino = %ju\n", (uintmax_t)fp->gfs_ino);		return (err);
return (0);
}		}

		static int
		zfsctl_snapdir_vnode(struct mount mp, void arg __unused, int flags,
		struct vnode **vpp)
		{
		void *node;
		int err;
		smhUnsubmitted Not Done Inline Actions Redundant? smh: Redundant?
		avgAuthorUnsubmitted Not Done Inline Actions ditto avg: ditto

		node = ((zfsvfs_t*)mp->mnt_data)->z_ctldir->snapdir;
		err = sfs_vgetx(mp, flags, ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, "zfs",
		&zfsctl_ops_snapdir, zfsctl_common_vnode_setup, node, vpp);
		return (err);
		}

/*		/*
		* Given a root znode, retrieve the associated .zfs directory.
		* Add a hold to the vnode and return it.
		*/
		int
		zfsctl_root(zfsvfs_t zfsvfs, int flags, vnode_t *vpp)
		{
		vnode_t *vp;
		int error;

		error = zfsctl_root_vnode(zfsvfs->z_vfs, NULL, flags, vpp);
		smhUnsubmitted Not Done Inline Actions redundant? smh: redundant?
		avgAuthorUnsubmitted Not Done Inline Actions ditto avg: ditto
		return (error);
		}

		/*
* Common open routine. Disallow any write access.		* Common open routine. Disallow any write access.
*/		*/
/* ARGSUSED */		/* ARGSUSED */
static int		static int
zfsctl_common_open(struct vop_open_args *ap)		zfsctl_common_open(struct vop_open_args *ap)
{		{
int flags = ap->a_mode;		int flags = ap->a_mode;

Show All 23 Lines	struct vop_access_args /* {
struct vnode *a_vp;		struct vnode *a_vp;
accmode_t a_accmode;		accmode_t a_accmode;
struct ucred *a_cred;		struct ucred *a_cred;
struct thread *a_td;		struct thread *a_td;
} / ap;		} / ap;
{		{
accmode_t accmode = ap->a_accmode;		accmode_t accmode = ap->a_accmode;

#ifdef TODO
if (flags & V_ACE_MASK) {
if (accmode & ACE_ALL_WRITE_PERMS)
return (SET_ERROR(EACCES));
} else {
#endif
if (accmode & VWRITE)		if (accmode & VWRITE)
return (SET_ERROR(EACCES));		return (SET_ERROR(EACCES));
#ifdef TODO
}
#endif

return (0);		return (0);
}		}

/*		/*
* Common getattr function. Fill in basic information.		* Common getattr function. Fill in basic information.
*/		*/
static void		static void
zfsctl_common_getattr(vnode_t vp, vattr_t vap)		zfsctl_common_getattr(vnode_t vp, vattr_t vap)
{		{
timestruc_t now;		timestruc_t now;
		sfs_node_t *node;

		node = vp->v_data;

vap->va_uid = 0;		vap->va_uid = 0;
vap->va_gid = 0;		vap->va_gid = 0;
vap->va_rdev = 0;		vap->va_rdev = 0;
/*		/*
* We are a purely virtual object, so we have no		* We are a purely virtual object, so we have no
* blocksize or allocated blocks.		* blocksize or allocated blocks.
*/		*/
vap->va_blksize = 0;		vap->va_blksize = 0;
vap->va_nblocks = 0;		vap->va_nblocks = 0;
vap->va_seq = 0;		vap->va_seq = 0;
vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];		vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
vap->va_mode = S_IRUSR \| S_IXUSR \| S_IRGRP \| S_IXGRP \|		vap->va_mode = S_IRUSR \| S_IXUSR \| S_IRGRP \| S_IXGRP \|
S_IROTH \| S_IXOTH;		S_IROTH \| S_IXOTH;
vap->va_type = VDIR;		vap->va_type = VDIR;
/*		/*
* We live in the now (for atime).		* We live in the now (for atime).
*/		*/
gethrestime(&now);		gethrestime(&now);
vap->va_atime = now;		vap->va_atime = now;
/* FreeBSD: Reset chflags(2) flags. */		/* FreeBSD: Reset chflags(2) flags. */
vap->va_flags = 0;		vap->va_flags = 0;

		vap->va_nodeid = node->sn_id;

		/* At least '.' and '..'. */
		vap->va_nlink = 2;
}		}

/ARGSUSED/		/ARGSUSED/
static int		static int
zfsctl_common_fid(ap)		zfsctl_common_fid(ap)
struct vop_fid_args /* {		struct vop_fid_args /* {
struct vnode *a_vp;		struct vnode *a_vp;
struct fid *a_fid;		struct fid *a_fid;
} / ap;		} / ap;
{		{
vnode_t *vp = ap->a_vp;		vnode_t *vp = ap->a_vp;
fid_t fidp = (void )ap->a_fid;		fid_t fidp = (void )ap->a_fid;
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;		sfs_node_t *node = vp->v_data;
zfsctl_node_t *zcp = vp->v_data;		uint64_t object = node->sn_id;
uint64_t object = zcp->zc_id;
zfid_short_t *zfid;		zfid_short_t *zfid;
int i;		int i;

ZFS_ENTER(zfsvfs);

#ifdef illumos
if (fidp->fid_len < SHORT_FID_LEN) {
fidp->fid_len = SHORT_FID_LEN;
ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENOSPC));
}
#endif

zfid = (zfid_short_t *)fidp;		zfid = (zfid_short_t *)fidp;

zfid->zf_len = SHORT_FID_LEN;		zfid->zf_len = SHORT_FID_LEN;

for (i = 0; i < sizeof (zfid->zf_object); i++)		for (i = 0; i < sizeof (zfid->zf_object); i++)
zfid->zf_object[i] = (uint8_t)(object >> (8 * i));		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));

/* .zfs znodes always have a generation number of 0 */		/* .zfs nodes always have a generation number of 0 */
for (i = 0; i < sizeof (zfid->zf_gen); i++)		for (i = 0; i < sizeof (zfid->zf_gen); i++)
zfid->zf_gen[i] = 0;		zfid->zf_gen[i] = 0;

ZFS_EXIT(zfsvfs);
return (0);		return (0);
}		}


/ARGSUSED/
static int		static int
zfsctl_shares_fid(ap)		zfsctl_common_reclaim(ap)
struct vop_fid_args /* {		struct vop_reclaim_args /* {
struct vnode *a_vp;		struct vnode *a_vp;
struct fid *a_fid;		struct thread *a_td;
} / ap;		} / ap;
{		{
vnode_t *vp = ap->a_vp;		vnode_t *vp = ap->a_vp;
fid_t fidp = (void )ap->a_fid;
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
znode_t *dzp;
int error;

ZFS_ENTER(zfsvfs);		(void) sfs_reclaim_vnode(vp);
		return (0);
if (zfsvfs->z_shares_dir == 0) {
ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENOTSUP));
}		}

if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {		static int
error = VOP_FID(ZTOV(dzp), fidp);		zfsctl_common_print(ap)
VN_RELE(ZTOV(dzp));		struct vop_print_args /* {
		struct vnode *a_vp;
		} / ap;
		{
		sfs_print_node(ap->a_vp->v_data);
		return (0);
}		}

ZFS_EXIT(zfsvfs);
return (error);
}

/*		/*
* .zfs inode namespace
*
* We need to generate unique inode numbers for all files and directories
* within the .zfs pseudo-filesystem. We use the following scheme:
*
* ENTRY ZFSCTL_INODE
* .zfs 1
* .zfs/snapshot 2
* .zfs/snapshot/<snap> objectid(snap)
*/

#define ZFSCTL_INO_SNAP(id) (id)

/*
* Get root directory attributes.		* Get root directory attributes.
*/		*/
/* ARGSUSED */		/* ARGSUSED */
static int		static int
zfsctl_root_getattr(ap)		zfsctl_root_getattr(ap)
struct vop_getattr_args /* {		struct vop_getattr_args /* {
struct vnode *a_vp;		struct vnode *a_vp;
struct vattr *a_vap;		struct vattr *a_vap;
struct ucred *a_cred;		struct ucred *a_cred;
} / ap;		} / ap;
{		{
struct vnode *vp = ap->a_vp;		struct vnode *vp = ap->a_vp;
struct vattr *vap = ap->a_vap;		struct vattr *vap = ap->a_vap;
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;		zfsctl_root_t *node = vp->v_data;
zfsctl_node_t *zcp = vp->v_data;

ZFS_ENTER(zfsvfs);
vap->va_nodeid = ZFSCTL_INO_ROOT;
vap->va_nlink = vap->va_size = NROOT_ENTRIES;
vap->va_mtime = vap->va_ctime = zcp->zc_cmtime;
vap->va_birthtime = vap->va_ctime;

zfsctl_common_getattr(vp, vap);		zfsctl_common_getattr(vp, vap);
ZFS_EXIT(zfsvfs);		vap->va_ctime = node->cmtime;
		vap->va_mtime = vap->va_ctime;
		vap->va_birthtime = vap->va_ctime;
		vap->va_nlink += 1; /* snapdir */
return (0);		return (0);
}		}

/*		/*
* Special case the handling of "..".		* When we lookup "." we still can be asked to lock it
		* differently, can't we?
*/		*/
/* ARGSUSED */
int		int
zfsctl_root_lookup(vnode_t dvp, char nm, vnode_t *vpp, pathname_t pnp,		zfsctl_relock_dot(vnode_t *dvp, int ltype)
int flags, vnode_t rdir, cred_t cr, caller_context_t *ct,
int direntflags, pathname_t realpnp)
{		{
zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;		vref(dvp);
int err;		if (ltype != VOP_ISLOCKED(dvp)) {
		if (ltype == LK_EXCLUSIVE)
		vn_lock(dvp, LK_UPGRADE \| LK_RETRY);
		else /* if (ltype == LK_SHARED) */
		vn_lock(dvp, LK_DOWNGRADE \| LK_RETRY);

/*		/* Relock for the "." case may leave us with a reclaimed vnode. */
* No extended attributes allowed under .zfs		if ((dvp->v_iflag & VI_DOOMED) != 0) {
*/		vrele(dvp);
if (flags & LOOKUP_XATTR)		return (SET_ERROR(ENOENT));
return (SET_ERROR(EINVAL));

ZFS_ENTER(zfsvfs);

if (strcmp(nm, "..") == 0) {
#ifdef illumos
err = VFS_ROOT(dvp->v_vfsp, LK_EXCLUSIVE, vpp);
#else
/*
* NB: can not use VFS_ROOT here as it would acquire
* the vnode lock of the parent (root) vnode while
* holding the child's (.zfs) lock.
*/
znode_t *rootzp;

err = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
if (err == 0)
*vpp = ZTOV(rootzp);
#endif
} else {
err = gfs_vop_lookup(dvp, nm, vpp, pnp, flags, rdir,
cr, ct, direntflags, realpnp);
}		}

ZFS_EXIT(zfsvfs);

return (err);
}		}
		return (0);
		}

static int		/*
zfsctl_freebsd_root_lookup(ap)		* Special case the handling of "..".
		*/
		int
		zfsctl_root_lookup(ap)
struct vop_lookup_args /* {		struct vop_lookup_args /* {
struct vnode *a_dvp;		struct vnode *a_dvp;
struct vnode **a_vpp;		struct vnode **a_vpp;
struct componentname *a_cnp;		struct componentname *a_cnp;
} / ap;		} / ap;
{		{
		struct componentname *cnp = ap->a_cnp;
vnode_t *dvp = ap->a_dvp;		vnode_t *dvp = ap->a_dvp;
vnode_t **vpp = ap->a_vpp;		vnode_t **vpp = ap->a_vpp;
cred_t *cr = ap->a_cnp->cn_cred;		cred_t *cr = ap->a_cnp->cn_cred;
int flags = ap->a_cnp->cn_flags;		int flags = ap->a_cnp->cn_flags;
int lkflags = ap->a_cnp->cn_lkflags;		int lkflags = ap->a_cnp->cn_lkflags;
int nameiop = ap->a_cnp->cn_nameiop;		int nameiop = ap->a_cnp->cn_nameiop;
char nm[NAME_MAX + 1];
int err;		int err;
		int ltype;

if ((flags & ISLASTCN) && (nameiop == RENAME \|\| nameiop == CREATE))		ASSERT(dvp->v_type == VDIR);
return (EOPNOTSUPP);

ASSERT(ap->a_cnp->cn_namelen < sizeof(nm));		if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);		return (SET_ERROR(ENOTSUP));
relookup:
err = zfsctl_root_lookup(dvp, nm, vpp, NULL, 0, NULL, cr, NULL, NULL, NULL);		if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
if (err == 0 && (nm[0] != '.' \|\| nm[1] != '\0')) {		err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
if (flags & ISDOTDOT) {		if (err == 0)
VOP_UNLOCK(dvp, 0);		*vpp = dvp;
err = vn_lock(*vpp, lkflags);		} else if ((flags & ISDOTDOT) != 0) {
if (err != 0) {		err = vn_vget_ino_gen(dvp, zfsctl_fs_root_vnode, NULL,
vrele(*vpp);		lkflags, vpp);
*vpp = NULL;		} else if (strncmp(cnp->cn_nameptr, "snapshot", cnp->cn_namelen) == 0) {
}		err = zfsctl_snapdir_vnode(dvp->v_mount, NULL, lkflags, vpp);
vn_lock(dvp, LK_EXCLUSIVE \| LK_RETRY);
} else {		} else {
err = vn_lock(*vpp, LK_EXCLUSIVE);		err = SET_ERROR(ENOENT);
if (err != 0) {
VERIFY3S(err, ==, ENOENT);
goto relookup;
}		}
}		if (err != 0)
}		*vpp = NULL;
return (err);		return (err);
}		}

static int		static int
zfsctl_root_print(ap)		zfsctl_root_readdir(ap)
struct vop_print_args /* {		struct vop_readdir_args /* {
struct vnode *a_vp;		struct vnode *a_vp;
		struct uio *a_uio;
		struct ucred *a_cred;
		int *a_eofflag;
		int *ncookies;
		u_long **a_cookies;
} / ap;		} / ap;
{		{
printf(" .zfs node\n");		struct dirent entry;
zfsctl_common_print(ap);		vnode_t *vp = ap->a_vp;
return (0);		zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
}		zfsctl_root_t *node = vp->v_data;
		uio_t *uio = ap->a_uio;
		int *eofp = ap->a_eofflag;
		off_t dots_offset;
		int error;

#ifdef illumos		ASSERT(vp->v_type == VDIR);
static int
zfsctl_pathconf(vnode_t vp, int cmd, ulong_t valp, cred_t *cr,		error = sfs_readdir_common(zfsvfs->z_root, ZFSCTL_INO_ROOT, ap, uio,
caller_context_t *ct)		&dots_offset);
{		if (error != 0) {
/*		if (error == ENAMETOOLONG) /* ran out of destination space */
		smhUnsubmitted Not Done Inline Actions It's not clear to me why we want to lose the ENAMETOOLONG error here and below; could you explain? smh: It's not clear to me why we want to lose the ENAMETOOLONG error here and below; could you…
		avgAuthorUnsubmitted Not Done Inline Actions I think that the comment already explains it to a degree. If you look at `vfs_read_dirent` it should be clearer. Unfortunately, `ENAMETOOLONG` is used to signal "buffer full" rather than any name being too long. avg: I think that the comment already explains it to a degree. If you look at `vfs_read_dirent` it…
* We only care about ACL_ENABLED so that libsec can		error = 0;
* display ACL correctly and not default to POSIX draft.		return (error);
*/
if (cmd == _PC_ACL_ENABLED) {
*valp = _ACL_ACE_ENABLED;
return (0);
}		}
		if (uio->uio_offset != dots_offset)
		return (EINVAL);

return (fs_pathconf(vp, cmd, valp, cr, ct));		CTASSERT(sizeof(node->snapdir->sn_name) <= sizeof(entry.d_name));
		entry.d_fileno = node->snapdir->sn_id;
		entry.d_type = DT_DIR;
		strcpy(entry.d_name, node->snapdir->sn_name);
		entry.d_namlen = strlen(entry.d_name);
		entry.d_reclen = sizeof(entry);
		error = vfs_read_dirent(ap, &entry, uio->uio_offset);
		if (error != 0) {
		if (error == ENAMETOOLONG)
		error = 0;
		return (error);
}		}
#endif /* illumos */		if (eofp != NULL)
		*eofp = 1;
		return (error);
		}

#ifdef illumos
static const fs_operation_def_t zfsctl_tops_root[] = {
{ VOPNAME_OPEN, { .vop_open = zfsctl_common_open } },
{ VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } },
{ VOPNAME_IOCTL, { .error = fs_inval } },
{ VOPNAME_GETATTR, { .vop_getattr = zfsctl_root_getattr } },
{ VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } },
{ VOPNAME_READDIR, { .vop_readdir = gfs_vop_readdir } },
{ VOPNAME_LOOKUP, { .vop_lookup = zfsctl_root_lookup } },
{ VOPNAME_SEEK, { .vop_seek = fs_seek } },
{ VOPNAME_INACTIVE, { .vop_inactive = gfs_vop_inactive } },
{ VOPNAME_PATHCONF, { .vop_pathconf = zfsctl_pathconf } },
{ VOPNAME_FID, { .vop_fid = zfsctl_common_fid } },
{ NULL }
};
#endif /* illumos */

static struct vop_vector zfsctl_ops_root = {		static struct vop_vector zfsctl_ops_root = {
.vop_default = &default_vnodeops,		.vop_default = &default_vnodeops,
.vop_open = zfsctl_common_open,		.vop_open = zfsctl_common_open,
.vop_close = zfsctl_common_close,		.vop_close = zfsctl_common_close,
.vop_ioctl = VOP_EINVAL,		.vop_ioctl = VOP_EINVAL,
.vop_getattr = zfsctl_root_getattr,		.vop_getattr = zfsctl_root_getattr,
.vop_access = zfsctl_common_access,		.vop_access = zfsctl_common_access,
.vop_readdir = gfs_vop_readdir,		.vop_readdir = zfsctl_root_readdir,
.vop_lookup = zfsctl_freebsd_root_lookup,		.vop_lookup = zfsctl_root_lookup,
.vop_inactive = VOP_NULL,		.vop_inactive = VOP_NULL,
.vop_reclaim = gfs_vop_reclaim,		.vop_reclaim = zfsctl_common_reclaim,
#ifdef TODO
.vop_pathconf = zfsctl_pathconf,
#endif
.vop_fid = zfsctl_common_fid,		.vop_fid = zfsctl_common_fid,
.vop_print = zfsctl_root_print,		.vop_print = zfsctl_common_print,
};		};

/*
* Gets the full dataset name that corresponds to the given snapshot name
* Example:
* zfsctl_snapshot_zname("snap1") -> "mypool/myfs@snap1"
*/
static int		static int
zfsctl_snapshot_zname(vnode_t vp, const char name, int len, char *zname)		zfsctl_snapshot_zname(vnode_t vp, const char name, int len, char *zname)
{		{
objset_t os = ((zfsvfs_t )((vp)->v_vfsp->vfs_data))->z_os;		objset_t os = ((zfsvfs_t )((vp)->v_vfsp->vfs_data))->z_os;

if (zfs_component_namecheck(name, NULL, NULL) != 0)
return (SET_ERROR(EILSEQ));
dmu_objset_name(os, zname);		dmu_objset_name(os, zname);
if (strlen(zname) + 1 + strlen(name) >= len)		if (strlen(zname) + 1 + strlen(name) >= len)
return (SET_ERROR(ENAMETOOLONG));		return (SET_ERROR(ENAMETOOLONG));
(void) strcat(zname, "@");		(void) strcat(zname, "@");
(void) strcat(zname, name);		(void) strcat(zname, name);
return (0);		return (0);
}		}

static int		static int
zfsctl_unmount_snap(zfs_snapentry_t sep, int fflags, cred_t cr)		zfsctl_snapshot_lookup(vnode_t vp, const char name, uint64_t *id)
{		{
vnode_t *svp = sep->se_root;		objset_t os = ((zfsvfs_t )((vp)->v_vfsp->vfs_data))->z_os;
int error;

ASSERT(vn_ismntpt(svp));

/* this will be dropped by dounmount() */
if ((error = vn_vfswlock(svp)) != 0)
return (error);

#ifdef illumos
VN_HOLD(svp);
error = dounmount(vn_mountedvfs(svp), fflags, cr);
if (error) {
VN_RELE(svp);
return (error);
}

/*
* We can't use VN_RELE(), as that will try to invoke
* zfsctl_snapdir_inactive(), which would cause us to destroy
* the sd_lock mutex held by our caller.
*/
ASSERT(svp->v_count == 1);
gfs_vop_reclaim(svp, cr, NULL);

kmem_free(sep->se_name, strlen(sep->se_name) + 1);
kmem_free(sep, sizeof (zfs_snapentry_t));

return (0);
#else
vfs_ref(vn_mountedvfs(svp));
return (dounmount(vn_mountedvfs(svp), fflags, curthread));
#endif
}

#ifdef illumos
static void
zfsctl_rename_snap(zfsctl_snapdir_t sdp, zfs_snapentry_t sep, const char *nm)
{
avl_index_t where;
vfs_t *vfsp;
refstr_t *pathref;
char newpath[MAXNAMELEN];
char *tail;

ASSERT(MUTEX_HELD(&sdp->sd_lock));
ASSERT(sep != NULL);

vfsp = vn_mountedvfs(sep->se_root);
ASSERT(vfsp != NULL);

vfs_lock_wait(vfsp);

/*
* Change the name in the AVL tree.
*/
avl_remove(&sdp->sd_snaps, sep);
kmem_free(sep->se_name, strlen(sep->se_name) + 1);
sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
(void) strcpy(sep->se_name, nm);
VERIFY(avl_find(&sdp->sd_snaps, sep, &where) == NULL);
avl_insert(&sdp->sd_snaps, sep, where);

/*
* Change the current mountpoint info:
* - update the tail of the mntpoint path
* - update the tail of the resource path
*/
pathref = vfs_getmntpoint(vfsp);
(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
VERIFY((tail = strrchr(newpath, '/')) != NULL);
*(tail+1) = '\0';
ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
(void) strcat(newpath, nm);
refstr_rele(pathref);
vfs_setmntpoint(vfsp, newpath, 0);

pathref = vfs_getresource(vfsp);
(void) strncpy(newpath, refstr_value(pathref), sizeof (newpath));
VERIFY((tail = strrchr(newpath, '@')) != NULL);
*(tail+1) = '\0';
ASSERT3U(strlen(newpath) + strlen(nm), <, sizeof (newpath));
(void) strcat(newpath, nm);
refstr_rele(pathref);
vfs_setresource(vfsp, newpath, 0);

vfs_unlock(vfsp);
}
#endif /* illumos */

#ifdef illumos
/ARGSUSED/
static int
zfsctl_snapdir_rename(vnode_t sdvp, char snm, vnode_t tdvp, char tnm,
cred_t cr, caller_context_t ct, int flags)
{
zfsctl_snapdir_t *sdp = sdvp->v_data;
zfs_snapentry_t search, *sep;
zfsvfs_t *zfsvfs;
avl_index_t where;
char from[MAXNAMELEN], to[MAXNAMELEN];
char real[MAXNAMELEN], fsname[MAXNAMELEN];
int err;		int err;

zfsvfs = sdvp->v_vfsp->vfs_data;		err = dsl_dataset_snap_lookup(dmu_objset_ds(os), name, id);
ZFS_ENTER(zfsvfs);

if ((flags & FIGNORECASE) \|\| zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
err = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
MAXNAMELEN, NULL);
if (err == 0) {
snm = real;
} else if (err != ENOTSUP) {
ZFS_EXIT(zfsvfs);
return (err);		return (err);
}		}
}

ZFS_EXIT(zfsvfs);

dmu_objset_name(zfsvfs->z_os, fsname);

err = zfsctl_snapshot_zname(sdvp, snm, MAXNAMELEN, from);
if (err == 0)
err = zfsctl_snapshot_zname(tdvp, tnm, MAXNAMELEN, to);
if (err == 0)
err = zfs_secpolicy_rename_perms(from, to, cr);
if (err != 0)
return (err);

/*		/*
* Cannot move snapshots out of the snapdir.		* Given a vnode get a root vnode of a filesystem mounted on top of
		* the vnode, if any. The root vnode is referenced and locked.
		* If no filesystem is mounted then the original vnode remains referenced
		bcrUnsubmitted Done Inline Actions s/orinal/original/ Same for the next line... bcr: s/orinal/original/ Same for the next line...
		* and locked. If any error happens the original vnode is unlocked and
		* released.
*/		*/
if (sdvp != tdvp)
return (SET_ERROR(EINVAL));

if (strcmp(snm, tnm) == 0)
return (0);

mutex_enter(&sdp->sd_lock);

search.se_name = (char *)snm;
if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) == NULL) {
mutex_exit(&sdp->sd_lock);
return (SET_ERROR(ENOENT));
}

err = dsl_dataset_rename_snapshot(fsname, snm, tnm, 0);
if (err == 0)
zfsctl_rename_snap(sdp, sep, tnm);

mutex_exit(&sdp->sd_lock);

return (err);
}
#endif /* illumos */

#ifdef illumos
/* ARGSUSED */
static int		static int
zfsctl_snapdir_remove(vnode_t dvp, char name, vnode_t cwd, cred_t cr,		zfsctl_mounted_here(vnode_t **vpp, int flags)
caller_context_t *ct, int flags)
{		{
zfsctl_snapdir_t *sdp = dvp->v_data;		struct mount *mp;
zfs_snapentry_t *sep;
zfs_snapentry_t search;
zfsvfs_t *zfsvfs;
char snapname[MAXNAMELEN];
char real[MAXNAMELEN];
int err;		int err;

zfsvfs = dvp->v_vfsp->vfs_data;		ASSERT_VOP_LOCKED(*vpp, __func__);
ZFS_ENTER(zfsvfs);		ASSERT3S((*vpp)->v_type, ==, VDIR);

if ((flags & FIGNORECASE) \|\| zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {		if ((mp = (*vpp)->v_mountedhere) != NULL) {
		err = vfs_busy(mp, 0);
err = dmu_snapshot_realname(zfsvfs->z_os, name, real,		KASSERT(err == 0, ("vfs_busy(mp, 0) failed with %d", err));
MAXNAMELEN, NULL);		KASSERT(vrefcnt(*vpp) > 1, ("unreferenced mountpoint"));
if (err == 0) {		vput(*vpp);
name = real;		err = VFS_ROOT(mp, flags, vpp);
} else if (err != ENOTSUP) {		vfs_unbusy(mp);
ZFS_EXIT(zfsvfs);
return (err);		return (err);
}		}
		return (EJUSTRETURN);
		smhUnsubmitted Not Done Inline Actions SET_ERROR or is this some sort of special case? smh: SET_ERROR or is this some sort of special case?
		avgAuthorUnsubmitted Not Done Inline Actions Yeah, this is just a special return value, it does not mean that any error occurred. avg: Yeah, this is just a special return value, it does not mean that any error occurred.
}		}

ZFS_EXIT(zfsvfs);		typedef struct {
		const char *snap_name;
		uint64_t snap_id;
		} snapshot_setup_arg_t;

err = zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, snapname);		static void
if (err == 0)		zfsctl_snapshot_vnode_setup(vnode_t vp, void arg)
err = zfs_secpolicy_destroy_perms(snapname, cr);
if (err != 0)
return (err);

mutex_enter(&sdp->sd_lock);

search.se_name = name;
sep = avl_find(&sdp->sd_snaps, &search, NULL);
if (sep) {
avl_remove(&sdp->sd_snaps, sep);
err = zfsctl_unmount_snap(sep, MS_FORCE, cr);
if (err != 0)
avl_add(&sdp->sd_snaps, sep);
else
err = dsl_destroy_snapshot(snapname, B_FALSE);
} else {
err = SET_ERROR(ENOENT);
}

mutex_exit(&sdp->sd_lock);

return (err);
}
#endif /* illumos */

/*
* This creates a snapshot under '.zfs/snapshot'.
*/
/* ARGSUSED */
static int
zfsctl_snapdir_mkdir(vnode_t dvp, char dirname, vattr_t vap, vnode_t *vpp,
cred_t cr, caller_context_t cc, int flags, vsecattr_t *vsecp)
{		{
zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;		snapshot_setup_arg_t *ssa = arg;
char name[MAXNAMELEN];		sfs_node_t *node;
int err;
static enum symfollow follow = NO_FOLLOW;
static enum uio_seg seg = UIO_SYSSPACE;

if (zfs_component_namecheck(dirname, NULL, NULL) != 0)		ASSERT_VOP_ELOCKED(vp, __func__);
return (SET_ERROR(EILSEQ));

dmu_objset_name(zfsvfs->z_os, name);		node = sfs_alloc_node(sizeof(sfs_node_t),
		ssa->snap_name, ZFSCTL_INO_SNAPDIR, ssa->snap_id);
		zfsctl_common_vnode_setup(vp, node);

*vpp = NULL;		/* We have to support recursive locking. */
		VN_LOCK_AREC(vp);
err = zfs_secpolicy_snapshot_perms(name, cr);
if (err != 0)
return (err);

if (err == 0) {
err = dmu_objset_snapshot_one(name, dirname);
if (err != 0)
return (err);
err = lookupnameat(dirname, seg, follow, NULL, vpp, dvp);
}		}

return (err);
}

static int
zfsctl_freebsd_snapdir_mkdir(ap)
struct vop_mkdir_args /* {
struct vnode *a_dvp;
struct vnode **a_vpp;
struct componentname *a_cnp;
struct vattr *a_vap;
} / ap;
{

ASSERT(ap->a_cnp->cn_flags & SAVENAME);

return (zfsctl_snapdir_mkdir(ap->a_dvp, ap->a_cnp->cn_nameptr, NULL,
ap->a_vpp, ap->a_cnp->cn_cred, NULL, 0, NULL));
}

/*		/*
* Lookup entry point for the 'snapshot' directory. Try to open the		* Lookup entry point for the 'snapshot' directory. Try to open the
* snapshot if it exists, creating the pseudo filesystem vnode as necessary.		* snapshot if it exists, creating the pseudo filesystem vnode as necessary.
* Perform a mount of the associated dataset on top of the vnode.		* Perform a mount of the associated dataset on top of the vnode.
*/		*/
/* ARGSUSED */		/* ARGSUSED */
int		int
zfsctl_snapdir_lookup(ap)		zfsctl_snapdir_lookup(ap)
struct vop_lookup_args /* {		struct vop_lookup_args /* {
struct vnode *a_dvp;		struct vnode *a_dvp;
struct vnode **a_vpp;		struct vnode **a_vpp;
struct componentname *a_cnp;		struct componentname *a_cnp;
} / ap;		} / ap;
{		{
vnode_t *dvp = ap->a_dvp;		vnode_t *dvp = ap->a_dvp;
vnode_t **vpp = ap->a_vpp;		vnode_t **vpp = ap->a_vpp;
struct componentname *cnp = ap->a_cnp;		struct componentname *cnp = ap->a_cnp;
char nm[NAME_MAX + 1];		char name[MAXNAMELEN];
zfsctl_snapdir_t *sdp = dvp->v_data;		char fullname[MAXNAMELEN];
objset_t *snap;
char snapname[MAXNAMELEN];
char real[MAXNAMELEN];
char *mountpoint;		char *mountpoint;
zfs_snapentry_t *sep, search;
size_t mountpoint_len;		size_t mountpoint_len;
avl_index_t where;
zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;		zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
		uint64_t snap_id;
		int nameiop = cnp->cn_nameiop;
		int lkflags = cnp->cn_lkflags;
		int flags = cnp->cn_flags;
int err;		int err;
int ltype, flags = 0;

/*		/*
* No extended attributes allowed under .zfs		* No extended attributes allowed under .zfs
		asomersUnsubmitted Done Inline Actions This comment is obsolete. asomers: This comment is obsolete.
		asomersUnsubmitted Done Inline Actions Still need to fix this comment. asomers: Still need to fix this comment.
*/		*/
if (flags & LOOKUP_XATTR)
return (SET_ERROR(EINVAL));
ASSERT(ap->a_cnp->cn_namelen < sizeof(nm));
strlcpy(nm, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);

ASSERT(dvp->v_type == VDIR);		ASSERT(dvp->v_type == VDIR);

*vpp = NULL;		if ((flags & ISLASTCN) != 0 && nameiop != LOOKUP)
		return (ENOTSUP);

/*		if (cnp->cn_namelen == 1 && *cnp->cn_nameptr == '.') {
* If we get a recursive call, that means we got called		err = zfsctl_relock_dot(dvp, lkflags & LK_TYPE_MASK);
* from the domount() code while it was trying to look up the		if (err == 0)
* spec (which looks like a local path for zfs). We need to		*vpp = dvp;
* add some flag to domount() to tell it not to do this lookup.		return (err);
*/
if (MUTEX_HELD(&sdp->sd_lock))
return (SET_ERROR(ENOENT));

ZFS_ENTER(zfsvfs);
if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
if (nm[0] == '.' && nm[1] == '.' && nm[2] =='\0') {
VOP_UNLOCK(dvp, 0);
VERIFY0(vn_lock(*vpp, LK_EXCLUSIVE));
VERIFY0(vn_lock(dvp, LK_EXCLUSIVE));
}		}
ZFS_EXIT(zfsvfs);		if (flags & ISDOTDOT) {
return (0);		err = vn_vget_ino_gen(dvp, zfsctl_root_vnode, NULL, lkflags,
		vpp);
		return (err);
}		}

if (flags & FIGNORECASE) {		if (cnp->cn_namelen >= sizeof(name))
boolean_t conflict = B_FALSE;		return (SET_ERROR(ENAMETOOLONG));

err = dmu_snapshot_realname(zfsvfs->z_os, nm, real,		strlcpy(name, ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen + 1);
MAXNAMELEN, &conflict);		err = zfsctl_snapshot_lookup(dvp, name, &snap_id);
if (err == 0) {		if (err != 0) {
strlcpy(nm, real, sizeof(nm));		err = SET_ERROR(ENOENT);
		smhUnsubmitted Done Inline Actions No need to assign to err, just return? return (SET_ERROR(ENOENT)); smh: No need to assign to err, just return? return (SET_ERROR(ENOENT));
		avgAuthorUnsubmitted Not Done Inline Actions Yes. avg: Yes.
} else if (err != ENOTSUP) {
ZFS_EXIT(zfsvfs);
return (err);		return (err);
}		}
#if 0
if (realpnp)
(void) strlcpy(realpnp->pn_buf, nm,
realpnp->pn_bufsize);
if (conflict && direntflags)
*direntflags = ED_CASE_CONFLICT;
#endif
}

relookup:		for (;;) {
mutex_enter(&sdp->sd_lock);		snapshot_setup_arg_t ssa;
search.se_name = (char *)nm;
if ((sep = avl_find(&sdp->sd_snaps, &search, &where)) != NULL) {		ssa.snap_name = name;
*vpp = sep->se_root;		ssa.snap_id = snap_id;
VN_HOLD(*vpp);		err = sfs_vgetx(dvp->v_mount, LK_SHARED, ZFSCTL_INO_SNAPDIR,
err = traverse(vpp, LK_EXCLUSIVE \| LK_RETRY);		snap_id, "zfs", &zfsctl_ops_snapshot,
if (err != 0) {		zfsctl_snapshot_vnode_setup, &ssa, vpp);
*vpp = NULL;		if (err != 0)
} else if (*vpp == sep->se_root) {		return (err);

		/* Check if a new vnode has just been created. */
		if (VOP_ISLOCKED(*vpp) == LK_EXCLUSIVE)
		break;

/*		/*
* The snapshot was unmounted behind our backs,		* The vnode must be referenced at least by this thread and
* try to remount it.		* the mounted snapshot or the thread doing the mounting.
		* There can be more references from concurrent lookups.
*/		*/
VERIFY(zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname) == 0);		KASSERT(vrefcnt(*vpp) > 1, ("found unreferenced mountpoint"));
goto domount;
}		/*
mutex_exit(&sdp->sd_lock);		* Check if a snapshot is already mounted on top of the vnode.
ZFS_EXIT(zfsvfs);		*/
		err = zfsctl_mounted_here(vpp, lkflags);
		if (err != EJUSTRETURN)
return (err);		return (err);
}

		#ifdef INVARIANTS
/*		/*
* The requested snapshot is not currently mounted, look it up.		* If the vnode is not covered yet, then the mount operation
		* must be in progress.
*/		*/
err = zfsctl_snapshot_zname(dvp, nm, MAXNAMELEN, snapname);		VI_LOCK(*vpp);
if (err != 0) {		KASSERT(((*vpp)->v_iflag & VI_MOUNT) != 0,
mutex_exit(&sdp->sd_lock);		("snapshot vnode not covered"));
ZFS_EXIT(zfsvfs);		VI_UNLOCK(*vpp);
		#endif
		vput(*vpp);

/*		/*
* handle "ls *" or "?" in a graceful manner,		* In this situation we can loop on uncontested locks and starve
* forcing EILSEQ to ENOENT.		* the thread doing the lengthy, non-trivial mount operation.
* Since shell ultimately passes "*" or "?" as name to lookup
*/		*/
return (err == EILSEQ ? ENOENT : err);		kern_yield(PRI_USER);
}		}
if (dmu_objset_hold(snapname, FTAG, &snap) != 0) {
mutex_exit(&sdp->sd_lock);
#ifdef illumos
ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENOENT));
#else /* !illumos */
/* Translate errors and add SAVENAME when needed. */
if ((cnp->cn_flags & ISLASTCN) && cnp->cn_nameiop == CREATE) {
err = EJUSTRETURN;
cnp->cn_flags \|= SAVENAME;
} else {
err = SET_ERROR(ENOENT);
}
ZFS_EXIT(zfsvfs);
return (err);
#endif /* illumos */
}

sep = kmem_alloc(sizeof (zfs_snapentry_t), KM_SLEEP);		VERIFY0(zfsctl_snapshot_zname(dvp, name, MAXNAMELEN, fullname));
sep->se_name = kmem_alloc(strlen(nm) + 1, KM_SLEEP);
(void) strcpy(sep->se_name, nm);
*vpp = sep->se_root = zfsctl_snapshot_mknode(dvp, dmu_objset_id(snap));
avl_insert(&sdp->sd_snaps, sep, where);

dmu_objset_rele(snap, FTAG);
domount:
mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) +		mountpoint_len = strlen(dvp->v_vfsp->mnt_stat.f_mntonname) +
strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(nm) + 1;		strlen("/" ZFS_CTLDIR_NAME "/snapshot/") + strlen(name) + 1;
mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);		mountpoint = kmem_alloc(mountpoint_len, KM_SLEEP);
(void) snprintf(mountpoint, mountpoint_len,		(void) snprintf(mountpoint, mountpoint_len,
"%s/" ZFS_CTLDIR_NAME "/snapshot/%s",		"%s/" ZFS_CTLDIR_NAME "/snapshot/%s",
dvp->v_vfsp->mnt_stat.f_mntonname, nm);		dvp->v_vfsp->mnt_stat.f_mntonname, name);
mutex_exit(&sdp->sd_lock);

/*		err = mount_snapshot(curthread, vpp, "zfs", mountpoint, fullname, 0);
* The vnode may get reclaimed between dropping sd_lock and
* getting the vnode lock.
* */
err = vn_lock(*vpp, LK_EXCLUSIVE);
if (err == ENOENT)
goto relookup;
VERIFY0(err);
err = mount_snapshot(curthread, vpp, "zfs", mountpoint, snapname, 0);
kmem_free(mountpoint, mountpoint_len);		kmem_free(mountpoint, mountpoint_len);
if (err == 0) {		if (err == 0) {
/*		/*
* Fix up the root vnode mounted on .zfs/snapshot/<snapname>.		* Fix up the root vnode mounted on .zfs/snapshot/<snapname>.
*		*
* This is where we lie about our v_vfsp in order to		* This is where we lie about our v_vfsp in order to
* make .zfs/snapshot/<snapname> accessible over NFS		* make .zfs/snapshot/<snapname> accessible over NFS
* without requiring manual mounts of <snapname>.		* without requiring manual mounts of <snapname>.
*/		*/
ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);		ASSERT(VTOZ(*vpp)->z_zfsvfs != zfsvfs);
VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;		VTOZ(*vpp)->z_zfsvfs->z_parent = zfsvfs;
(*vpp)->v_flag &= ~VROOT;
}
ZFS_EXIT(zfsvfs);

#ifdef illumos		/* Clear the root flag (set via VFS_ROOT) as well. */
/*		(*vpp)->v_vflag &= ~VV_ROOT;
* If we had an error, drop our hold on the vnode and
* zfsctl_snapshot_inactive() will clean up.
*/
if (err != 0) {
VN_RELE(*vpp);
*vpp = NULL;
}		}
#else
if (err != 0)		if (err != 0)
*vpp = NULL;		*vpp = NULL;
#endif
return (err);		return (err);
}		}

/* ARGSUSED */
int
zfsctl_shares_lookup(ap)
struct vop_lookup_args /* {
struct vnode *a_dvp;
struct vnode **a_vpp;
struct componentname *a_cnp;
} / ap;
{
vnode_t *dvp = ap->a_dvp;
vnode_t **vpp = ap->a_vpp;
struct componentname *cnp = ap->a_cnp;
zfsvfs_t *zfsvfs = dvp->v_vfsp->vfs_data;
char nm[NAME_MAX + 1];
znode_t *dzp;
int error;

ZFS_ENTER(zfsvfs);

ASSERT(cnp->cn_namelen < sizeof(nm));
strlcpy(nm, cnp->cn_nameptr, cnp->cn_namelen + 1);

if (gfs_lookup_dot(vpp, dvp, zfsvfs->z_ctldir, nm) == 0) {
if (nm[0] == '.' && nm[1] == '.' && nm[2] =='\0') {
VOP_UNLOCK(dvp, 0);
VERIFY0(vn_lock(*vpp, LK_EXCLUSIVE));
VERIFY0(vn_lock(dvp, LK_EXCLUSIVE));
}
ZFS_EXIT(zfsvfs);
return (0);
}

if (zfsvfs->z_shares_dir == 0) {
ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENOTSUP));
}
if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
error = VOP_LOOKUP(ZTOV(dzp), vpp, cnp);
VN_RELE(ZTOV(dzp));
}

ZFS_EXIT(zfsvfs);

return (error);
}

/* ARGSUSED */
static int		static int
zfsctl_snapdir_readdir_cb(vnode_t vp, void dp, int *eofp,		zfsctl_snapdir_readdir(ap)
offset_t offp, offset_t nextp, void *data, int flags)
{
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
char snapname[MAXNAMELEN];
uint64_t id, cookie;
boolean_t case_conflict;
int error;

ZFS_ENTER(zfsvfs);

cookie = *offp;
dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN, snapname, &id,
&cookie, &case_conflict);
dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
if (error) {
ZFS_EXIT(zfsvfs);
if (error == ENOENT) {
*eofp = 1;
return (0);
}
return (error);
}

if (flags & V_RDDIR_ENTFLAGS) {
edirent_t *eodp = dp;

(void) strcpy(eodp->ed_name, snapname);
eodp->ed_ino = ZFSCTL_INO_SNAP(id);
eodp->ed_eflags = case_conflict ? ED_CASE_CONFLICT : 0;
} else {
struct dirent64 *odp = dp;

(void) strcpy(odp->d_name, snapname);
odp->d_ino = ZFSCTL_INO_SNAP(id);
}
*nextp = cookie;

ZFS_EXIT(zfsvfs);

return (0);
}

/* ARGSUSED */
static int
zfsctl_shares_readdir(ap)
struct vop_readdir_args /* {		struct vop_readdir_args /* {
struct vnode *a_vp;		struct vnode *a_vp;
struct uio *a_uio;		struct uio *a_uio;
struct ucred *a_cred;		struct ucred *a_cred;
int *a_eofflag;		int *a_eofflag;
int *a_ncookies;		int *a_ncookies;
u_long **a_cookies;		u_long **a_cookies;
} / ap;		} / ap;
{		{
		char snapname[MAXNAMELEN];
		struct dirent entry;
vnode_t *vp = ap->a_vp;		vnode_t *vp = ap->a_vp;
uio_t *uiop = ap->a_uio;
cred_t *cr = ap->a_cred;
int *eofp = ap->a_eofflag;
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;		zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
znode_t *dzp;		uio_t *uio = ap->a_uio;
		int *eofp = ap->a_eofflag;
		off_t dots_offset;
int error;		int error;

ZFS_ENTER(zfsvfs);		ASSERT(vp->v_type == VDIR);

if (zfsvfs->z_shares_dir == 0) {		error = sfs_readdir_common(ZFSCTL_INO_ROOT, ZFSCTL_INO_SNAPDIR, ap, uio,
ZFS_EXIT(zfsvfs);		&dots_offset);
return (SET_ERROR(ENOTSUP));		if (error != 0) {
}		if (error == ENAMETOOLONG) /* ran out of destination space */
if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {		error = 0;
vn_lock(ZTOV(dzp), LK_SHARED \| LK_RETRY);
error = VOP_READDIR(ZTOV(dzp), uiop, cr, eofp, ap->a_ncookies, ap->a_cookies);
VN_URELE(ZTOV(dzp));
} else {
*eofp = 1;
error = SET_ERROR(ENOENT);
}

ZFS_EXIT(zfsvfs);
return (error);		return (error);
}		}

/*		for (;;) {
* pvp is the '.zfs' directory (zfsctl_node_t).		uint64_t cookie;
*		uint64_t id;
* Creates vp, which is '.zfs/snapshot' (zfsctl_snapdir_t).
*
* This function is the callback to create a GFS vnode for '.zfs/snapshot'
* when a lookup is performed on .zfs for "snapshot".
*/
vnode_t *
zfsctl_mknode_snapdir(vnode_t *pvp)
{
vnode_t *vp;
zfsctl_snapdir_t *sdp;

vp = gfs_dir_create(sizeof (zfsctl_snapdir_t), pvp, pvp->v_vfsp,		cookie = uio->uio_offset - dots_offset;
&zfsctl_ops_snapdir, NULL, NULL, MAXNAMELEN,
zfsctl_snapdir_readdir_cb, NULL);
sdp = vp->v_data;
sdp->sd_node.zc_id = ZFSCTL_INO_SNAPDIR;
sdp->sd_node.zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
mutex_init(&sdp->sd_lock, NULL, MUTEX_DEFAULT, NULL);
avl_create(&sdp->sd_snaps, snapentry_compare,
sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t, se_node));
VOP_UNLOCK(vp, 0);
return (vp);
}

vnode_t *		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
zfsctl_mknode_shares(vnode_t *pvp)		error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN,
{		snapname, &id, &cookie, NULL);
vnode_t *vp;		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
zfsctl_node_t *sdp;		if (error != 0) {
		if (error == ENOENT) {
vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, pvp->v_vfsp,		if (eofp != NULL)
&zfsctl_ops_shares, NULL, NULL, MAXNAMELEN,		*eofp = 1;
NULL, NULL);		error = 0;
sdp = vp->v_data;
sdp->zc_cmtime = ((zfsctl_node_t *)pvp->v_data)->zc_cmtime;
VOP_UNLOCK(vp, 0);
return (vp);

}		}
		return (error);
		}

/* ARGSUSED */		entry.d_fileno = id;
static int		entry.d_type = DT_DIR;
zfsctl_shares_getattr(ap)		strcpy(entry.d_name, snapname);
struct vop_getattr_args /* {		entry.d_namlen = strlen(entry.d_name);
struct vnode *a_vp;		entry.d_reclen = sizeof(entry);
struct vattr *a_vap;		error = vfs_read_dirent(ap, &entry, uio->uio_offset);
struct ucred *a_cred;		if (error != 0) {
struct thread *a_td;		if (error == ENAMETOOLONG)
} / ap;		error = 0;
{		return (error);
vnode_t *vp = ap->a_vp;
vattr_t *vap = ap->a_vap;
cred_t *cr = ap->a_cred;
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
znode_t *dzp;
int error;

ZFS_ENTER(zfsvfs);
if (zfsvfs->z_shares_dir == 0) {
ZFS_EXIT(zfsvfs);
return (SET_ERROR(ENOTSUP));
}		}
if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {		uio->uio_offset = cookie + dots_offset;
vn_lock(ZTOV(dzp), LK_SHARED \| LK_RETRY);
error = VOP_GETATTR(ZTOV(dzp), vap, cr);
VN_URELE(ZTOV(dzp));
}		}
ZFS_EXIT(zfsvfs);		/* NOTREACHED */
return (error);


}		}

/* ARGSUSED */		/* ARGSUSED */
static int		static int
zfsctl_snapdir_getattr(ap)		zfsctl_snapdir_getattr(ap)
struct vop_getattr_args /* {		struct vop_getattr_args /* {
struct vnode *a_vp;		struct vnode *a_vp;
struct vattr *a_vap;		struct vattr *a_vap;
struct ucred *a_cred;		struct ucred *a_cred;
} / ap;		} / ap;
{		{
vnode_t *vp = ap->a_vp;		vnode_t *vp = ap->a_vp;
vattr_t *vap = ap->a_vap;		vattr_t *vap = ap->a_vap;
zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;		zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
zfsctl_snapdir_t *sdp = vp->v_data;		sfs_node_t *node = vp->v_data;

ZFS_ENTER(zfsvfs);
zfsctl_common_getattr(vp, vap);		zfsctl_common_getattr(vp, vap);
vap->va_nodeid = gfs_file_inode(vp);		vap->va_ctime = dmu_objset_snap_cmtime(zfsvfs->z_os);
vap->va_nlink = vap->va_size = avl_numnodes(&sdp->sd_snaps) + 2;		vap->va_mtime = vap->va_ctime;
vap->va_ctime = vap->va_mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
vap->va_birthtime = vap->va_ctime;		vap->va_birthtime = vap->va_ctime;
ZFS_EXIT(zfsvfs);

return (0);		return (0);
}		}

/* ARGSUSED */
static int
zfsctl_snapdir_reclaim(ap)
struct vop_reclaim_args /* {
struct vnode *a_vp;
struct thread *a_td;
} / ap;
{
vnode_t *vp = ap->a_vp;
zfsctl_snapdir_t *sdp = vp->v_data;
zfs_snapentry_t *sep;

ASSERT(avl_numnodes(&sdp->sd_snaps) == 0);
mutex_destroy(&sdp->sd_lock);
avl_destroy(&sdp->sd_snaps);
gfs_vop_reclaim(ap);

return (0);
}

static int
zfsctl_shares_print(ap)
struct vop_print_args /* {
struct vnode *a_vp;
} / ap;
{
printf(" .zfs/shares node\n");
zfsctl_common_print(ap);
return (0);
}

static int
zfsctl_snapdir_print(ap)
struct vop_print_args /* {
struct vnode *a_vp;
} / ap;
{
vnode_t *vp = ap->a_vp;
zfsctl_snapdir_t *sdp = vp->v_data;

printf(" .zfs/snapshot node\n");
printf(" number of children = %lu\n", avl_numnodes(&sdp->sd_snaps));
zfsctl_common_print(ap);
return (0);
}

#ifdef illumos
static const fs_operation_def_t zfsctl_tops_snapdir[] = {
{ VOPNAME_OPEN, { .vop_open = zfsctl_common_open } },
{ VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } },
{ VOPNAME_IOCTL, { .error = fs_inval } },
{ VOPNAME_GETATTR, { .vop_getattr = zfsctl_snapdir_getattr } },
{ VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } },
{ VOPNAME_RENAME, { .vop_rename = zfsctl_snapdir_rename } },
{ VOPNAME_RMDIR, { .vop_rmdir = zfsctl_snapdir_remove } },
{ VOPNAME_MKDIR, { .vop_mkdir = zfsctl_snapdir_mkdir } },
{ VOPNAME_READDIR, { .vop_readdir = gfs_vop_readdir } },
{ VOPNAME_LOOKUP, { .vop_lookup = zfsctl_snapdir_lookup } },
{ VOPNAME_SEEK, { .vop_seek = fs_seek } },
{ VOPNAME_INACTIVE, { .vop_inactive = zfsctl_snapdir_inactive } },
{ VOPNAME_FID, { .vop_fid = zfsctl_common_fid } },
{ NULL }
};

static const fs_operation_def_t zfsctl_tops_shares[] = {
{ VOPNAME_OPEN, { .vop_open = zfsctl_common_open } },
{ VOPNAME_CLOSE, { .vop_close = zfsctl_common_close } },
{ VOPNAME_IOCTL, { .error = fs_inval } },
{ VOPNAME_GETATTR, { .vop_getattr = zfsctl_shares_getattr } },
{ VOPNAME_ACCESS, { .vop_access = zfsctl_common_access } },
{ VOPNAME_READDIR, { .vop_readdir = zfsctl_shares_readdir } },
{ VOPNAME_LOOKUP, { .vop_lookup = zfsctl_shares_lookup } },
{ VOPNAME_SEEK, { .vop_seek = fs_seek } },
{ VOPNAME_INACTIVE, { .vop_inactive = gfs_vop_inactive } },
{ VOPNAME_FID, { .vop_fid = zfsctl_shares_fid } },
{ NULL }
};
#else /* !illumos */
static struct vop_vector zfsctl_ops_snapdir = {		static struct vop_vector zfsctl_ops_snapdir = {
.vop_default = &default_vnodeops,		.vop_default = &default_vnodeops,
.vop_open = zfsctl_common_open,		.vop_open = zfsctl_common_open,
.vop_close = zfsctl_common_close,		.vop_close = zfsctl_common_close,
.vop_ioctl = VOP_EINVAL,
.vop_getattr = zfsctl_snapdir_getattr,		.vop_getattr = zfsctl_snapdir_getattr,
.vop_access = zfsctl_common_access,		.vop_access = zfsctl_common_access,
.vop_mkdir = zfsctl_freebsd_snapdir_mkdir,		.vop_readdir = zfsctl_snapdir_readdir,
.vop_readdir = gfs_vop_readdir,
.vop_lookup = zfsctl_snapdir_lookup,		.vop_lookup = zfsctl_snapdir_lookup,
.vop_inactive = VOP_NULL,		.vop_reclaim = zfsctl_common_reclaim,
.vop_reclaim = zfsctl_snapdir_reclaim,
.vop_fid = zfsctl_common_fid,		.vop_fid = zfsctl_common_fid,
.vop_print = zfsctl_snapdir_print,		.vop_print = zfsctl_common_print,
};		};

static struct vop_vector zfsctl_ops_shares = {
.vop_default = &default_vnodeops,
.vop_open = zfsctl_common_open,
.vop_close = zfsctl_common_close,
.vop_ioctl = VOP_EINVAL,
.vop_getattr = zfsctl_shares_getattr,
.vop_access = zfsctl_common_access,
.vop_readdir = zfsctl_shares_readdir,
.vop_lookup = zfsctl_shares_lookup,
.vop_inactive = VOP_NULL,
.vop_reclaim = gfs_vop_reclaim,
.vop_fid = zfsctl_shares_fid,
.vop_print = zfsctl_shares_print,
};
#endif /* illumos */

/*
* pvp is the GFS vnode '.zfs/snapshot'.
*
* This creates a GFS node under '.zfs/snapshot' representing each
* snapshot. This newly created GFS node is what we mount snapshot
* vfs_t's on top of.
*/
static vnode_t *
zfsctl_snapshot_mknode(vnode_t *pvp, uint64_t objset)
{
vnode_t *vp;
zfsctl_node_t *zcp;

vp = gfs_dir_create(sizeof (zfsctl_node_t), pvp, pvp->v_vfsp,
&zfsctl_ops_snapshot, NULL, NULL, MAXNAMELEN, NULL, NULL);
zcp = vp->v_data;
zcp->zc_id = objset;
VOP_UNLOCK(vp, 0);

return (vp);
}

static int		static int
zfsctl_snapshot_inactive(ap)		zfsctl_snapshot_inactive(ap)
struct vop_inactive_args /* {		struct vop_inactive_args /* {
struct vnode *a_vp;		struct vnode *a_vp;
struct thread *a_td;		struct thread *a_td;
} / ap;		} / ap;
{		{
vnode_t *vp = ap->a_vp;		vnode_t *vp = ap->a_vp;

vrecycle(vp);		VERIFY(vrecycle(vp) == 1);
return (0);		return (0);
}		}

static int		static int
zfsctl_snapshot_reclaim(ap)		zfsctl_snapshot_reclaim(ap)
struct vop_reclaim_args /* {		struct vop_reclaim_args /* {
struct vnode *a_vp;		struct vnode *a_vp;
struct thread *a_td;		struct thread *a_td;
} / ap;		} / ap;
{		{
vnode_t *vp = ap->a_vp;		vnode_t *vp = ap->a_vp;
cred_t *cr = ap->a_td->td_ucred;		void *data = vp->v_data;
zfsctl_snapdir_t *sdp;
zfs_snapentry_t sep, next;
int locked;
vnode_t *dvp;

VERIFY(gfs_dir_lookup(vp, "..", &dvp, cr, 0, NULL, NULL) == 0);		sfs_reclaim_vnode(vp);
sdp = dvp->v_data;		sfs_destroy_node(data);
/* this may already have been unmounted */
if (sdp == NULL) {
VN_RELE(dvp);
return (0);		return (0);
}		}
if (!(locked = MUTEX_HELD(&sdp->sd_lock)))
mutex_enter(&sdp->sd_lock);

ASSERT(!vn_ismntpt(vp));

sep = avl_first(&sdp->sd_snaps);
while (sep != NULL) {
next = AVL_NEXT(&sdp->sd_snaps, sep);

if (sep->se_root == vp) {
avl_remove(&sdp->sd_snaps, sep);
kmem_free(sep->se_name, strlen(sep->se_name) + 1);
kmem_free(sep, sizeof (zfs_snapentry_t));
break;
}
sep = next;
}
ASSERT(sep != NULL);

if (!locked)
mutex_exit(&sdp->sd_lock);
VN_RELE(dvp);

/*
* Dispose of the vnode for the snapshot mount point.
* This is safe to do because once this entry has been removed
* from the AVL tree, it can't be found again, so cannot become
* "active". If we lookup the same name again we will end up
* creating a new vnode.
*/
gfs_vop_reclaim(ap);
return (0);

}

static int		static int
zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)		zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
{		{
zfsvfs_t *zfsvfs = ap->a_vp->v_vfsp->vfs_data;		struct mount *mp;
vnode_t dvp, vp;		vnode_t *dvp;
zfsctl_snapdir_t *sdp;		vnode_t *vp;
zfs_snapentry_t *sep;		sfs_node_t *node;
		size_t len;
		int locked;
int error;		int error;

ASSERT(zfsvfs->z_ctldir != NULL);		vp = ap->a_vp;
error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,		node = vp->v_data;
NULL, 0, NULL, kcred, NULL, NULL, NULL);
if (error != 0)
return (error);
sdp = dvp->v_data;

mutex_enter(&sdp->sd_lock);		/*
sep = avl_first(&sdp->sd_snaps);		* Prevent unmounting of the snapshot while the vnode lock
while (sep != NULL) {		* is not held. That is not strictly required, but allows
vp = sep->se_root;		* us to assert that an uncovered snapshot vnode is never
if (vp == ap->a_vp)		* "leaked".
break;		*/
sep = AVL_NEXT(&sdp->sd_snaps, sep);		mp = vp->v_mountedhere;
}		if (mp == NULL)
if (sep == NULL) {		return (SET_ERROR(ENOENT));
mutex_exit(&sdp->sd_lock);		error = vfs_busy(mp, 0);
error = ENOENT;		KASSERT(error == 0, ("vfs_busy(mp, 0) failed with %d", error));
} else {
size_t len;

len = strlen(sep->se_name);		/*
*ap->a_buflen -= len;		* We can vput the vnode as we can now depend on the reference owned
bcopy(sep->se_name, ap->a_buf + *ap->a_buflen, len);		* by the busied mp. But we also need to hold the vnode, because
mutex_exit(&sdp->sd_lock);		* the reference may go away after vfs_unbusy() which has to be called
vref(dvp);		* before we can lock the vnode again.
		*/
		locked = VOP_ISLOCKED(vp);
		vhold(vp);
		vput(vp);

		/* Look up .zfs/snapshot, our parent. */
		error = zfsctl_snapdir_vnode(vp->v_mount, NULL, LK_SHARED, &dvp);
		if (error == 0) {
		VOP_UNLOCK(dvp, 0);
*ap->a_vpp = dvp;		*ap->a_vpp = dvp;
}
VN_RELE(dvp);

		len = strlen(node->sn_name);
		*ap->a_buflen -= len;
		bcopy(node->sn_name, ap->a_buf + *ap->a_buflen, len);
		}
		vfs_unbusy(mp);
		vget(vp, locked \| LK_VNHELD \| LK_RETRY, curthread);
return (error);		return (error);
}		}

static int
zfsctl_snaphot_print(ap)
struct vop_print_args /* {
struct vnode *a_vp;
} / ap;
{
vnode_t *vp = ap->a_vp;
zfsctl_node_t *zcp = vp->v_data;

printf(" .zfs/snapshot/<snap> node\n");
printf(" id = %ju\n", (uintmax_t)zcp->zc_id);
zfsctl_common_print(ap);
return (0);
}

/*		/*
* These VP's should never see the light of day. They should always		* These VP's should never see the light of day. They should always
* be covered.		* be covered.
*/		*/
static struct vop_vector zfsctl_ops_snapshot = {		static struct vop_vector zfsctl_ops_snapshot = {
.vop_default = &default_vnodeops,		.vop_default = NULL, /* ensure very restricted access */
.vop_inactive = zfsctl_snapshot_inactive,		.vop_inactive = zfsctl_snapshot_inactive,
.vop_reclaim = zfsctl_snapshot_reclaim,		.vop_reclaim = zfsctl_snapshot_reclaim,
.vop_vptocnp = zfsctl_snapshot_vptocnp,		.vop_vptocnp = zfsctl_snapshot_vptocnp,
.vop_print = zfsctl_snaphot_print,		.vop_lock1 = vop_stdlock,
		.vop_unlock = vop_stdunlock,
		.vop_islocked = vop_stdislocked,
		.vop_advlockpurge = vop_stdadvlockpurge, /* called by vgone */
		.vop_print = zfsctl_common_print,
};		};

int		int
zfsctl_lookup_objset(vfs_t vfsp, uint64_t objsetid, zfsvfs_t *zfsvfsp)		zfsctl_lookup_objset(vfs_t vfsp, uint64_t objsetid, zfsvfs_t *zfsvfsp)
{		{
		struct mount *mp;
zfsvfs_t *zfsvfs = vfsp->vfs_data;		zfsvfs_t *zfsvfs = vfsp->vfs_data;
vnode_t dvp, vp;		vnode_t *vp;
zfsctl_snapdir_t *sdp;
zfsctl_node_t *zcp;
zfs_snapentry_t *sep;
int error;		int error;

ASSERT(zfsvfs->z_ctldir != NULL);		ASSERT(zfsvfs->z_ctldir != NULL);
error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,		*zfsvfsp = NULL;
NULL, 0, NULL, kcred, NULL, NULL, NULL);		error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
if (error != 0)		ZFSCTL_INO_SNAPDIR, objsetid, &vp);
return (error);		if (error == 0 && vp != NULL) {
sdp = dvp->v_data;

mutex_enter(&sdp->sd_lock);
sep = avl_first(&sdp->sd_snaps);
while (sep != NULL) {
vp = sep->se_root;
zcp = vp->v_data;
if (zcp->zc_id == objsetid)
break;

sep = AVL_NEXT(&sdp->sd_snaps, sep);
}

if (sep != NULL) {
VN_HOLD(vp);
/*		/*
* Return the mounted root rather than the covered mount point.		* XXX Probably need to at least reference, if not busy, the mp.
* Takes the GFS vnode at .zfs/snapshot/<snapshot objsetid>
* and returns the ZFS vnode mounted on top of the GFS node.
* This ZFS vnode is the root of the vfs for objset 'objsetid'.
*/		*/
error = traverse(&vp, LK_SHARED | LK_RETRY);		if (vp->v_mountedhere != NULL)
if (error == 0) {		*zfsvfsp = vp->v_mountedhere->mnt_data;
if (vp == sep->se_root) {		vput(vp);
VN_RELE(vp); /* release covered vp */
error = SET_ERROR(EINVAL);
} else {
*zfsvfsp = VTOZ(vp)->z_zfsvfs;
VN_URELE(vp); /* put snapshot's root vp */
}		}
		if (*zfsvfsp != NULL)
		return (0);
		else
		[Inline review comment - smh (unsubmitted, marked done)] No need for an else here, and personally I would check for the error case first so that the natural return is success, which I find clearer, e.g.: if (*zfsvfsp == NULL) return (SET_ERROR(EINVAL)); return (0);
		[Inline review reply - avg, author (unsubmitted, not marked done)] Yes, this is better.
		return (SET_ERROR(EINVAL));
}		}
mutex_exit(&sdp->sd_lock);
} else {
error = SET_ERROR(EINVAL);
mutex_exit(&sdp->sd_lock);
}

VN_RELE(dvp);

return (error);
}

/*		/*
* Unmount any snapshots for the given filesystem. This is called from		* Unmount any snapshots for the given filesystem. This is called from
* zfs_umount() - if we have a ctldir, then go through and unmount all the		* zfs_umount() - if we have a ctldir, then go through and unmount all the
* snapshots.		* snapshots.
*/		*/
int		int
zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)		zfsctl_umount_snapshots(vfs_t *vfsp, int fflags, cred_t *cr)
{		{
		char snapname[MAXNAMELEN];
zfsvfs_t *zfsvfs = vfsp->vfs_data;		zfsvfs_t *zfsvfs = vfsp->vfs_data;
		struct mount *mp;
vnode_t *dvp;		vnode_t *dvp;
zfsctl_snapdir_t *sdp;		vnode_t *vp;
zfs_snapentry_t *sep, *next;		sfs_node_t *node;
		sfs_node_t *snap;
		uint64_t cookie;
int error;		int error;

ASSERT(zfsvfs->z_ctldir != NULL);		ASSERT(zfsvfs->z_ctldir != NULL);
error = zfsctl_root_lookup(zfsvfs->z_ctldir, "snapshot", &dvp,
NULL, 0, NULL, cr, NULL, NULL, NULL);
if (error != 0)
return (error);
sdp = dvp->v_data;

mutex_enter(&sdp->sd_lock);		cookie = 0;
		for (;;) {
		uint64_t id;

sep = avl_first(&sdp->sd_snaps);		dsl_pool_config_enter(dmu_objset_pool(zfsvfs->z_os), FTAG);
while (sep != NULL) {		error = dmu_snapshot_list_next(zfsvfs->z_os, MAXNAMELEN,
next = AVL_NEXT(&sdp->sd_snaps, sep);		snapname, &id, &cookie, NULL);
		dsl_pool_config_exit(dmu_objset_pool(zfsvfs->z_os), FTAG);
		if (error != 0) {
		if (error == ENOENT)
		error = 0;
		break;
		}

		for (;;) {
		error = sfs_vnode_get(vfsp, LK_EXCLUSIVE,
		ZFSCTL_INO_SNAPDIR, id, &vp);
		if (error != 0 || vp == NULL)
		break;

		mp = vp->v_mountedhere;

/*		/*
* If this snapshot is not mounted, then it must		* v_mountedhere being NULL means that the
* have just been unmounted by somebody else, and		* (uncovered) vnode is in a transient state
* will be cleaned up by zfsctl_snapdir_inactive().		* (mounting or unmounting), so loop until it
		* settles down.
*/		*/
if (vn_ismntpt(sep->se_root)) {		if (mp != NULL)
error = zfsctl_unmount_snap(sep, fflags, cr);		break;
if (error) {		vput(vp);
avl_index_t where;		}
		if (error != 0)
		break;
		if (vp == NULL)
		continue; /* no mountpoint, nothing to do */

/*		/*
* Before reinserting snapshot to the tree,		* The mount-point vnode is kept locked to avoid spurious EBUSY
* check if it was actually removed. For example		* from a concurrent umount.
* when snapshot mount point is busy, we will		* The vnode lock must have recursive locking enabled.
* have an error here, but there will be no need
* to reinsert snapshot.
*/		*/
if (avl_find(&sdp->sd_snaps, sep, &where) == NULL)		vfs_ref(mp);
avl_insert(&sdp->sd_snaps, sep, where);		error = dounmount(mp, fflags, curthread);
		KASSERT_IMPLY(error == 0, vrefcnt(vp) == 1,
		("extra references after unmount"));
		vput(vp);
		if (error != 0)
break;		break;
}		}
}		KASSERT_IMPLY((fflags & MS_FORCE) != 0, error == 0,
sep = next;		("force unmounting failed"));
}

mutex_exit(&sdp->sd_lock);
VN_RELE(dvp);

return (error);		return (error);
}		}