Index: head/sys/fs/cd9660/cd9660_iconv.c =================================================================== --- head/sys/fs/cd9660/cd9660_iconv.c (revision 326267) +++ head/sys/fs/cd9660/cd9660_iconv.c (revision 326268) @@ -1,36 +1,38 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2003 Ryuichiro Imura * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include VFS_DECLARE_ICONV(cd9660); Index: head/sys/fs/devfs/devfs.h =================================================================== --- head/sys/fs/devfs/devfs.h (revision 326267) +++ head/sys/fs/devfs/devfs.h (revision 326268) @@ -1,205 +1,207 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 1992, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 2000 * Poul-Henning Kamp. All rights reserved. * Copyright (c) 2002 * Dima Dorfman. All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kernfs.h 8.6 (Berkeley) 3/29/95 * From: FreeBSD: src/sys/miscfs/kernfs/kernfs.h 1.14 * * $FreeBSD$ */ #ifndef _FS_DEVFS_DEVFS_H_ #define _FS_DEVFS_DEVFS_H_ #define DEVFS_MAGIC 0xdb0a087a /* * Identifiers. The ruleset and rule numbers are 16-bit values. The * "rule ID" is a combination of the ruleset and rule number; it * should be able to univocally describe a rule in the system. In * this implementation, the upper 16 bits of the rule ID is the * ruleset number; the lower 16 bits, the rule number within the * aforementioned ruleset. */ typedef uint16_t devfs_rnum; typedef uint16_t devfs_rsnum; typedef uint32_t devfs_rid; /* * Identifier manipulators. */ #define rid2rsn(rid) ((rid) >> 16) #define rid2rn(rid) ((rid) & 0xffff) #define mkrid(rsn, rn) ((rn) | ((rsn) << 16)) /* * Plain DEVFS rule. This gets shared between kernel and userland * verbatim, so it shouldn't contain any pointers or other kernel- or * userland-specific values. */ struct devfs_rule { uint32_t dr_magic; /* Magic number. */ devfs_rid dr_id; /* Identifier. */ /* * Conditions under which this rule should be applied. These * are ANDed together since OR can be simulated by using * multiple rules. dr_icond determines which of the other * variables we should process. */ int dr_icond; #define DRC_DSWFLAGS 0x001 #define DRC_PATHPTRN 0x002 int dr_dswflags; /* cdevsw flags to match. */ #define DEVFS_MAXPTRNLEN 200 char dr_pathptrn[DEVFS_MAXPTRNLEN]; /* Pattern to match path. */ /* * Things to change. dr_iacts determines which of the other * variables we should process. */ int dr_iacts; #define DRA_BACTS 0x001 #define DRA_UID 0x002 #define DRA_GID 0x004 #define DRA_MODE 0x008 #define DRA_INCSET 0x010 int dr_bacts; /* Boolean (on/off) action. */ #define DRB_HIDE 0x001 /* Hide entry (DE_WHITEOUT). */ #define DRB_UNHIDE 0x002 /* Unhide entry. */ uid_t dr_uid; gid_t dr_gid; mode_t dr_mode; devfs_rsnum dr_incset; /* Included ruleset. */ }; /* * Rule-related ioctls. 
*/ #define DEVFSIO_RADD _IOWR('D', 0, struct devfs_rule) #define DEVFSIO_RDEL _IOW('D', 1, devfs_rid) #define DEVFSIO_RAPPLY _IOW('D', 2, struct devfs_rule) #define DEVFSIO_RAPPLYID _IOW('D', 3, devfs_rid) #define DEVFSIO_RGETNEXT _IOWR('D', 4, struct devfs_rule) #define DEVFSIO_SUSE _IOW('D', 10, devfs_rsnum) #define DEVFSIO_SAPPLY _IOW('D', 11, devfs_rsnum) #define DEVFSIO_SGETNEXT _IOWR('D', 12, devfs_rsnum) /* XXX: DEVFSIO_RS_GET_INFO for refcount, active if any, etc. */ #ifdef _KERNEL #ifdef MALLOC_DECLARE MALLOC_DECLARE(M_DEVFS); #endif struct componentname; TAILQ_HEAD(devfs_dlist_head, devfs_dirent); struct devfs_dirent { struct cdev_priv *de_cdp; int de_inode; int de_flags; #define DE_WHITEOUT 0x01 #define DE_DOT 0x02 #define DE_DOTDOT 0x04 #define DE_DOOMED 0x08 #define DE_COVERED 0x10 #define DE_USER 0x20 int de_holdcnt; struct dirent *de_dirent; TAILQ_ENTRY(devfs_dirent) de_list; struct devfs_dlist_head de_dlist; struct devfs_dirent *de_dir; int de_links; mode_t de_mode; uid_t de_uid; gid_t de_gid; struct label *de_label; struct timespec de_atime; struct timespec de_mtime; struct timespec de_ctime; struct vnode *de_vnode; char *de_symlink; }; struct devfs_mount { u_int dm_idx; struct mount *dm_mount; struct devfs_dirent *dm_rootdir; unsigned dm_generation; int dm_holdcnt; struct sx dm_lock; devfs_rsnum dm_ruleset; }; #define DEVFS_ROOTINO 2 extern unsigned devfs_rule_depth; #define VFSTODEVFS(mp) ((struct devfs_mount *)((mp)->mnt_data)) #define DEVFS_DE_HOLD(de) ((de)->de_holdcnt++) #define DEVFS_DE_DROP(de) (--(de)->de_holdcnt == 0) #define DEVFS_DMP_HOLD(dmp) ((dmp)->dm_holdcnt++) #define DEVFS_DMP_DROP(dmp) (--(dmp)->dm_holdcnt == 0) #define DEVFS_DEL_VNLOCKED 0x01 #define DEVFS_DEL_NORECURSE 0x02 void devfs_rules_apply(struct devfs_mount *, struct devfs_dirent *); void devfs_rules_cleanup(struct devfs_mount *); int devfs_rules_ioctl(struct devfs_mount *, u_long, caddr_t, struct thread *); void devfs_ruleset_set(devfs_rsnum rsnum, struct devfs_mount 
*dm); void devfs_ruleset_apply(struct devfs_mount *dm); int devfs_allocv(struct devfs_dirent *, struct mount *, int, struct vnode **); char *devfs_fqpn(char *, struct devfs_mount *, struct devfs_dirent *, struct componentname *); void devfs_delete(struct devfs_mount *, struct devfs_dirent *, int); void devfs_dirent_free(struct devfs_dirent *); void devfs_populate(struct devfs_mount *); void devfs_cleanup(struct devfs_mount *); void devfs_unmount_final(struct devfs_mount *); struct devfs_dirent *devfs_newdirent(char *, int); struct devfs_dirent *devfs_parent_dirent(struct devfs_dirent *); struct devfs_dirent *devfs_vmkdir(struct devfs_mount *, char *, int, struct devfs_dirent *, u_int); struct devfs_dirent *devfs_find(struct devfs_dirent *, const char *, int, int); #endif /* _KERNEL */ #endif /* !_FS_DEVFS_DEVFS_H_ */ Index: head/sys/fs/devfs/devfs_devs.c =================================================================== --- head/sys/fs/devfs/devfs_devs.c (revision 326267) +++ head/sys/fs/devfs/devfs_devs.c (revision 326268) @@ -1,741 +1,743 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2000,2004 * Poul-Henning Kamp. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vfsops.c 1.36 * * $FreeBSD$ */ #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * The one true (but secret) list of active devices in the system. * Locked by dev_lock()/devmtx */ struct cdev_priv_list cdevp_list = TAILQ_HEAD_INITIALIZER(cdevp_list); struct unrhdr *devfs_inos; static MALLOC_DEFINE(M_DEVFS2, "DEVFS2", "DEVFS data 2"); static MALLOC_DEFINE(M_DEVFS3, "DEVFS3", "DEVFS data 3"); static MALLOC_DEFINE(M_CDEVP, "DEVFS1", "DEVFS cdev_priv storage"); SYSCTL_NODE(_vfs, OID_AUTO, devfs, CTLFLAG_RW, 0, "DEVFS filesystem"); static unsigned devfs_generation; SYSCTL_UINT(_vfs_devfs, OID_AUTO, generation, CTLFLAG_RD, &devfs_generation, 0, "DEVFS generation number"); unsigned devfs_rule_depth = 1; SYSCTL_UINT(_vfs_devfs, OID_AUTO, rule_depth, CTLFLAG_RW, &devfs_rule_depth, 0, "Max depth of ruleset include"); /* * Helper sysctl for devname(3). We're given a dev_t and return the * name, if any, registered by the device driver. */ static int sysctl_devname(SYSCTL_HANDLER_ARGS) { int error; dev_t ud; #ifdef COMPAT_FREEBSD11 uint32_t ud_compat; #endif struct cdev_priv *cdp; struct cdev *dev; #ifdef COMPAT_FREEBSD11 if (req->newlen == sizeof(ud_compat)) { error = SYSCTL_IN(req, &ud_compat, sizeof(ud_compat)); if (error == 0) ud = ud_compat == (uint32_t)NODEV ? 
NODEV : ud_compat; } else #endif error = SYSCTL_IN(req, &ud, sizeof (ud)); if (error) return (error); if (ud == NODEV) return (EINVAL); dev = NULL; dev_lock(); TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) if (cdp->cdp_inode == ud) { dev = &cdp->cdp_c; dev_refl(dev); break; } dev_unlock(); if (dev == NULL) return (ENOENT); error = SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1); dev_rel(dev); return (error); } SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_MPSAFE, NULL, 0, sysctl_devname, "", "devname(3) handler"); SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, sizeof(struct cdev), "sizeof(struct cdev)"); SYSCTL_INT(_debug_sizeof, OID_AUTO, cdev_priv, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, sizeof(struct cdev_priv), "sizeof(struct cdev_priv)"); struct cdev * devfs_alloc(int flags) { struct cdev_priv *cdp; struct cdev *cdev; struct timespec ts; cdp = malloc(sizeof *cdp, M_CDEVP, M_ZERO | ((flags & MAKEDEV_NOWAIT) ? M_NOWAIT : M_WAITOK)); if (cdp == NULL) return (NULL); cdp->cdp_dirents = &cdp->cdp_dirent0; cdev = &cdp->cdp_c; LIST_INIT(&cdev->si_children); vfs_timestamp(&ts); cdev->si_atime = cdev->si_mtime = cdev->si_ctime = ts; return (cdev); } int devfs_dev_exists(const char *name) { struct cdev_priv *cdp; mtx_assert(&devmtx, MA_OWNED); TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) { if ((cdp->cdp_flags & CDP_ACTIVE) == 0) continue; if (devfs_pathpath(cdp->cdp_c.si_name, name) != 0) return (1); if (devfs_pathpath(name, cdp->cdp_c.si_name) != 0) return (1); } if (devfs_dir_find(name) != 0) return (1); return (0); } void devfs_free(struct cdev *cdev) { struct cdev_priv *cdp; cdp = cdev2priv(cdev); if (cdev->si_cred != NULL) crfree(cdev->si_cred); devfs_free_cdp_inode(cdp->cdp_inode); if (cdp->cdp_maxdirent > 0) free(cdp->cdp_dirents, M_DEVFS2); free(cdp, M_CDEVP); } struct devfs_dirent * devfs_find(struct devfs_dirent *dd, const char *name, int namelen, int type) { struct devfs_dirent *de; 
TAILQ_FOREACH(de, &dd->de_dlist, de_list) { if (namelen != de->de_dirent->d_namlen) continue; if (type != 0 && type != de->de_dirent->d_type) continue; /* * The race with finding non-active name is not * completely closed by the check, but it is similar * to the devfs_allocv() in making it unlikely enough. */ if (de->de_dirent->d_type == DT_CHR && (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0) continue; if (bcmp(name, de->de_dirent->d_name, namelen) != 0) continue; break; } KASSERT(de == NULL || (de->de_flags & DE_DOOMED) == 0, ("devfs_find: returning a doomed entry")); return (de); } struct devfs_dirent * devfs_newdirent(char *name, int namelen) { int i; struct devfs_dirent *de; struct dirent d; d.d_namlen = namelen; i = sizeof(*de) + GENERIC_DIRSIZ(&d); de = malloc(i, M_DEVFS3, M_WAITOK | M_ZERO); de->de_dirent = (struct dirent *)(de + 1); de->de_dirent->d_namlen = namelen; de->de_dirent->d_reclen = GENERIC_DIRSIZ(&d); bcopy(name, de->de_dirent->d_name, namelen); de->de_dirent->d_name[namelen] = '\0'; vfs_timestamp(&de->de_ctime); de->de_mtime = de->de_atime = de->de_ctime; de->de_links = 1; de->de_holdcnt = 1; #ifdef MAC mac_devfs_init(de); #endif return (de); } struct devfs_dirent * devfs_parent_dirent(struct devfs_dirent *de) { if (de->de_dirent->d_type != DT_DIR) return (de->de_dir); if (de->de_flags & (DE_DOT | DE_DOTDOT)) return (NULL); de = TAILQ_FIRST(&de->de_dlist); /* "." */ if (de == NULL) return (NULL); de = TAILQ_NEXT(de, de_list); /* ".." */ if (de == NULL) return (NULL); return (de->de_dir); } struct devfs_dirent * devfs_vmkdir(struct devfs_mount *dmp, char *name, int namelen, struct devfs_dirent *dotdot, u_int inode) { struct devfs_dirent *dd; struct devfs_dirent *de; /* Create the new directory */ dd = devfs_newdirent(name, namelen); TAILQ_INIT(&dd->de_dlist); dd->de_dirent->d_type = DT_DIR; dd->de_mode = 0555; dd->de_links = 2; dd->de_dir = dd; if (inode != 0) dd->de_inode = inode; else dd->de_inode = alloc_unr(devfs_inos); /* * "." and ".." 
are always the two first entries in the * de_dlist list. * * Create the "." entry in the new directory. */ de = devfs_newdirent(".", 1); de->de_dirent->d_type = DT_DIR; de->de_flags |= DE_DOT; TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); de->de_dir = dd; /* Create the ".." entry in the new directory. */ de = devfs_newdirent("..", 2); de->de_dirent->d_type = DT_DIR; de->de_flags |= DE_DOTDOT; TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); if (dotdot == NULL) { de->de_dir = dd; } else { de->de_dir = dotdot; sx_assert(&dmp->dm_lock, SX_XLOCKED); TAILQ_INSERT_TAIL(&dotdot->de_dlist, dd, de_list); dotdot->de_links++; devfs_rules_apply(dmp, dd); } #ifdef MAC mac_devfs_create_directory(dmp->dm_mount, name, namelen, dd); #endif return (dd); } void devfs_dirent_free(struct devfs_dirent *de) { struct vnode *vp; vp = de->de_vnode; mtx_lock(&devfs_de_interlock); if (vp != NULL && vp->v_data == de) vp->v_data = NULL; mtx_unlock(&devfs_de_interlock); free(de, M_DEVFS3); } /* * Removes a directory if it is empty. Also empty parent directories are * removed recursively. */ static void devfs_rmdir_empty(struct devfs_mount *dm, struct devfs_dirent *de) { struct devfs_dirent *dd, *de_dot, *de_dotdot; sx_assert(&dm->dm_lock, SX_XLOCKED); for (;;) { KASSERT(de->de_dirent->d_type == DT_DIR, ("devfs_rmdir_empty: de is not a directory")); if ((de->de_flags & DE_DOOMED) != 0 || de == dm->dm_rootdir) return; de_dot = TAILQ_FIRST(&de->de_dlist); KASSERT(de_dot != NULL, ("devfs_rmdir_empty: . missing")); de_dotdot = TAILQ_NEXT(de_dot, de_list); KASSERT(de_dotdot != NULL, ("devfs_rmdir_empty: .. missing")); /* Return if the directory is not empty. 
*/ if (TAILQ_NEXT(de_dotdot, de_list) != NULL) return; dd = devfs_parent_dirent(de); KASSERT(dd != NULL, ("devfs_rmdir_empty: NULL dd")); TAILQ_REMOVE(&de->de_dlist, de_dot, de_list); TAILQ_REMOVE(&de->de_dlist, de_dotdot, de_list); TAILQ_REMOVE(&dd->de_dlist, de, de_list); DEVFS_DE_HOLD(dd); devfs_delete(dm, de, DEVFS_DEL_NORECURSE); devfs_delete(dm, de_dot, DEVFS_DEL_NORECURSE); devfs_delete(dm, de_dotdot, DEVFS_DEL_NORECURSE); if (DEVFS_DE_DROP(dd)) { devfs_dirent_free(dd); return; } de = dd; } } /* * The caller needs to hold the dm for the duration of the call since * dm->dm_lock may be temporary dropped. */ void devfs_delete(struct devfs_mount *dm, struct devfs_dirent *de, int flags) { struct devfs_dirent *dd; struct vnode *vp; KASSERT((de->de_flags & DE_DOOMED) == 0, ("devfs_delete doomed dirent")); de->de_flags |= DE_DOOMED; if ((flags & DEVFS_DEL_NORECURSE) == 0) { dd = devfs_parent_dirent(de); if (dd != NULL) DEVFS_DE_HOLD(dd); if (de->de_flags & DE_USER) { KASSERT(dd != NULL, ("devfs_delete: NULL dd")); devfs_dir_unref_de(dm, dd); } } else dd = NULL; mtx_lock(&devfs_de_interlock); vp = de->de_vnode; if (vp != NULL) { VI_LOCK(vp); mtx_unlock(&devfs_de_interlock); vholdl(vp); sx_unlock(&dm->dm_lock); if ((flags & DEVFS_DEL_VNLOCKED) == 0) vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY); else VI_UNLOCK(vp); vgone(vp); if ((flags & DEVFS_DEL_VNLOCKED) == 0) VOP_UNLOCK(vp, 0); vdrop(vp); sx_xlock(&dm->dm_lock); } else mtx_unlock(&devfs_de_interlock); if (de->de_symlink) { free(de->de_symlink, M_DEVFS); de->de_symlink = NULL; } #ifdef MAC mac_devfs_destroy(de); #endif if (de->de_inode > DEVFS_ROOTINO) { devfs_free_cdp_inode(de->de_inode); de->de_inode = 0; } if (DEVFS_DE_DROP(de)) devfs_dirent_free(de); if (dd != NULL) { if (DEVFS_DE_DROP(dd)) devfs_dirent_free(dd); else devfs_rmdir_empty(dm, dd); } } /* * Called on unmount. * Recursively removes the entire tree. * The caller needs to hold the dm for the duration of the call. 
*/ static void devfs_purge(struct devfs_mount *dm, struct devfs_dirent *dd) { struct devfs_dirent *de; sx_assert(&dm->dm_lock, SX_XLOCKED); DEVFS_DE_HOLD(dd); for (;;) { /* * Use TAILQ_LAST() to remove "." and ".." last. * We might need ".." to resolve a path in * devfs_dir_unref_de(). */ de = TAILQ_LAST(&dd->de_dlist, devfs_dlist_head); if (de == NULL) break; TAILQ_REMOVE(&dd->de_dlist, de, de_list); if (de->de_flags & DE_USER) devfs_dir_unref_de(dm, dd); if (de->de_flags & (DE_DOT | DE_DOTDOT)) devfs_delete(dm, de, DEVFS_DEL_NORECURSE); else if (de->de_dirent->d_type == DT_DIR) devfs_purge(dm, de); else devfs_delete(dm, de, DEVFS_DEL_NORECURSE); } if (DEVFS_DE_DROP(dd)) devfs_dirent_free(dd); else if ((dd->de_flags & DE_DOOMED) == 0) devfs_delete(dm, dd, DEVFS_DEL_NORECURSE); } /* * Each cdev_priv has an array of pointers to devfs_dirent which is indexed * by the mount points dm_idx. * This function extends the array when necessary, taking into account that * the default array is 1 element and not malloc'ed. */ static void devfs_metoo(struct cdev_priv *cdp, struct devfs_mount *dm) { struct devfs_dirent **dep; int siz; siz = (dm->dm_idx + 1) * sizeof *dep; dep = malloc(siz, M_DEVFS2, M_WAITOK | M_ZERO); dev_lock(); if (dm->dm_idx <= cdp->cdp_maxdirent) { /* We got raced */ dev_unlock(); free(dep, M_DEVFS2); return; } memcpy(dep, cdp->cdp_dirents, (cdp->cdp_maxdirent + 1) * sizeof *dep); if (cdp->cdp_maxdirent > 0) free(cdp->cdp_dirents, M_DEVFS2); cdp->cdp_dirents = dep; /* * XXX: if malloc told us how much we actually got this could * XXX: be optimized. */ cdp->cdp_maxdirent = dm->dm_idx; dev_unlock(); } /* * The caller needs to hold the dm for the duration of the call. 
*/ static int devfs_populate_loop(struct devfs_mount *dm, int cleanup) { struct cdev_priv *cdp; struct devfs_dirent *de; struct devfs_dirent *dd, *dt; struct cdev *pdev; int de_flags, depth, j; char *q, *s; sx_assert(&dm->dm_lock, SX_XLOCKED); dev_lock(); TAILQ_FOREACH(cdp, &cdevp_list, cdp_list) { KASSERT(cdp->cdp_dirents != NULL, ("NULL cdp_dirents")); /* * If we are unmounting, or the device has been destroyed, * clean up our dirent. */ if ((cleanup || !(cdp->cdp_flags & CDP_ACTIVE)) && dm->dm_idx <= cdp->cdp_maxdirent && cdp->cdp_dirents[dm->dm_idx] != NULL) { de = cdp->cdp_dirents[dm->dm_idx]; cdp->cdp_dirents[dm->dm_idx] = NULL; KASSERT(cdp == de->de_cdp, ("%s %d %s %p %p", __func__, __LINE__, cdp->cdp_c.si_name, cdp, de->de_cdp)); KASSERT(de->de_dir != NULL, ("Null de->de_dir")); dev_unlock(); TAILQ_REMOVE(&de->de_dir->de_dlist, de, de_list); de->de_cdp = NULL; de->de_inode = 0; devfs_delete(dm, de, 0); dev_lock(); cdp->cdp_inuse--; dev_unlock(); return (1); } /* * GC any lingering devices */ if (!(cdp->cdp_flags & CDP_ACTIVE)) { if (cdp->cdp_inuse > 0) continue; TAILQ_REMOVE(&cdevp_list, cdp, cdp_list); dev_unlock(); dev_rel(&cdp->cdp_c); return (1); } /* * Don't create any new dirents if we are unmounting */ if (cleanup) continue; KASSERT((cdp->cdp_flags & CDP_ACTIVE), ("Bogons, I tell ya'!")); if (dm->dm_idx <= cdp->cdp_maxdirent && cdp->cdp_dirents[dm->dm_idx] != NULL) { de = cdp->cdp_dirents[dm->dm_idx]; KASSERT(cdp == de->de_cdp, ("inconsistent cdp")); continue; } cdp->cdp_inuse++; dev_unlock(); if (dm->dm_idx > cdp->cdp_maxdirent) devfs_metoo(cdp, dm); dd = dm->dm_rootdir; s = cdp->cdp_c.si_name; for (;;) { for (q = s; *q != '/' && *q != '\0'; q++) continue; if (*q != '/') break; de = devfs_find(dd, s, q - s, 0); if (de == NULL) de = devfs_vmkdir(dm, s, q - s, dd, 0); else if (de->de_dirent->d_type == DT_LNK) { de = devfs_find(dd, s, q - s, DT_DIR); if (de == NULL) de = devfs_vmkdir(dm, s, q - s, dd, 0); de->de_flags |= DE_COVERED; } s = q + 1; dd = 
de; KASSERT(dd->de_dirent->d_type == DT_DIR && (dd->de_flags & (DE_DOT | DE_DOTDOT)) == 0, ("%s: invalid directory (si_name=%s)", __func__, cdp->cdp_c.si_name)); } de_flags = 0; de = devfs_find(dd, s, q - s, DT_LNK); if (de != NULL) de_flags |= DE_COVERED; de = devfs_newdirent(s, q - s); if (cdp->cdp_c.si_flags & SI_ALIAS) { de->de_uid = 0; de->de_gid = 0; de->de_mode = 0755; de->de_dirent->d_type = DT_LNK; pdev = cdp->cdp_c.si_parent; dt = dd; depth = 0; while (dt != dm->dm_rootdir && (dt = devfs_parent_dirent(dt)) != NULL) depth++; j = depth * 3 + strlen(pdev->si_name) + 1; de->de_symlink = malloc(j, M_DEVFS, M_WAITOK); de->de_symlink[0] = 0; while (depth-- > 0) strcat(de->de_symlink, "../"); strcat(de->de_symlink, pdev->si_name); } else { de->de_uid = cdp->cdp_c.si_uid; de->de_gid = cdp->cdp_c.si_gid; de->de_mode = cdp->cdp_c.si_mode; de->de_dirent->d_type = DT_CHR; } de->de_flags |= de_flags; de->de_inode = cdp->cdp_inode; de->de_cdp = cdp; #ifdef MAC mac_devfs_create_device(cdp->cdp_c.si_cred, dm->dm_mount, &cdp->cdp_c, de); #endif de->de_dir = dd; TAILQ_INSERT_TAIL(&dd->de_dlist, de, de_list); devfs_rules_apply(dm, de); dev_lock(); /* XXX: could check that cdp is still active here */ KASSERT(cdp->cdp_dirents[dm->dm_idx] == NULL, ("%s %d\n", __func__, __LINE__)); cdp->cdp_dirents[dm->dm_idx] = de; KASSERT(de->de_cdp != (void *)0xdeadc0de, ("%s %d\n", __func__, __LINE__)); dev_unlock(); return (1); } dev_unlock(); return (0); } /* * The caller needs to hold the dm for the duration of the call. */ void devfs_populate(struct devfs_mount *dm) { unsigned gen; sx_assert(&dm->dm_lock, SX_XLOCKED); gen = devfs_generation; if (dm->dm_generation == gen) return; while (devfs_populate_loop(dm, 0)) continue; dm->dm_generation = gen; } /* * The caller needs to hold the dm for the duration of the call. 
*/ void devfs_cleanup(struct devfs_mount *dm) { sx_assert(&dm->dm_lock, SX_XLOCKED); while (devfs_populate_loop(dm, 1)) continue; devfs_purge(dm, dm->dm_rootdir); } /* * devfs_create() and devfs_destroy() are called from kern_conf.c and * in both cases the devlock() mutex is held, so no further locking * is necessary and no sleeping allowed. */ void devfs_create(struct cdev *dev) { struct cdev_priv *cdp; mtx_assert(&devmtx, MA_OWNED); cdp = cdev2priv(dev); cdp->cdp_flags |= CDP_ACTIVE; cdp->cdp_inode = alloc_unrl(devfs_inos); dev_refl(dev); TAILQ_INSERT_TAIL(&cdevp_list, cdp, cdp_list); devfs_generation++; } void devfs_destroy(struct cdev *dev) { struct cdev_priv *cdp; mtx_assert(&devmtx, MA_OWNED); cdp = cdev2priv(dev); cdp->cdp_flags &= ~CDP_ACTIVE; devfs_generation++; } ino_t devfs_alloc_cdp_inode(void) { return (alloc_unr(devfs_inos)); } void devfs_free_cdp_inode(ino_t ino) { if (ino > 0) free_unr(devfs_inos, ino); } static void devfs_devs_init(void *junk __unused) { devfs_inos = new_unrhdr(DEVFS_ROOTINO + 1, INT_MAX, &devmtx); } SYSINIT(devfs_devs, SI_SUB_DEVFS, SI_ORDER_FIRST, devfs_devs_init, NULL); Index: head/sys/fs/devfs/devfs_dir.c =================================================================== --- head/sys/fs/devfs/devfs_dir.c (revision 326267) +++ head/sys/fs/devfs/devfs_dir.c (revision 326268) @@ -1,175 +1,177 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010 Jaakko Heinonen * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include struct dirlistent { char *dir; int refcnt; LIST_ENTRY(dirlistent) link; }; static LIST_HEAD(, dirlistent) devfs_dirlist = LIST_HEAD_INITIALIZER(devfs_dirlist); static MALLOC_DEFINE(M_DEVFS4, "DEVFS4", "DEVFS directory list"); static struct mtx dirlist_mtx; MTX_SYSINIT(dirlist_mtx, &dirlist_mtx, "devfs dirlist lock", MTX_DEF); /* Returns 1 if the path is in the directory list. 
*/ int devfs_dir_find(const char *path) { struct dirlistent *dle; mtx_lock(&dirlist_mtx); LIST_FOREACH(dle, &devfs_dirlist, link) { if (devfs_pathpath(dle->dir, path) != 0) { mtx_unlock(&dirlist_mtx); return (1); } } mtx_unlock(&dirlist_mtx); return (0); } static struct dirlistent * devfs_dir_findent_locked(const char *dir) { struct dirlistent *dle; mtx_assert(&dirlist_mtx, MA_OWNED); LIST_FOREACH(dle, &devfs_dirlist, link) { if (strcmp(dir, dle->dir) == 0) return (dle); } return (NULL); } static void devfs_dir_ref(const char *dir) { struct dirlistent *dle, *dle_new; if (*dir == '\0') return; dle_new = malloc(sizeof(*dle), M_DEVFS4, M_WAITOK); dle_new->dir = strdup(dir, M_DEVFS4); dle_new->refcnt = 1; mtx_lock(&dirlist_mtx); dle = devfs_dir_findent_locked(dir); if (dle != NULL) { dle->refcnt++; mtx_unlock(&dirlist_mtx); free(dle_new->dir, M_DEVFS4); free(dle_new, M_DEVFS4); return; } LIST_INSERT_HEAD(&devfs_dirlist, dle_new, link); mtx_unlock(&dirlist_mtx); } void devfs_dir_ref_de(struct devfs_mount *dm, struct devfs_dirent *de) { char dirname[SPECNAMELEN + 1], *namep; namep = devfs_fqpn(dirname, dm, de, NULL); KASSERT(namep != NULL, ("devfs_ref_dir_de: NULL namep")); devfs_dir_ref(namep); } static void devfs_dir_unref(const char *dir) { struct dirlistent *dle; if (*dir == '\0') return; mtx_lock(&dirlist_mtx); dle = devfs_dir_findent_locked(dir); KASSERT(dle != NULL, ("devfs_dir_unref: dir %s not referenced", dir)); dle->refcnt--; KASSERT(dle->refcnt >= 0, ("devfs_dir_unref: negative refcnt")); if (dle->refcnt == 0) { LIST_REMOVE(dle, link); mtx_unlock(&dirlist_mtx); free(dle->dir, M_DEVFS4); free(dle, M_DEVFS4); } else mtx_unlock(&dirlist_mtx); } void devfs_dir_unref_de(struct devfs_mount *dm, struct devfs_dirent *de) { char dirname[SPECNAMELEN + 1], *namep; namep = devfs_fqpn(dirname, dm, de, NULL); KASSERT(namep != NULL, ("devfs_unref_dir_de: NULL namep")); devfs_dir_unref(namep); } /* Returns 1 if the path p1 contains the path p2. 
*/ int devfs_pathpath(const char *p1, const char *p2) { for (;;p1++, p2++) { if (*p1 != *p2) { if (*p1 == '/' && *p2 == '\0') return (1); else return (0); } else if (*p1 == '\0') return (1); } /* NOTREACHED */ } Index: head/sys/fs/devfs/devfs_int.h =================================================================== --- head/sys/fs/devfs/devfs_int.h (revision 326267) +++ head/sys/fs/devfs/devfs_int.h (revision 326268) @@ -1,96 +1,98 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2005 Poul-Henning Kamp. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * This file documents a private interface and it SHALL only be used * by kern/kern_conf.c and fs/devfs/... 
*/ #ifndef _FS_DEVFS_DEVFS_INT_H_ #define _FS_DEVFS_DEVFS_INT_H_ #include #ifdef _KERNEL struct devfs_dirent; struct devfs_mount; struct cdev_privdata { struct file *cdpd_fp; void *cdpd_data; void (*cdpd_dtr)(void *); LIST_ENTRY(cdev_privdata) cdpd_list; }; struct cdev_priv { struct cdev cdp_c; TAILQ_ENTRY(cdev_priv) cdp_list; u_int cdp_inode; u_int cdp_flags; #define CDP_ACTIVE (1 << 0) #define CDP_SCHED_DTR (1 << 1) #define CDP_UNREF_DTR (1 << 2) u_int cdp_inuse; u_int cdp_maxdirent; struct devfs_dirent **cdp_dirents; struct devfs_dirent *cdp_dirent0; TAILQ_ENTRY(cdev_priv) cdp_dtr_list; void (*cdp_dtr_cb)(void *); void *cdp_dtr_cb_arg; LIST_HEAD(, cdev_privdata) cdp_fdpriv; }; #define cdev2priv(c) __containerof(c, struct cdev_priv, cdp_c) struct cdev *devfs_alloc(int); int devfs_dev_exists(const char *); void devfs_free(struct cdev *); void devfs_create(struct cdev *); void devfs_destroy(struct cdev *); void devfs_destroy_cdevpriv(struct cdev_privdata *); int devfs_dir_find(const char *); void devfs_dir_ref_de(struct devfs_mount *, struct devfs_dirent *); void devfs_dir_unref_de(struct devfs_mount *, struct devfs_dirent *); int devfs_pathpath(const char *, const char *); extern struct unrhdr *devfs_inos; extern struct mtx devmtx; extern struct mtx devfs_de_interlock; extern struct sx clone_drain_lock; extern struct mtx cdevpriv_mtx; extern TAILQ_HEAD(cdev_priv_list, cdev_priv) cdevp_list; #endif /* _KERNEL */ #endif /* !_FS_DEVFS_DEVFS_INT_H_ */ Index: head/sys/fs/devfs/devfs_rule.c =================================================================== --- head/sys/fs/devfs/devfs_rule.c (revision 326267) +++ head/sys/fs/devfs/devfs_rule.c (revision 326268) @@ -1,822 +1,824 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2002 Dima Dorfman. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * DEVFS ruleset implementation. * * A note on terminology: To "run" a rule on a dirent is to take the * prescribed action; to "apply" a rule is to check whether it matches * a dirent and run if if it does. * * A note on locking: Only foreign entry points (non-static functions) * should deal with locking. Everything else assumes we already hold * the required kind of lock. * * A note on namespace: devfs_rules_* are the non-static functions for * the entire "ruleset" subsystem, devfs_rule_* are the static * functions that operate on rules, and devfs_ruleset_* are the static * functions that operate on rulesets. The line between the last two * isn't always clear, but the guideline is still useful. * * A note on "special" identifiers: Ruleset 0 is the NULL, or empty, * ruleset; it cannot be deleted or changed in any way. 
This may be * assumed inside the code; e.g., a ruleset of 0 may be interpeted to * mean "no ruleset". The interpretation of rule 0 is * command-dependent, but in no case is there a real rule with number * 0. * * A note on errno codes: To make it easier for the userland to tell * what went wrong, we sometimes use errno codes that are not entirely * appropriate for the error but that would be less ambiguous than the * appropriate "generic" code. For example, when we can't find a * ruleset, we return ESRCH instead of ENOENT (except in * DEVFSIO_{R,S}GETNEXT, where a nonexistent ruleset means "end of * list", and the userland expects ENOENT to be this indicator); this * way, when an operation fails, it's clear that what couldn't be * found is a ruleset and not a rule (well, it's clear to those who * know the convention). */ #include #include #include #include #include #include #include #include #include #include #include #include /* * Kernel version of devfs_rule. */ struct devfs_krule { TAILQ_ENTRY(devfs_krule) dk_list; struct devfs_ruleset *dk_ruleset; struct devfs_rule dk_rule; }; TAILQ_HEAD(rulehead, devfs_krule); static MALLOC_DEFINE(M_DEVFSRULE, "DEVFS_RULE", "DEVFS rule storage"); /* * Structure to describe a ruleset. 
*/ struct devfs_ruleset { TAILQ_ENTRY(devfs_ruleset) ds_list; struct rulehead ds_rules; devfs_rsnum ds_number; int ds_refcount; }; static devfs_rid devfs_rid_input(devfs_rid rid, struct devfs_mount *dm); static void devfs_rule_applyde_recursive(struct devfs_krule *dk, struct devfs_mount *dm, struct devfs_dirent *de); static void devfs_rule_applydm(struct devfs_krule *dk, struct devfs_mount *dm); static int devfs_rule_autonumber(struct devfs_ruleset *ds, devfs_rnum *rnp); static struct devfs_krule *devfs_rule_byid(devfs_rid rid); static int devfs_rule_delete(struct devfs_krule *dkp); static struct cdev *devfs_rule_getdev(struct devfs_dirent *de); static int devfs_rule_input(struct devfs_rule *dr, struct devfs_mount *dm); static int devfs_rule_insert(struct devfs_rule *dr); static int devfs_rule_match(struct devfs_krule *dk, struct devfs_mount *dm, struct devfs_dirent *de); static int devfs_rule_matchpath(struct devfs_krule *dk, struct devfs_mount *dm, struct devfs_dirent *de); static void devfs_rule_run(struct devfs_krule *dk, struct devfs_mount *dm, struct devfs_dirent *de, unsigned depth); static void devfs_ruleset_applyde(struct devfs_ruleset *ds, struct devfs_mount *dm, struct devfs_dirent *de, unsigned depth); static void devfs_ruleset_applydm(struct devfs_ruleset *ds, struct devfs_mount *dm); static struct devfs_ruleset *devfs_ruleset_bynum(devfs_rsnum rsnum); static struct devfs_ruleset *devfs_ruleset_create(devfs_rsnum rsnum); static void devfs_ruleset_reap(struct devfs_ruleset *dsp); static int devfs_ruleset_use(devfs_rsnum rsnum, struct devfs_mount *dm); static struct sx sx_rules; SX_SYSINIT(sx_rules, &sx_rules, "DEVFS ruleset lock"); static TAILQ_HEAD(, devfs_ruleset) devfs_rulesets = TAILQ_HEAD_INITIALIZER(devfs_rulesets); /* * Called to apply the proper rules for 'de' before it can be * exposed to the userland. This should be called with an exclusive * lock on dm in case we need to run anything. 
*/ void devfs_rules_apply(struct devfs_mount *dm, struct devfs_dirent *de) { struct devfs_ruleset *ds; sx_assert(&dm->dm_lock, SX_XLOCKED); if (dm->dm_ruleset == 0) return; sx_slock(&sx_rules); ds = devfs_ruleset_bynum(dm->dm_ruleset); KASSERT(ds != NULL, ("mount-point has NULL ruleset")); devfs_ruleset_applyde(ds, dm, de, devfs_rule_depth); sx_sunlock(&sx_rules); } /* * Rule subsystem ioctl hook. */ int devfs_rules_ioctl(struct devfs_mount *dm, u_long cmd, caddr_t data, struct thread *td) { struct devfs_ruleset *ds; struct devfs_krule *dk; struct devfs_rule *dr; devfs_rsnum rsnum; devfs_rnum rnum; devfs_rid rid; int error; sx_assert(&dm->dm_lock, SX_XLOCKED); /* * XXX: This returns an error regardless of whether we actually * support the cmd or not. * * We could make this privileges finer grained if desired. */ error = priv_check(td, PRIV_DEVFS_RULE); if (error) return (error); sx_xlock(&sx_rules); switch (cmd) { case DEVFSIO_RADD: dr = (struct devfs_rule *)data; error = devfs_rule_input(dr, dm); if (error != 0) break; dk = devfs_rule_byid(dr->dr_id); if (dk != NULL) { error = EEXIST; break; } if (rid2rsn(dr->dr_id) == 0) { error = EIO; break; } error = devfs_rule_insert(dr); break; case DEVFSIO_RAPPLY: dr = (struct devfs_rule *)data; error = devfs_rule_input(dr, dm); if (error != 0) break; /* * This is one of many possible hackish * implementations. The primary contender is an * implementation where the rule we read in is * temporarily inserted into some ruleset, perhaps * with a hypothetical DRO_NOAUTO flag so that it * doesn't get used where it isn't intended, and * applied in the normal way. This can be done in the * userland (DEVFSIO_ADD, DEVFSIO_APPLYID, * DEVFSIO_DEL) or in the kernel; either way it breaks * some corner case assumptions in other parts of the * code (not that this implementation doesn't do * that). 
*/ if (dr->dr_iacts & DRA_INCSET && devfs_ruleset_bynum(dr->dr_incset) == NULL) { error = ESRCH; break; } dk = malloc(sizeof(*dk), M_TEMP, M_WAITOK | M_ZERO); memcpy(&dk->dk_rule, dr, sizeof(*dr)); devfs_rule_applydm(dk, dm); free(dk, M_TEMP); break; case DEVFSIO_RAPPLYID: rid = *(devfs_rid *)data; rid = devfs_rid_input(rid, dm); dk = devfs_rule_byid(rid); if (dk == NULL) { error = ENOENT; break; } devfs_rule_applydm(dk, dm); break; case DEVFSIO_RDEL: rid = *(devfs_rid *)data; rid = devfs_rid_input(rid, dm); dk = devfs_rule_byid(rid); if (dk == NULL) { error = ENOENT; break; } ds = dk->dk_ruleset; error = devfs_rule_delete(dk); break; case DEVFSIO_RGETNEXT: dr = (struct devfs_rule *)data; error = devfs_rule_input(dr, dm); if (error != 0) break; /* * We can't use devfs_rule_byid() here since that * requires the rule specified to exist, but we want * getnext(N) to work whether there is a rule N or not * (specifically, getnext(0) must work, but we should * never have a rule 0 since the add command * interprets 0 to mean "auto-number"). 
*/ ds = devfs_ruleset_bynum(rid2rsn(dr->dr_id)); if (ds == NULL) { error = ENOENT; break; } rnum = rid2rn(dr->dr_id); TAILQ_FOREACH(dk, &ds->ds_rules, dk_list) { if (rid2rn(dk->dk_rule.dr_id) > rnum) break; } if (dk == NULL) { error = ENOENT; break; } memcpy(dr, &dk->dk_rule, sizeof(*dr)); break; case DEVFSIO_SUSE: rsnum = *(devfs_rsnum *)data; error = devfs_ruleset_use(rsnum, dm); break; case DEVFSIO_SAPPLY: rsnum = *(devfs_rsnum *)data; rsnum = rid2rsn(devfs_rid_input(mkrid(rsnum, 0), dm)); ds = devfs_ruleset_bynum(rsnum); if (ds == NULL) { error = ESRCH; break; } devfs_ruleset_applydm(ds, dm); break; case DEVFSIO_SGETNEXT: rsnum = *(devfs_rsnum *)data; TAILQ_FOREACH(ds, &devfs_rulesets, ds_list) { if (ds->ds_number > rsnum) break; } if (ds == NULL) { error = ENOENT; break; } *(devfs_rsnum *)data = ds->ds_number; break; default: error = ENOIOCTL; break; } sx_xunlock(&sx_rules); return (error); } /* * Adjust the rule identifier to use the ruleset of dm if one isn't * explicitly specified. * * Note that after this operation, rid2rsn(rid) might still be 0, and * that's okay; ruleset 0 is a valid ruleset, but when it's read in * from the userland, it means "current ruleset for this mount-point". */ static devfs_rid devfs_rid_input(devfs_rid rid, struct devfs_mount *dm) { if (rid2rsn(rid) == 0) return (mkrid(dm->dm_ruleset, rid2rn(rid))); else return (rid); } /* * Apply dk to de and everything under de. * * XXX: This method needs a function call for every nested * subdirectory in a devfs mount. If we plan to have many of these, * we might eventually run out of kernel stack space. * XXX: a linear search could be done through the cdev list instead. */ static void devfs_rule_applyde_recursive(struct devfs_krule *dk, struct devfs_mount *dm, struct devfs_dirent *de) { struct devfs_dirent *de2; TAILQ_FOREACH(de2, &de->de_dlist, de_list) devfs_rule_applyde_recursive(dk, dm, de2); devfs_rule_run(dk, dm, de, devfs_rule_depth); } /* * Apply dk to all entires in dm. 
 */
static void
devfs_rule_applydm(struct devfs_krule *dk, struct devfs_mount *dm)
{

	devfs_rule_applyde_recursive(dk, dm, dm->dm_rootdir);
}

/*
 * Automatically select a number for a new rule in ds, and write the
 * result into *rnump.  Numbers are handed out in increments of 100 so
 * the user can insert rules between existing ones.
 */
static int
devfs_rule_autonumber(struct devfs_ruleset *ds, devfs_rnum *rnump)
{
	struct devfs_krule *dk;

	/* Find the last rule; the list is kept sorted by rule number. */
	dk = TAILQ_LAST(&ds->ds_rules, rulehead);
	if (dk == NULL)
		*rnump = 100;
	else {
		*rnump = rid2rn(dk->dk_rule.dr_id) + 100;
		/* Detect overflow. */
		if (*rnump < rid2rn(dk->dk_rule.dr_id))
			return (ERANGE);
	}
	KASSERT(devfs_rule_byid(mkrid(ds->ds_number, *rnump)) == NULL,
	    ("autonumbering resulted in an already existing rule"));
	return (0);
}

/*
 * Find a krule by id.  Returns NULL if either the ruleset or the rule
 * number does not exist.
 */
static struct devfs_krule *
devfs_rule_byid(devfs_rid rid)
{
	struct devfs_ruleset *ds;
	struct devfs_krule *dk;
	devfs_rnum rn;

	rn = rid2rn(rid);
	ds = devfs_ruleset_bynum(rid2rsn(rid));
	if (ds == NULL)
		return (NULL);
	TAILQ_FOREACH(dk, &ds->ds_rules, dk_list) {
		if (rid2rn(dk->dk_rule.dr_id) == rn)
			return (dk);
		else if (rid2rn(dk->dk_rule.dr_id) > rn)
			/* Sorted list: we have passed where rn would be. */
			break;
	}
	return (NULL);
}

/*
 * Remove dk from any lists it may be on and free memory associated
 * with it.  Drops the reference dk holds on an included ruleset (if
 * any) and reaps rulesets that become empty and unreferenced.
 */
static int
devfs_rule_delete(struct devfs_krule *dk)
{
	struct devfs_ruleset *ds;

	if (dk->dk_rule.dr_iacts & DRA_INCSET) {
		ds = devfs_ruleset_bynum(dk->dk_rule.dr_incset);
		KASSERT(ds != NULL, ("DRA_INCSET but bad dr_incset"));
		--ds->ds_refcount;
		devfs_ruleset_reap(ds);
	}
	ds = dk->dk_ruleset;
	TAILQ_REMOVE(&ds->ds_rules, dk, dk_list);
	devfs_ruleset_reap(ds);
	free(dk, M_DEVFSRULE);
	return (0);
}

/*
 * Get a struct cdev *corresponding to de so we can try to match rules based
 * on it.  If this routine returns NULL, there is no struct cdev *associated
 * with the dirent (symlinks and directories don't have dev_ts), and
 * the caller should assume that any criteria dependent on a dev_t
 * don't match.
*/ static struct cdev * devfs_rule_getdev(struct devfs_dirent *de) { if (de->de_cdp == NULL) return (NULL); if (de->de_cdp->cdp_flags & CDP_ACTIVE) return (&de->de_cdp->cdp_c); else return (NULL); } /* * Do what we need to do to a rule that we just loaded from the * userland. In particular, we need to check the magic, and adjust * the ruleset appropriate if desired. */ static int devfs_rule_input(struct devfs_rule *dr, struct devfs_mount *dm) { if (dr->dr_magic != DEVFS_MAGIC) return (ERPCMISMATCH); dr->dr_id = devfs_rid_input(dr->dr_id, dm); return (0); } /* * Import dr into the appropriate place in the kernel (i.e., make a * krule). The value of dr is copied, so the pointer may be destroyed * after this call completes. */ static int devfs_rule_insert(struct devfs_rule *dr) { struct devfs_ruleset *ds, *dsi; struct devfs_krule *k1; struct devfs_krule *dk; devfs_rsnum rsnum; devfs_rnum dkrn; int error; /* * This stuff seems out of place here, but we want to do it as * soon as possible so that if it fails, we don't have to roll * back any changes we already made (e.g., ruleset creation). */ if (dr->dr_iacts & DRA_INCSET) { dsi = devfs_ruleset_bynum(dr->dr_incset); if (dsi == NULL) return (ESRCH); } else dsi = NULL; rsnum = rid2rsn(dr->dr_id); KASSERT(rsnum != 0, ("Inserting into ruleset zero")); ds = devfs_ruleset_bynum(rsnum); if (ds == NULL) ds = devfs_ruleset_create(rsnum); dkrn = rid2rn(dr->dr_id); if (dkrn == 0) { error = devfs_rule_autonumber(ds, &dkrn); if (error != 0) { devfs_ruleset_reap(ds); return (error); } } dk = malloc(sizeof(*dk), M_DEVFSRULE, M_WAITOK | M_ZERO); dk->dk_ruleset = ds; if (dsi != NULL) ++dsi->ds_refcount; /* XXX: Inspect dr? 
*/ memcpy(&dk->dk_rule, dr, sizeof(*dr)); dk->dk_rule.dr_id = mkrid(rid2rsn(dk->dk_rule.dr_id), dkrn); TAILQ_FOREACH(k1, &ds->ds_rules, dk_list) { if (rid2rn(k1->dk_rule.dr_id) > dkrn) { TAILQ_INSERT_BEFORE(k1, dk, dk_list); break; } } if (k1 == NULL) TAILQ_INSERT_TAIL(&ds->ds_rules, dk, dk_list); return (0); } /* * Determine whether dk matches de. Returns 1 if dk should be run on * de; 0, otherwise. */ static int devfs_rule_match(struct devfs_krule *dk, struct devfs_mount *dm, struct devfs_dirent *de) { struct devfs_rule *dr = &dk->dk_rule; struct cdev *dev; struct cdevsw *dsw; int ref; dev = devfs_rule_getdev(de); /* * At this point, if dev is NULL, we should assume that any * criteria that depend on it don't match. We should *not* * just ignore them (i.e., act like they weren't specified), * since that makes a rule that only has criteria dependent on * the struct cdev *match all symlinks and directories. * * Note also that the following tests are somewhat reversed: * They're actually testing to see whether the condition does * *not* match, since the default is to assume the rule should * be run (such as if there are no conditions). */ if (dr->dr_icond & DRC_DSWFLAGS) { if (dev == NULL) return (0); dsw = dev_refthread(dev, &ref); if (dsw == NULL) return (0); if ((dsw->d_flags & dr->dr_dswflags) == 0) { dev_relthread(dev, ref); return (0); } dev_relthread(dev, ref); } if (dr->dr_icond & DRC_PATHPTRN) if (!devfs_rule_matchpath(dk, dm, de)) return (0); return (1); } /* * Determine whether dk matches de on account of dr_pathptrn. 
 */
static int
devfs_rule_matchpath(struct devfs_krule *dk, struct devfs_mount *dm,
    struct devfs_dirent *de)
{
	struct devfs_rule *dr = &dk->dk_rule;
	struct cdev *dev;
	int match;
	char *pname, *specname;

	specname = NULL;
	dev = devfs_rule_getdev(de);
	/*
	 * Devices match against their cdev name; symlinks and non-root,
	 * non-dot directories match against their fully qualified path
	 * (built into a temporary buffer).  Anything else cannot match
	 * a path pattern.
	 */
	if (dev != NULL)
		pname = dev->si_name;
	else if (de->de_dirent->d_type == DT_LNK ||
	    (de->de_dirent->d_type == DT_DIR && de != dm->dm_rootdir &&
	    (de->de_flags & (DE_DOT | DE_DOTDOT)) == 0)) {
		specname = malloc(SPECNAMELEN + 1, M_TEMP, M_WAITOK);
		pname = devfs_fqpn(specname, dm, de, NULL);
	} else
		return (0);

	KASSERT(pname != NULL, ("devfs_rule_matchpath: NULL pname"));
	match = fnmatch(dr->dr_pathptrn, pname, FNM_PATHNAME) == 0;
	free(specname, M_TEMP);
	return (match);
}

/*
 * Run dk on de: if the rule matches, perform all the actions it
 * prescribes (hide/unhide, ownership, mode, included ruleset).
 */
static void
devfs_rule_run(struct devfs_krule *dk, struct devfs_mount *dm,
    struct devfs_dirent *de, unsigned depth)
{
	struct devfs_rule *dr = &dk->dk_rule;
	struct devfs_ruleset *ds;

	if (!devfs_rule_match(dk, dm, de))
		return;
	if (dr->dr_iacts & DRA_BACTS) {
		if (dr->dr_bacts & DRB_HIDE)
			de->de_flags |= DE_WHITEOUT;
		if (dr->dr_bacts & DRB_UNHIDE)
			de->de_flags &= ~DE_WHITEOUT;
	}
	if (dr->dr_iacts & DRA_UID)
		de->de_uid = dr->dr_uid;
	if (dr->dr_iacts & DRA_GID)
		de->de_gid = dr->dr_gid;
	if (dr->dr_iacts & DRA_MODE)
		de->de_mode = dr->dr_mode;
	if (dr->dr_iacts & DRA_INCSET) {
		/*
		 * XXX: we should tell the user if the depth is exceeded here
		 * XXX: but it is not obvious how to.  A return value will
		 * XXX: not work as this is called when devices are created
		 * XXX: long time after the rules were instantiated.
		 * XXX: a printf() would probably give too much noise, or
		 * XXX: DoS the machine.  I guess a rate-limited message
		 * XXX: might work.
		 */
		if (depth > 0) {
			/* depth bounds recursion through included rulesets. */
			ds = devfs_ruleset_bynum(dk->dk_rule.dr_incset);
			KASSERT(ds != NULL, ("DRA_INCSET but bad dr_incset"));
			devfs_ruleset_applyde(ds, dm, de, depth - 1);
		}
	}
}

/*
 * Apply all the rules in ds to de.
 */
static void
devfs_ruleset_applyde(struct devfs_ruleset *ds, struct devfs_mount *dm,
    struct devfs_dirent *de, unsigned depth)
{
	struct devfs_krule *dk;

	TAILQ_FOREACH(dk, &ds->ds_rules, dk_list)
		devfs_rule_run(dk, dm, de, depth);
}

/*
 * Apply all the rules in ds to all the entries in dm.
 */
static void
devfs_ruleset_applydm(struct devfs_ruleset *ds, struct devfs_mount *dm)
{
	struct devfs_krule *dk;

	/*
	 * XXX: Does it matter whether we do
	 *
	 *	foreach(dk in ds)
	 *		foreach(de in dm)
	 *			apply(dk to de)
	 *
	 * as opposed to
	 *
	 *	foreach(de in dm)
	 *		foreach(dk in ds)
	 *			apply(dk to de)
	 *
	 * The end result is obviously the same, but does the order
	 * matter?
	 */
	TAILQ_FOREACH(dk, &ds->ds_rules, dk_list)
		devfs_rule_applydm(dk, dm);
}

/*
 * Find a ruleset by number.  Returns NULL if no such ruleset exists.
 */
static struct devfs_ruleset *
devfs_ruleset_bynum(devfs_rsnum rsnum)
{
	struct devfs_ruleset *ds;

	TAILQ_FOREACH(ds, &devfs_rulesets, ds_list) {
		if (ds->ds_number == rsnum)
			return (ds);
	}
	return (NULL);
}

/*
 * Create a new ruleset, inserted into the global list in ascending
 * numeric order.  The caller must have checked that rsnum does not
 * already exist.
 */
static struct devfs_ruleset *
devfs_ruleset_create(devfs_rsnum rsnum)
{
	struct devfs_ruleset *s1;
	struct devfs_ruleset *ds;

	KASSERT(rsnum != 0, ("creating ruleset zero"));

	KASSERT(devfs_ruleset_bynum(rsnum) == NULL,
	    ("creating already existent ruleset %d", rsnum));
	ds = malloc(sizeof(*ds), M_DEVFSRULE, M_WAITOK | M_ZERO);
	ds->ds_number = rsnum;
	TAILQ_INIT(&ds->ds_rules);

	/* Keep the ruleset list sorted by number. */
	TAILQ_FOREACH(s1, &devfs_rulesets, ds_list) {
		if (s1->ds_number > rsnum) {
			TAILQ_INSERT_BEFORE(s1, ds, ds_list);
			break;
		}
	}
	if (s1 == NULL)
		TAILQ_INSERT_TAIL(&devfs_rulesets, ds, ds_list);
	return (ds);
}

/*
 * Remove a ruleset from the system if it's empty and not used
 * anywhere.  This should be called after every time a rule is deleted
 * from this ruleset or the reference count is decremented.
*/ static void devfs_ruleset_reap(struct devfs_ruleset *ds) { KASSERT(ds->ds_number != 0, ("reaping ruleset zero ")); if (!TAILQ_EMPTY(&ds->ds_rules) || ds->ds_refcount != 0) return; TAILQ_REMOVE(&devfs_rulesets, ds, ds_list); free(ds, M_DEVFSRULE); } /* * Make rsnum the active ruleset for dm. */ static int devfs_ruleset_use(devfs_rsnum rsnum, struct devfs_mount *dm) { struct devfs_ruleset *cds, *ds; if (dm->dm_ruleset != 0) { cds = devfs_ruleset_bynum(dm->dm_ruleset); --cds->ds_refcount; devfs_ruleset_reap(cds); } if (rsnum == 0) { dm->dm_ruleset = 0; return (0); } ds = devfs_ruleset_bynum(rsnum); if (ds == NULL) ds = devfs_ruleset_create(rsnum); /* These should probably be made atomic somehow. */ ++ds->ds_refcount; dm->dm_ruleset = rsnum; return (0); } void devfs_rules_cleanup(struct devfs_mount *dm) { struct devfs_ruleset *ds; sx_assert(&dm->dm_lock, SX_XLOCKED); if (dm->dm_ruleset != 0) { ds = devfs_ruleset_bynum(dm->dm_ruleset); --ds->ds_refcount; devfs_ruleset_reap(ds); } } /* * Make rsnum the active ruleset for dm (locked) */ void devfs_ruleset_set(devfs_rsnum rsnum, struct devfs_mount *dm) { sx_assert(&dm->dm_lock, SX_XLOCKED); sx_xlock(&sx_rules); devfs_ruleset_use(rsnum, dm); sx_xunlock(&sx_rules); } /* * Apply the current active ruleset on a mount */ void devfs_ruleset_apply(struct devfs_mount *dm) { struct devfs_ruleset *ds; sx_assert(&dm->dm_lock, SX_XLOCKED); sx_xlock(&sx_rules); if (dm->dm_ruleset == 0) { sx_xunlock(&sx_rules); return; } ds = devfs_ruleset_bynum(dm->dm_ruleset); if (ds != NULL) devfs_ruleset_applydm(ds, dm); sx_xunlock(&sx_rules); } Index: head/sys/fs/devfs/devfs_vfsops.c =================================================================== --- head/sys/fs/devfs/devfs_vfsops.c (revision 326267) +++ head/sys/fs/devfs/devfs_vfsops.c (revision 326268) @@ -1,246 +1,248 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 1992, 1993, 1995 * The Regents of the University of California. All rights reserved. 
* Copyright (c) 2000 * Poul-Henning Kamp. All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * @(#)kernfs_vfsops.c 8.10 (Berkeley) 5/14/95 * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vfsops.c 1.36 * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include static struct unrhdr *devfs_unr; MALLOC_DEFINE(M_DEVFS, "DEVFS", "DEVFS data"); static vfs_mount_t devfs_mount; static vfs_unmount_t devfs_unmount; static vfs_root_t devfs_root; static vfs_statfs_t devfs_statfs; static const char *devfs_opts[] = { "from", "export", "ruleset", NULL }; /* * Mount the filesystem */ static int devfs_mount(struct mount *mp) { int error; struct devfs_mount *fmp; struct vnode *rvp; struct thread *td = curthread; int injail, rsnum; if (devfs_unr == NULL) devfs_unr = new_unrhdr(0, INT_MAX, NULL); error = 0; if (mp->mnt_flag & MNT_ROOTFS) return (EOPNOTSUPP); if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_DEVFS)) return (EPERM); rsnum = 0; injail = jailed(td->td_ucred); if (mp->mnt_optnew != NULL) { if (vfs_filteropt(mp->mnt_optnew, devfs_opts)) return (EINVAL); if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) return (EOPNOTSUPP); if (vfs_getopt(mp->mnt_optnew, "ruleset", NULL, NULL) == 0 && (vfs_scanopt(mp->mnt_optnew, "ruleset", "%d", &rsnum) != 1 || rsnum < 0 || rsnum > 65535)) { vfs_mount_error(mp, "%s", "invalid ruleset specification"); return (EINVAL); } if (injail && rsnum != 0 && rsnum != td->td_ucred->cr_prison->pr_devfs_rsnum) return (EPERM); } /* jails enforce their ruleset */ if (injail) rsnum = td->td_ucred->cr_prison->pr_devfs_rsnum; if (mp->mnt_flag & MNT_UPDATE) { if (rsnum != 0) { fmp = mp->mnt_data; if (fmp != NULL) { sx_xlock(&fmp->dm_lock); devfs_ruleset_set((devfs_rsnum)rsnum, fmp); devfs_ruleset_apply(fmp); sx_xunlock(&fmp->dm_lock); } } return (0); } fmp = malloc(sizeof *fmp, M_DEVFS, M_WAITOK | M_ZERO); fmp->dm_idx = alloc_unr(devfs_unr); sx_init(&fmp->dm_lock, "devfsmount"); fmp->dm_holdcnt = 1; MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | 
MNTK_EXTENDED_SHARED; #ifdef MAC mp->mnt_flag |= MNT_MULTILABEL; #endif MNT_IUNLOCK(mp); fmp->dm_mount = mp; mp->mnt_data = (void *) fmp; vfs_getnewfsid(mp); fmp->dm_rootdir = devfs_vmkdir(fmp, NULL, 0, NULL, DEVFS_ROOTINO); error = devfs_root(mp, LK_EXCLUSIVE, &rvp); if (error) { sx_destroy(&fmp->dm_lock); free_unr(devfs_unr, fmp->dm_idx); free(fmp, M_DEVFS); return (error); } if (rsnum != 0) { sx_xlock(&fmp->dm_lock); devfs_ruleset_set((devfs_rsnum)rsnum, fmp); sx_xunlock(&fmp->dm_lock); } VOP_UNLOCK(rvp, 0); vfs_mountedfrom(mp, "devfs"); return (0); } void devfs_unmount_final(struct devfs_mount *fmp) { sx_destroy(&fmp->dm_lock); free(fmp, M_DEVFS); } static int devfs_unmount(struct mount *mp, int mntflags) { int error; int flags = 0; struct devfs_mount *fmp; int hold; u_int idx; fmp = VFSTODEVFS(mp); KASSERT(fmp->dm_mount != NULL, ("devfs_unmount unmounted devfs_mount")); if (mntflags & MNT_FORCE) flags |= FORCECLOSE; /* There is 1 extra root vnode reference from devfs_mount(). */ error = vflush(mp, 1, flags, curthread); if (error) return (error); sx_xlock(&fmp->dm_lock); devfs_cleanup(fmp); devfs_rules_cleanup(fmp); fmp->dm_mount = NULL; hold = --fmp->dm_holdcnt; mp->mnt_data = NULL; idx = fmp->dm_idx; sx_xunlock(&fmp->dm_lock); free_unr(devfs_unr, idx); if (hold == 0) devfs_unmount_final(fmp); return 0; } /* Return locked reference to root. 
*/ static int devfs_root(struct mount *mp, int flags, struct vnode **vpp) { int error; struct vnode *vp; struct devfs_mount *dmp; dmp = VFSTODEVFS(mp); sx_xlock(&dmp->dm_lock); error = devfs_allocv(dmp->dm_rootdir, mp, LK_EXCLUSIVE, &vp); if (error) return (error); vp->v_vflag |= VV_ROOT; *vpp = vp; return (0); } static int devfs_statfs(struct mount *mp, struct statfs *sbp) { sbp->f_flags = 0; sbp->f_bsize = DEV_BSIZE; sbp->f_iosize = DEV_BSIZE; sbp->f_blocks = 2; /* 1K to keep df happy */ sbp->f_bfree = 0; sbp->f_bavail = 0; sbp->f_files = 0; sbp->f_ffree = 0; return (0); } static struct vfsops devfs_vfsops = { .vfs_mount = devfs_mount, .vfs_root = devfs_root, .vfs_statfs = devfs_statfs, .vfs_unmount = devfs_unmount, }; VFS_SET(devfs_vfsops, devfs, VFCF_SYNTHETIC | VFCF_JAIL); Index: head/sys/fs/devfs/devfs_vnops.c =================================================================== --- head/sys/fs/devfs/devfs_vnops.c (revision 326267) +++ head/sys/fs/devfs/devfs_vnops.c (revision 326268) @@ -1,1948 +1,1950 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2000-2004 * Poul-Henning Kamp. All rights reserved. * Copyright (c) 1989, 1992-1993, 1995 * The Regents of the University of California. All rights reserved. * * This code is derived from software donated to Berkeley by * Jan-Simon Pendry. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)kernfs_vnops.c 8.15 (Berkeley) 5/21/95 * From: FreeBSD: src/sys/miscfs/kernfs/kernfs_vnops.c 1.43 * * $FreeBSD$ */ /* * TODO: * mkdir: want it ? */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct vop_vector devfs_vnodeops; static struct vop_vector devfs_specops; static struct fileops devfs_ops_f; #include #include #include #include #include #include static MALLOC_DEFINE(M_CDEVPDATA, "DEVFSP", "Metainfo for cdev-fp data"); struct mtx devfs_de_interlock; MTX_SYSINIT(devfs_de_interlock, &devfs_de_interlock, "devfs interlock", MTX_DEF); struct sx clone_drain_lock; SX_SYSINIT(clone_drain_lock, &clone_drain_lock, "clone events drain lock"); struct mtx cdevpriv_mtx; MTX_SYSINIT(cdevpriv_mtx, &cdevpriv_mtx, "cdevpriv lock", MTX_DEF); SYSCTL_DECL(_vfs_devfs); static int devfs_dotimes; SYSCTL_INT(_vfs_devfs, OID_AUTO, dotimes, CTLFLAG_RW, &devfs_dotimes, 0, "Update timestamps on DEVFS with default precision"); /* * Update devfs node timestamp. Note that updates are unlocked and * stat(2) could see partially updated times. 
*/ static void devfs_timestamp(struct timespec *tsp) { time_t ts; if (devfs_dotimes) { vfs_timestamp(tsp); } else { ts = time_second; if (tsp->tv_sec != ts) { tsp->tv_sec = ts; tsp->tv_nsec = 0; } } } static int devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp, int *ref) { *dswp = devvn_refthread(fp->f_vnode, devp, ref); if (*devp != fp->f_data) { if (*dswp != NULL) dev_relthread(*devp, *ref); return (ENXIO); } KASSERT((*devp)->si_refcount > 0, ("devfs: un-referenced struct cdev *(%s)", devtoname(*devp))); if (*dswp == NULL) return (ENXIO); curthread->td_fpop = fp; return (0); } int devfs_get_cdevpriv(void **datap) { struct file *fp; struct cdev_privdata *p; int error; fp = curthread->td_fpop; if (fp == NULL) return (EBADF); p = fp->f_cdevpriv; if (p != NULL) { error = 0; *datap = p->cdpd_data; } else error = ENOENT; return (error); } int devfs_set_cdevpriv(void *priv, d_priv_dtor_t *priv_dtr) { struct file *fp; struct cdev_priv *cdp; struct cdev_privdata *p; int error; fp = curthread->td_fpop; if (fp == NULL) return (ENOENT); cdp = cdev2priv((struct cdev *)fp->f_data); p = malloc(sizeof(struct cdev_privdata), M_CDEVPDATA, M_WAITOK); p->cdpd_data = priv; p->cdpd_dtr = priv_dtr; p->cdpd_fp = fp; mtx_lock(&cdevpriv_mtx); if (fp->f_cdevpriv == NULL) { LIST_INSERT_HEAD(&cdp->cdp_fdpriv, p, cdpd_list); fp->f_cdevpriv = p; mtx_unlock(&cdevpriv_mtx); error = 0; } else { mtx_unlock(&cdevpriv_mtx); free(p, M_CDEVPDATA); error = EBUSY; } return (error); } void devfs_destroy_cdevpriv(struct cdev_privdata *p) { mtx_assert(&cdevpriv_mtx, MA_OWNED); KASSERT(p->cdpd_fp->f_cdevpriv == p, ("devfs_destoy_cdevpriv %p != %p", p->cdpd_fp->f_cdevpriv, p)); p->cdpd_fp->f_cdevpriv = NULL; LIST_REMOVE(p, cdpd_list); mtx_unlock(&cdevpriv_mtx); (p->cdpd_dtr)(p->cdpd_data); free(p, M_CDEVPDATA); } static void devfs_fpdrop(struct file *fp) { struct cdev_privdata *p; mtx_lock(&cdevpriv_mtx); if ((p = fp->f_cdevpriv) == NULL) { mtx_unlock(&cdevpriv_mtx); return; } 
devfs_destroy_cdevpriv(p); } void devfs_clear_cdevpriv(void) { struct file *fp; fp = curthread->td_fpop; if (fp == NULL) return; devfs_fpdrop(fp); } /* * On success devfs_populate_vp() returns with dmp->dm_lock held. */ static int devfs_populate_vp(struct vnode *vp) { struct devfs_dirent *de; struct devfs_mount *dmp; int locked; ASSERT_VOP_LOCKED(vp, "devfs_populate_vp"); dmp = VFSTODEVFS(vp->v_mount); locked = VOP_ISLOCKED(vp); sx_xlock(&dmp->dm_lock); DEVFS_DMP_HOLD(dmp); /* Can't call devfs_populate() with the vnode lock held. */ VOP_UNLOCK(vp, 0); devfs_populate(dmp); sx_xunlock(&dmp->dm_lock); vn_lock(vp, locked | LK_RETRY); sx_xlock(&dmp->dm_lock); if (DEVFS_DMP_DROP(dmp)) { sx_xunlock(&dmp->dm_lock); devfs_unmount_final(dmp); return (ERESTART); } if ((vp->v_iflag & VI_DOOMED) != 0) { sx_xunlock(&dmp->dm_lock); return (ERESTART); } de = vp->v_data; KASSERT(de != NULL, ("devfs_populate_vp: vp->v_data == NULL but vnode not doomed")); if ((de->de_flags & DE_DOOMED) != 0) { sx_xunlock(&dmp->dm_lock); return (ERESTART); } return (0); } static int devfs_vptocnp(struct vop_vptocnp_args *ap) { struct vnode *vp = ap->a_vp; struct vnode **dvp = ap->a_vpp; struct devfs_mount *dmp; char *buf = ap->a_buf; int *buflen = ap->a_buflen; struct devfs_dirent *dd, *de; int i, error; dmp = VFSTODEVFS(vp->v_mount); error = devfs_populate_vp(vp); if (error != 0) return (error); i = *buflen; dd = vp->v_data; if (vp->v_type == VCHR) { i -= strlen(dd->de_cdp->cdp_c.si_name); if (i < 0) { error = ENOMEM; goto finished; } bcopy(dd->de_cdp->cdp_c.si_name, buf + i, strlen(dd->de_cdp->cdp_c.si_name)); de = dd->de_dir; } else if (vp->v_type == VDIR) { if (dd == dmp->dm_rootdir) { *dvp = vp; vref(*dvp); goto finished; } i -= dd->de_dirent->d_namlen; if (i < 0) { error = ENOMEM; goto finished; } bcopy(dd->de_dirent->d_name, buf + i, dd->de_dirent->d_namlen); de = dd; } else { error = ENOENT; goto finished; } *buflen = i; de = devfs_parent_dirent(de); if (de == NULL) { error = ENOENT; goto 
finished;
	}
	mtx_lock(&devfs_de_interlock);
	*dvp = de->de_vnode;
	if (*dvp != NULL) {
		/* Take a reference on the parent vnode without sleeping
		 * under devfs_de_interlock. */
		VI_LOCK(*dvp);
		mtx_unlock(&devfs_de_interlock);
		vholdl(*dvp);
		VI_UNLOCK(*dvp);
		vref(*dvp);
		vdrop(*dvp);
	} else {
		mtx_unlock(&devfs_de_interlock);
		error = ENOENT;
	}
finished:
	sx_xunlock(&dmp->dm_lock);
	return (error);
}

/*
 * Construct the fully qualified path name relative to the mountpoint.
 * If a NULL cnp is provided, no '/' is appended to the resulting path.
 */
char *
devfs_fqpn(char *buf, struct devfs_mount *dmp, struct devfs_dirent *dd,
    struct componentname *cnp)
{
	int i;
	struct devfs_dirent *de;

	sx_assert(&dmp->dm_lock, SA_LOCKED);

	/* Build the path backwards from the end of buf. */
	i = SPECNAMELEN;
	buf[i] = '\0';
	if (cnp != NULL)
		i -= cnp->cn_namelen;
	if (i < 0)
		return (NULL);
	if (cnp != NULL)
		bcopy(cnp->cn_nameptr, buf + i, cnp->cn_namelen);
	de = dd;
	while (de != dmp->dm_rootdir) {
		if (cnp != NULL || i < SPECNAMELEN) {
			i--;
			if (i < 0)
				return (NULL);
			buf[i] = '/';
		}
		i -= de->de_dirent->d_namlen;
		if (i < 0)
			return (NULL);
		bcopy(de->de_dirent->d_name, buf + i,
		    de->de_dirent->d_namlen);
		de = devfs_parent_dirent(de);
		if (de == NULL)
			return (NULL);
	}
	/* Returns a pointer into buf; NULL on overflow or orphan. */
	return (buf + i);
}

/*
 * Drop the hold references taken by devfs_allocv() on the dirent and
 * the mount.  Returns 0 if the dirent is still alive, 1 if it was
 * doomed, 2 if the whole mount went away (dm_lock already dropped).
 * When drop_dm_lock is set, dm_lock is released in the survivor case
 * as well.
 */
static int
devfs_allocv_drop_refs(int drop_dm_lock, struct devfs_mount *dmp,
    struct devfs_dirent *de)
{
	int not_found;

	not_found = 0;
	if (de->de_flags & DE_DOOMED)
		not_found = 1;
	if (DEVFS_DE_DROP(de)) {
		KASSERT(not_found == 1, ("DEVFS de dropped but not doomed"));
		devfs_dirent_free(de);
	}
	if (DEVFS_DMP_DROP(dmp)) {
		KASSERT(not_found == 1,
			("DEVFS mount struct freed before dirent"));
		not_found = 2;
		sx_xunlock(&dmp->dm_lock);
		devfs_unmount_final(dmp);
	}
	if (not_found == 1 || (drop_dm_lock && not_found != 2))
		sx_unlock(&dmp->dm_lock);
	return (not_found);
}

/*
 * insmntque1() failure callback: detach the half-constructed vnode
 * from its dirent and dispose of it.
 */
static void
devfs_insmntque_dtr(struct vnode *vp, void *arg)
{
	struct devfs_dirent *de;

	de = (struct devfs_dirent *)arg;
	mtx_lock(&devfs_de_interlock);
	vp->v_data = NULL;
	de->de_vnode = NULL;
	mtx_unlock(&devfs_de_interlock);
	vgone(vp);
	vput(vp);
}

/*
 * devfs_allocv shall be entered with dmp->dm_lock held, and it
drops
 * it on return.
 *
 * Returns (in *vpp) a locked, referenced vnode for the dirent de, either
 * reusing the cached de->de_vnode or allocating a fresh one.
 */
int
devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
    struct vnode **vpp)
{
	int error;
	struct vnode *vp;
	struct cdev *dev;
	struct devfs_mount *dmp;
	struct cdevsw *dsw;

	dmp = VFSTODEVFS(mp);
	if (de->de_flags & DE_DOOMED) {
		sx_xunlock(&dmp->dm_lock);
		return (ENOENT);
	}
loop:
	DEVFS_DE_HOLD(de);
	DEVFS_DMP_HOLD(dmp);
	mtx_lock(&devfs_de_interlock);
	vp = de->de_vnode;
	if (vp != NULL) {
		/* Reuse the cached vnode; vget may sleep, so drop dm_lock. */
		VI_LOCK(vp);
		mtx_unlock(&devfs_de_interlock);
		sx_xunlock(&dmp->dm_lock);
		vget(vp, lockmode | LK_INTERLOCK | LK_RETRY, curthread);
		sx_xlock(&dmp->dm_lock);
		if (devfs_allocv_drop_refs(0, dmp, de)) {
			vput(vp);
			return (ENOENT);
		}
		else if ((vp->v_iflag & VI_DOOMED) != 0) {
			/* Vnode was reclaimed while we slept; retry. */
			mtx_lock(&devfs_de_interlock);
			if (de->de_vnode == vp) {
				de->de_vnode = NULL;
				vp->v_data = NULL;
			}
			mtx_unlock(&devfs_de_interlock);
			vput(vp);
			goto loop;
		}
		sx_xunlock(&dmp->dm_lock);
		*vpp = vp;
		return (0);
	}
	mtx_unlock(&devfs_de_interlock);
	if (de->de_dirent->d_type == DT_CHR) {
		/* Refuse to resurrect entries for destroyed devices. */
		if (!(de->de_cdp->cdp_flags & CDP_ACTIVE)) {
			devfs_allocv_drop_refs(1, dmp, de);
			return (ENOENT);
		}
		dev = &de->de_cdp->cdp_c;
	} else {
		dev = NULL;
	}
	error = getnewvnode("devfs", mp, &devfs_vnodeops, &vp);
	if (error != 0) {
		devfs_allocv_drop_refs(1, dmp, de);
		printf("devfs_allocv: failed to allocate new vnode\n");
		return (error);
	}

	if (de->de_dirent->d_type == DT_CHR) {
		vp->v_type = VCHR;
		VI_LOCK(vp);
		dev_lock();
		dev_refl(dev);
		/* XXX: v_rdev should be protect by vnode lock */
		vp->v_rdev = dev;
		KASSERT(vp->v_usecount == 1,
		    ("%s %d (%d)\n", __func__, __LINE__, vp->v_usecount));
		dev->si_usecount += vp->v_usecount;
		/* Special casing of ttys for deadfs.  Probably redundant.
 */
		dsw = dev->si_devsw;
		if (dsw != NULL && (dsw->d_flags & D_TTY) != 0)
			vp->v_vflag |= VV_ISTTY;
		dev_unlock();
		VI_UNLOCK(vp);
		if ((dev->si_flags & SI_ETERNAL) != 0)
			vp->v_vflag |= VV_ETERNALDEV;
		/* Character devices use the special vop vector. */
		vp->v_op = &devfs_specops;
	} else if (de->de_dirent->d_type == DT_DIR) {
		vp->v_type = VDIR;
	} else if (de->de_dirent->d_type == DT_LNK) {
		vp->v_type = VLNK;
	} else {
		vp->v_type = VBAD;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_NOWITNESS);
	VN_LOCK_ASHARE(vp);
	mtx_lock(&devfs_de_interlock);
	vp->v_data = de;
	de->de_vnode = vp;
	mtx_unlock(&devfs_de_interlock);
	error = insmntque1(vp, mp, devfs_insmntque_dtr, de);
	if (error != 0) {
		/* devfs_insmntque_dtr() already disposed of the vnode. */
		(void) devfs_allocv_drop_refs(1, dmp, de);
		return (error);
	}
	if (devfs_allocv_drop_refs(0, dmp, de)) {
		vput(vp);
		return (ENOENT);
	}
#ifdef MAC
	mac_devfs_vnode_associate(mp, de, vp);
#endif
	sx_xunlock(&dmp->dm_lock);
	*vpp = vp;
	return (0);
}

/*
 * VOP_ACCESS: standard vaccess() check against the dirent's owner,
 * group and mode, with one exception — a process is always granted
 * access to its controlling terminal.
 */
static int
devfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *de;
	struct proc *p;
	int error;

	de = vp->v_data;
	if (vp->v_type == VDIR)
		de = de->de_dir;

	error = vaccess(vp->v_type, de->de_mode, de->de_uid, de->de_gid,
	    ap->a_accmode, ap->a_cred, NULL);
	if (error == 0)
		return (0);
	if (error != EACCES)
		return (error);
	p = ap->a_td->td_proc;
	/* We do, however, allow access to the controlling terminal */
	PROC_LOCK(p);
	if (!(p->p_flag & P_CONTROLT)) {
		PROC_UNLOCK(p);
		return (error);
	}
	if (p->p_session->s_ttydp == de->de_cdp)
		error = 0;
	PROC_UNLOCK(p);
	return (error);
}

/* The devfs-private close flags must not collide with fcntl flags. */
_Static_assert(((FMASK | FCNTLFLAGS) & (FLASTCLOSE | FREVOKE)) == 0,
    "devfs-only flag reuse failed");

/*
 * VOP_CLOSE for character devices: decide whether this is the last
 * close and invoke the driver's d_close with appropriate flags.
 */
static int
devfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp, *oldvp;
	struct thread *td = ap->a_td;
	struct proc *p;
	struct cdev *dev = vp->v_rdev;
	struct cdevsw *dsw;
	int dflags, error, ref, vp_locked;

	/*
	 * XXX: Don't call d_close() if we were called because of
	 * XXX: insmntque1() failure.
	 */
	if (vp->v_data == NULL)
		return (0);

	/*
	 * Hack: a tty device that is a controlling terminal
	 * has a reference from the session structure.
	 * We cannot easily tell that a character device is
	 * a controlling terminal, unless it is the closing
	 * process' controlling terminal.  In that case,
	 * if the reference count is 2 (this last descriptor
	 * plus the session), release the reference from the session.
	 */
	if (td != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		if (vp == p->p_session->s_ttyvp) {
			PROC_UNLOCK(p);
			oldvp = NULL;
			sx_xlock(&proctree_lock);
			/* Re-check after acquiring proctree_lock. */
			if (vp == p->p_session->s_ttyvp) {
				SESS_LOCK(p->p_session);
				VI_LOCK(vp);
				if (count_dev(dev) == 2 &&
				    (vp->v_iflag & VI_DOOMED) == 0) {
					p->p_session->s_ttyvp = NULL;
					p->p_session->s_ttydp = NULL;
					oldvp = vp;
				}
				VI_UNLOCK(vp);
				SESS_UNLOCK(p->p_session);
			}
			sx_xunlock(&proctree_lock);
			/* Release the session's reference outside locks. */
			if (oldvp != NULL)
				vrele(oldvp);
		} else
			PROC_UNLOCK(p);
	}
	/*
	 * We do not want to really close the device if it
	 * is still in use unless we are trying to close it
	 * forcibly. Since every use (buffer, vnode, swap, cmap)
	 * holds a reference to the vnode, and because we mark
	 * any other vnodes that alias this device, when the
	 * sum of the reference counts on all the aliased
	 * vnodes descends to one, we are on last close.
	 */
	dsw = dev_refthread(dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	dflags = 0;
	VI_LOCK(vp);
	if (vp->v_iflag & VI_DOOMED) {
		/* Forced close. */
		dflags |= FREVOKE | FNONBLOCK;
	} else if (dsw->d_flags & D_TRACKCLOSE) {
		/* Keep device updated on status.
 */
	} else if (count_dev(dev) > 1) {
		/* Not the last close and the driver doesn't care. */
		VI_UNLOCK(vp);
		dev_relthread(dev, ref);
		return (0);
	}
	if (count_dev(dev) == 1)
		dflags |= FLASTCLOSE;
	vholdl(vp);
	VI_UNLOCK(vp);
	/* d_close() may sleep; drop the vnode lock around the call. */
	vp_locked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);
	KASSERT(dev->si_refcount > 0,
	    ("devfs_close() on un-referenced struct cdev *(%s)",
	    devtoname(dev)));
	error = dsw->d_close(dev, ap->a_fflag | dflags, S_IFCHR, td);
	dev_relthread(dev, ref);
	vn_lock(vp, vp_locked | LK_RETRY);
	vdrop(vp);
	return (error);
}

/*
 * fileops close for devfs-backed files: run the generic vnode close
 * with td_fpop set, then tear down any cdevpriv state.
 */
static int
devfs_close_f(struct file *fp, struct thread *td)
{
	int error;
	struct file *fpop;

	/*
	 * NB: td may be NULL if this descriptor is closed due to
	 * garbage collection from a closed UNIX domain socket.
	 */
	fpop = curthread->td_fpop;
	curthread->td_fpop = fp;
	error = vnops.fo_close(fp, td);
	curthread->td_fpop = fpop;

	/*
	 * The f_cdevpriv cannot be assigned non-NULL value while we
	 * are destroying the file.
	 */
	if (fp->f_cdevpriv != NULL)
		devfs_fpdrop(fp);
	return (error);
}

/*
 * VOP_GETATTR: fill in a vattr from the devfs dirent (and, for
 * character devices, from the cdev's timestamps).
 */
static int
devfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct devfs_dirent *de;
	struct devfs_mount *dmp;
	struct cdev *dev;
	struct timeval boottime;
	int error;

	error = devfs_populate_vp(vp);
	if (error != 0)
		return (error);

	dmp = VFSTODEVFS(vp->v_mount);
	sx_xunlock(&dmp->dm_lock);

	de = vp->v_data;
	KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
	if (vp->v_type == VDIR) {
		de = de->de_dir;
		KASSERT(de != NULL,
		    ("Null dir dirent in devfs_getattr vp=%p", vp));
	}
	vap->va_uid = de->de_uid;
	vap->va_gid = de->de_gid;
	vap->va_mode = de->de_mode;
	if (vp->v_type == VLNK)
		vap->va_size = strlen(de->de_symlink);
	else if (vp->v_type == VDIR)
		vap->va_size = vap->va_bytes = DEV_BSIZE;
	else
		vap->va_size = 0;
	if (vp->v_type != VDIR)
		vap->va_bytes = 0;
	vap->va_blocksize = DEV_BSIZE;
	vap->va_type = vp->v_type;

	getboottime(&boottime);
	/* Substitute boot time for timestamps never set (<= 1 hour). */
#define fix(aa)							\
	do {							\
		if ((aa).tv_sec <= 3600) {			\
			(aa).tv_sec = boottime.tv_sec;		\
			(aa).tv_nsec = boottime.tv_usec * 1000; \
		}						\
	} while (0)
	if
(vp->v_type != VCHR) {
		fix(de->de_atime);
		vap->va_atime = de->de_atime;
		fix(de->de_mtime);
		vap->va_mtime = de->de_mtime;
		fix(de->de_ctime);
		vap->va_ctime = de->de_ctime;
	} else {
		/* Character devices keep timestamps in the cdev. */
		dev = vp->v_rdev;
		fix(dev->si_atime);
		vap->va_atime = dev->si_atime;
		fix(dev->si_mtime);
		vap->va_mtime = dev->si_mtime;
		fix(dev->si_ctime);
		vap->va_ctime = dev->si_ctime;
		vap->va_rdev = cdev2priv(dev)->cdp_inode;
	}
	vap->va_gen = 0;
	vap->va_flags = 0;
	vap->va_filerev = 0;
	vap->va_nlink = de->de_links;
	vap->va_fileid = de->de_inode;

	return (error);
}

/* ARGSUSED */
/*
 * fileops ioctl: forward to the generic vnode ioctl with td_fpop set
 * so the driver can find the file via devfs_get_cdevpriv().
 */
static int
devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred,
    struct thread *td)
{
	struct file *fpop;
	int error;

	fpop = td->td_fpop;
	td->td_fpop = fp;
	error = vnops.fo_ioctl(fp, com, data, cred, td);
	td->td_fpop = fpop;
	return (error);
}

/*
 * VOP_IOCTL for character devices: handle the generic FIODTYPE and
 * FIODGNAME requests here, otherwise pass through to the driver's
 * d_ioctl.  TIOCSCTTY additionally installs the vnode as the session's
 * controlling terminal.
 */
static int
devfs_ioctl(struct vop_ioctl_args *ap)
{
	struct fiodgname_arg *fgn;
	struct vnode *vpold, *vp;
	struct cdevsw *dsw;
	struct thread *td;
	struct cdev *dev;
	int error, ref, i;
	const char *p;
	u_long com;

	vp = ap->a_vp;
	com = ap->a_command;
	td = ap->a_td;

	dsw = devvn_refthread(vp, &dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	KASSERT(dev->si_refcount > 0,
	    ("devfs: un-referenced struct cdev *(%s)", devtoname(dev)));

	if (com == FIODTYPE) {
		*(int *)ap->a_data = dsw->d_flags & D_TYPEMASK;
		error = 0;
		goto out;
	} else if (com == FIODGNAME) {
		/* Copy the device name out to the user's buffer. */
		fgn = ap->a_data;
		p = devtoname(dev);
		i = strlen(p) + 1;
		if (i > fgn->len)
			error = EINVAL;
		else
			error = copyout(p, fgn->buf, i);
		goto out;
	}
	error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td);
out:
	dev_relthread(dev, ref);
	if (error == ENOIOCTL)
		error = ENOTTY;
	if (error == 0 && com == TIOCSCTTY) {
		/* Do nothing if reassigning same control tty */
		sx_slock(&proctree_lock);
		if (td->td_proc->p_session->s_ttyvp == vp) {
			sx_sunlock(&proctree_lock);
			return (0);
		}
		vpold = td->td_proc->p_session->s_ttyvp;
		VREF(vp);
		SESS_LOCK(td->td_proc->p_session);
		td->td_proc->p_session->s_ttyvp = vp;
		td->td_proc->p_session->s_ttydp = cdev2priv(dev);
		SESS_UNLOCK(td->td_proc->p_session);
		sx_sunlock(&proctree_lock);

		/* Get rid of reference to old control tty */
		if (vpold)
			vrele(vpold);
	}
	return (error);
}

/* ARGSUSED */
/*
 * fileops kqfilter: resolve the device and hand the knote to the
 * driver's d_kqfilter.
 */
static int
devfs_kqfilter_f(struct file *fp, struct knote *kn)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	int error, ref;
	struct file *fpop;
	struct thread *td;

	td = curthread;
	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error)
		return (error);
	error = dsw->d_kqfilter(dev, kn);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);
	return (error);
}

/*
 * Check whether td's jail is allowed to see the dirent's device.
 * A process may always see its controlling terminal.  Returns 0
 * when visible, otherwise the prison_check() error.
 */
static inline int
devfs_prison_check(struct devfs_dirent *de, struct thread *td)
{
	struct cdev_priv *cdp;
	struct ucred *dcr;
	struct proc *p;
	int error;

	cdp = de->de_cdp;
	if (cdp == NULL)
		return (0);
	dcr = cdp->cdp_c.si_cred;
	if (dcr == NULL)
		return (0);

	error = prison_check(td->td_ucred, dcr);
	if (error == 0)
		return (0);
	/* We do, however, allow access to the controlling terminal */
	p = td->td_proc;
	PROC_LOCK(p);
	if (!(p->p_flag & P_CONTROLT)) {
		PROC_UNLOCK(p);
		return (error);
	}
	if (p->p_session->s_ttydp == cdp)
		error = 0;
	PROC_UNLOCK(p);
	return (error);
}

/*
 * The guts of VOP_LOOKUP.  Entered with dmp->dm_lock held; *dm_unlock
 * is cleared when this function has already released (or transferred)
 * that lock.
 */
static int
devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct thread *td;
	struct devfs_dirent *de, *dd;
	struct devfs_dirent **dde;
	struct devfs_mount *dmp;
	struct cdev *cdev;
	int error, flags, nameiop, dvplocked;
	char specname[SPECNAMELEN + 1], *pname;

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dvp = ap->a_dvp;
	pname = cnp->cn_nameptr;
	td = cnp->cn_thread;
	flags = cnp->cn_flags;
	nameiop = cnp->cn_nameiop;
	dmp = VFSTODEVFS(dvp->v_mount);
	dd = dvp->v_data;
	*vpp = NULLVP;

	/* Renaming devfs entries is not supported. */
	if ((flags & ISLASTCN) && nameiop == RENAME)
		return (EOPNOTSUPP);

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT))
		return (EIO);

	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td);
	if (error)
		return (error);

	/* Handle "." — return the directory itself. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		*vpp =
dvp;
		VREF(dvp);
		return (0);
	}

	/* Handle ".." — look up the parent dirent. */
	if (flags & ISDOTDOT) {
		if ((flags & ISLASTCN) && nameiop != LOOKUP)
			return (EINVAL);
		de = devfs_parent_dirent(dd);
		if (de == NULL)
			return (ENOENT);
		dvplocked = VOP_ISLOCKED(dvp);
		VOP_UNLOCK(dvp, 0);
		error = devfs_allocv(de, dvp->v_mount,
		    cnp->cn_lkflags & LK_TYPE_MASK, vpp);
		/* devfs_allocv() dropped dm_lock for us. */
		*dm_unlock = 0;
		vn_lock(dvp, dvplocked | LK_RETRY);
		return (error);
	}

	dd = dvp->v_data;
	de = devfs_find(dd, cnp->cn_nameptr, cnp->cn_namelen, 0);
	while (de == NULL) {	/* While(...) so we can use break */

		if (nameiop == DELETE)
			return (ENOENT);

		/*
		 * OK, we didn't have an entry for the name we were asked for
		 * so we try to see if anybody can create it on demand.
		 */
		pname = devfs_fqpn(specname, dmp, dd, cnp);
		if (pname == NULL)
			break;

		cdev = NULL;
		DEVFS_DMP_HOLD(dmp);
		sx_xunlock(&dmp->dm_lock);
		sx_slock(&clone_drain_lock);
		EVENTHANDLER_INVOKE(dev_clone,
		    td->td_ucred, pname, strlen(pname), &cdev);
		sx_sunlock(&clone_drain_lock);

		if (cdev == NULL)
			sx_xlock(&dmp->dm_lock);
		else if (devfs_populate_vp(dvp) != 0) {
			/* The directory vnode went away while cloning. */
			*dm_unlock = 0;
			sx_xlock(&dmp->dm_lock);
			if (DEVFS_DMP_DROP(dmp)) {
				sx_xunlock(&dmp->dm_lock);
				devfs_unmount_final(dmp);
			} else
				sx_xunlock(&dmp->dm_lock);
			dev_rel(cdev);
			return (ENOENT);
		}
		if (DEVFS_DMP_DROP(dmp)) {
			/* The mount was torn down while we slept. */
			*dm_unlock = 0;
			sx_xunlock(&dmp->dm_lock);
			devfs_unmount_final(dmp);
			if (cdev != NULL)
				dev_rel(cdev);
			return (ENOENT);
		}

		if (cdev == NULL)
			break;

		/* Pick up the dirent created for the cloned device. */
		dev_lock();
		dde = &cdev2priv(cdev)->cdp_dirents[dmp->dm_idx];
		if (dde != NULL && *dde != NULL)
			de = *dde;
		dev_unlock();
		dev_rel(cdev);
		break;
	}

	if (de == NULL || de->de_flags & DE_WHITEOUT) {
		/* Allow create/rename of a missing last component. */
		if ((nameiop == CREATE || nameiop == RENAME) &&
		    (flags & (LOCKPARENT | WANTPARENT)) &&
		    (flags & ISLASTCN)) {
			cnp->cn_flags |= SAVENAME;
			return (EJUSTRETURN);
		}
		return (ENOENT);
	}

	if (devfs_prison_check(de, td))
		return (ENOENT);

	if ((cnp->cn_nameiop == DELETE) && (flags & ISLASTCN)) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			return (error);
		if (*vpp == dvp) {
			VREF(dvp);
			*vpp = dvp;
			return (0);
		}
	}
	error = devfs_allocv(de, dvp->v_mount, cnp->cn_lkflags & LK_TYPE_MASK,
	    vpp);
	/* devfs_allocv() dropped dm_lock for us. */
	*dm_unlock = 0;
	return (error);
}

/*
 * VOP_LOOKUP: populate the mount, then defer to devfs_lookupx(),
 * releasing dm_lock unless lookupx already did.
 */
static int
devfs_lookup(struct vop_lookup_args *ap)
{
	int j;
	struct devfs_mount *dmp;
	int dm_unlock;

	if (devfs_populate_vp(ap->a_dvp) != 0)
		return (ENOTDIR);

	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
	dm_unlock = 1;
	j = devfs_lookupx(ap, &dm_unlock);
	if (dm_unlock == 1)
		sx_xunlock(&dmp->dm_lock);
	return (j);
}

/*
 * VOP_MKNOD: only "creates" a character device by clearing the
 * whiteout flag on an existing hidden entry; anything else fails.
 */
static int
devfs_mknod(struct vop_mknod_args *ap)
{
	struct componentname *cnp;
	struct vnode *dvp, **vpp;
	struct devfs_dirent *dd, *de;
	struct devfs_mount *dmp;
	int error;

	/*
	 * The only type of node we should be creating here is a
	 * character device, for anything else return EOPNOTSUPP.
	 */
	if (ap->a_vap->va_type != VCHR)
		return (EOPNOTSUPP);
	dvp = ap->a_dvp;
	dmp = VFSTODEVFS(dvp->v_mount);

	cnp = ap->a_cnp;
	vpp = ap->a_vpp;
	dd = dvp->v_data;

	error = ENOENT;
	sx_xlock(&dmp->dm_lock);
	TAILQ_FOREACH(de, &dd->de_dlist, de_list) {
		if (cnp->cn_namelen != de->de_dirent->d_namlen)
			continue;
		if (de->de_dirent->d_type == DT_CHR &&
		    (de->de_cdp->cdp_flags & CDP_ACTIVE) == 0)
			continue;
		if (bcmp(cnp->cn_nameptr, de->de_dirent->d_name,
		    de->de_dirent->d_namlen) != 0)
			continue;
		/* Found a whited-out match: resurrect it below. */
		if (de->de_flags & DE_WHITEOUT)
			break;
		goto notfound;
	}
	if (de == NULL)
		goto notfound;
	de->de_flags &= ~DE_WHITEOUT;
	/* devfs_allocv() drops dm_lock. */
	error = devfs_allocv(de, dvp->v_mount, LK_EXCLUSIVE, vpp);
	return (error);
notfound:
	sx_xunlock(&dmp->dm_lock);
	return (error);
}

/* ARGSUSED */
/*
 * VOP_OPEN for character devices: call the driver's d_fdopen/d_open
 * with the vnode unlocked and, on success, convert the file to the
 * devfs fileops so later operations bypass the vnode layer.
 */
static int
devfs_open(struct vop_open_args *ap)
{
	struct thread *td = ap->a_td;
	struct vnode *vp = ap->a_vp;
	struct cdev *dev = vp->v_rdev;
	struct file *fp = ap->a_fp;
	int error, ref, vlocked;
	struct cdevsw *dsw;
	struct file *fpop;
	struct mtx *mtxp;

	if (vp->v_type == VBLK)
		return (ENXIO);

	if (dev == NULL)
		return (ENXIO);

	/* Make this field valid before any I/O in d_open.
 */
	if (dev->si_iosize_max == 0)
		dev->si_iosize_max = DFLTPHYS;

	dsw = dev_refthread(dev, &ref);
	if (dsw == NULL)
		return (ENXIO);
	/* d_fdopen() requires a struct file to attach state to. */
	if (fp == NULL && dsw->d_fdopen != NULL) {
		dev_relthread(dev, ref);
		return (ENXIO);
	}

	/* Drop the vnode lock across the driver call; it may sleep. */
	vlocked = VOP_ISLOCKED(vp);
	VOP_UNLOCK(vp, 0);

	fpop = td->td_fpop;
	td->td_fpop = fp;
	if (fp != NULL) {
		fp->f_data = dev;
		fp->f_vnode = vp;
	}

	if (dsw->d_fdopen != NULL)
		error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
	else
		error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
	/* Clean up any cdevpriv upon error. */
	if (error != 0)
		devfs_clear_cdevpriv();
	td->td_fpop = fpop;

	vn_lock(vp, vlocked | LK_RETRY);
	dev_relthread(dev, ref);
	if (error != 0) {
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

#if 0	/* /dev/console */
	KASSERT(fp != NULL, ("Could not vnode bypass device on NULL fp"));
#else
	if (fp == NULL)
		return (error);
#endif
	if (fp->f_ops == &badfileops)
		finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f);
	mtxp = mtx_pool_find(mtxpool_sleep, fp);

	/*
	 * Hint to the dofilewrite() to not force the buffer draining
	 * on the writer to the file.  Most likely, the write would
	 * not need normal buffers.
	 */
	mtx_lock(mtxp);
	fp->f_vnread_flags |= FDEVFS_VNODE;
	mtx_unlock(mtxp);
	return (error);
}

/*
 * VOP_PATHCONF: tty limits apply only to vnodes flagged VV_ISTTY;
 * _PC_MAC_PRESENT reports whether MAC label storage is compiled in.
 */
static int
devfs_pathconf(struct vop_pathconf_args *ap)
{

	switch (ap->a_name) {
	case _PC_MAX_CANON:
		if (ap->a_vp->v_vflag & VV_ISTTY) {
			*ap->a_retval = MAX_CANON;
			return (0);
		}
		return (EINVAL);
	case _PC_MAX_INPUT:
		if (ap->a_vp->v_vflag & VV_ISTTY) {
			*ap->a_retval = MAX_INPUT;
			return (0);
		}
		return (EINVAL);
	case _PC_VDISABLE:
		if (ap->a_vp->v_vflag & VV_ISTTY) {
			*ap->a_retval = _POSIX_VDISABLE;
			return (0);
		}
		return (EINVAL);
	case _PC_MAC_PRESENT:
#ifdef MAC
		/*
		 * If MAC is enabled, devfs automatically supports
		 * trivial non-persistant label storage.
		 */
		*ap->a_retval = 1;
#else
		*ap->a_retval = 0;
#endif
		return (0);
	default:
		return (vop_stdpathconf(ap));
	}
	/* NOTREACHED */
}

/* ARGSUSED */
/*
 * fileops poll: hand off to the driver's d_poll; fall back to the
 * generic vnode poll when the device can't be resolved.
 */
static int
devfs_poll_f(struct file *fp, int events, struct ucred *cred,
    struct thread *td)
{
	struct cdev *dev;
	struct cdevsw *dsw;
	int error, ref;
	struct file *fpop;

	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error != 0) {
		error = vnops.fo_poll(fp, events, cred, td);
		return (error);
	}
	error = dsw->d_poll(dev, events, td);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);
	return(error);
}

/*
 * Print out the contents of a special device vnode.
 */
static int
devfs_print(struct vop_print_args *ap)
{

	printf("\tdev %s\n", devtoname(ap->a_vp->v_rdev));
	return (0);
}

/*
 * fileops read: invoke the driver's d_read and, when data actually
 * moved, update the access timestamp.  Falls back to the generic
 * vnode read when the device can't be resolved.
 */
static int
devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred,
    int flags, struct thread *td)
{
	struct cdev *dev;
	int ioflag, error, ref;
	ssize_t resid;
	struct cdevsw *dsw;
	struct file *fpop;

	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
		return (EINVAL);
	fpop = td->td_fpop;
	error = devfs_fp_check(fp, &dev, &dsw, &ref);
	if (error != 0) {
		error = vnops.fo_read(fp, uio, cred, flags, td);
		return (error);
	}
	resid = uio->uio_resid;
	ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT);
	if (ioflag & O_DIRECT)
		ioflag |= IO_DIRECT;

	foffset_lock_uio(fp, uio, flags | FOF_NOLOCK);
	error = dsw->d_read(dev, uio, ioflag);
	/* Timestamp only if something was transferred (or EOF read). */
	if (uio->uio_resid != resid || (error == 0 && resid != 0))
		devfs_timestamp(&dev->si_atime);
	td->td_fpop = fpop;
	dev_relthread(dev, ref);

	foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF);
	return (error);
}

/*
 * VOP_READDIR: emit the directory entries, skipping covered,
 * whited-out and jail-invisible entries.  devfs does not support
 * readdir cookies; see the hack documented below.
 */
static int
devfs_readdir(struct vop_readdir_args *ap)
{
	int error;
	struct uio *uio;
	struct dirent *dp;
	struct devfs_dirent *dd;
	struct devfs_dirent *de;
	struct devfs_mount *dmp;
	off_t off;
	int *tmp_ncookies = NULL;

	if (ap->a_vp->v_type != VDIR)
		return (ENOTDIR);

	uio = ap->a_uio;
	if (uio->uio_offset < 0)
		return (EINVAL);

	/*
	 * XXX: This is a temporary hack to get around this filesystem not
	 * supporting cookies.
We store the location of the ncookies pointer
	 * in a temporary variable before calling vfs_subr.c:vfs_read_dirent()
	 * and set the number of cookies to 0. We then set the pointer to
	 * NULL so that vfs_read_dirent doesn't try to call realloc() on
	 * ap->a_cookies. Later in this function, we restore the ap->a_ncookies
	 * pointer to its original location before returning to the caller.
	 */
	if (ap->a_ncookies != NULL) {
		tmp_ncookies = ap->a_ncookies;
		*ap->a_ncookies = 0;
		ap->a_ncookies = NULL;
	}

	dmp = VFSTODEVFS(ap->a_vp->v_mount);
	if (devfs_populate_vp(ap->a_vp) != 0) {
		if (tmp_ncookies != NULL)
			ap->a_ncookies = tmp_ncookies;
		return (EIO);
	}
	error = 0;
	de = ap->a_vp->v_data;
	off = 0;
	TAILQ_FOREACH(dd, &de->de_dlist, de_list) {
		/* Catch use-after-free of dirents. */
		KASSERT(dd->de_cdp != (void *)0xdeadc0de,
		    ("%s %d\n", __func__, __LINE__));
		if (dd->de_flags & (DE_COVERED | DE_WHITEOUT))
			continue;
		/* Hide entries not visible from the caller's jail. */
		if (devfs_prison_check(dd, uio->uio_td))
			continue;
		if (dd->de_dirent->d_type == DT_DIR)
			de = dd->de_dir;
		else
			de = dd;
		dp = dd->de_dirent;
		MPASS(dp->d_reclen == GENERIC_DIRSIZ(dp));
		if (dp->d_reclen > uio->uio_resid)
			break;
		dp->d_fileno = de->de_inode;
		/* Only emit entries at or past the requested offset. */
		if (off >= uio->uio_offset) {
			error = vfs_read_dirent(ap, dp, off);
			if (error)
				break;
		}
		off += dp->d_reclen;
	}
	sx_xunlock(&dmp->dm_lock);
	uio->uio_offset = off;

	/*
	 * Restore ap->a_ncookies if it wasn't originally NULL in the first
	 * place.
	 */
	if (tmp_ncookies != NULL)
		ap->a_ncookies = tmp_ncookies;

	return (error);
}

/*
 * VOP_READLINK: copy the stored symlink target to the caller.
 */
static int
devfs_readlink(struct vop_readlink_args *ap)
{
	struct devfs_dirent *de;

	de = ap->a_vp->v_data;
	return (uiomove(de->de_symlink, strlen(de->de_symlink), ap->a_uio));
}

/*
 * VOP_RECLAIM for non-VCHR vnodes: detach the vnode from its dirent
 * under devfs_de_interlock and destroy the VM object.
 */
static int
devfs_reclaim(struct vop_reclaim_args *ap)
{
	struct vnode *vp;
	struct devfs_dirent *de;

	vp = ap->a_vp;
	mtx_lock(&devfs_de_interlock);
	de = vp->v_data;
	if (de != NULL) {
		de->de_vnode = NULL;
		vp->v_data = NULL;
	}
	mtx_unlock(&devfs_de_interlock);
	vnode_destroy_vobject(vp);
	return (0);
}

/*
 * VOP_RECLAIM for VCHR vnodes: the common reclaim plus dropping the
 * vnode's contribution to the cdev use count and its device reference.
 */
static int
devfs_reclaim_vchr(struct vop_reclaim_args *ap)
{
	struct vnode *vp;
	struct cdev *dev;

	vp = ap->a_vp;
	MPASS(vp->v_type == VCHR);

	devfs_reclaim(ap);

	VI_LOCK(vp);
	dev_lock();
	dev = vp->v_rdev;
	vp->v_rdev = NULL;
	if (dev != NULL)
		dev->si_usecount -= vp->v_usecount;
	dev_unlock();
	VI_UNLOCK(vp);
	if (dev != NULL)
		dev_rel(dev);
	return (0);
}

/*
 * VOP_REMOVE: user-created entries (de_cdp == NULL) are really
 * deleted; entries backed by a live cdev are whited out instead.
 */
static int
devfs_remove(struct vop_remove_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode *vp = ap->a_vp;
	struct devfs_dirent *dd;
	struct devfs_dirent *de, *de_covered;
	struct devfs_mount *dmp = VFSTODEVFS(vp->v_mount);

	ASSERT_VOP_ELOCKED(dvp, "devfs_remove");
	ASSERT_VOP_ELOCKED(vp, "devfs_remove");

	sx_xlock(&dmp->dm_lock);
	dd = ap->a_dvp->v_data;
	de = vp->v_data;
	if (de->de_cdp == NULL) {
		TAILQ_REMOVE(&dd->de_dlist, de, de_list);
		if (de->de_dirent->d_type == DT_LNK) {
			/* Uncover the entry this symlink shadowed. */
			de_covered = devfs_find(dd, de->de_dirent->d_name,
			    de->de_dirent->d_namlen, 0);
			if (de_covered != NULL)
				de_covered->de_flags &= ~DE_COVERED;
		}
		/* We need to unlock dvp because devfs_delete() may lock it. */
		VOP_UNLOCK(vp, 0);
		if (dvp != vp)
			VOP_UNLOCK(dvp, 0);
		devfs_delete(dmp, de, 0);
		sx_xunlock(&dmp->dm_lock);
		if (dvp != vp)
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	} else {
		de->de_flags |= DE_WHITEOUT;
		sx_xunlock(&dmp->dm_lock);
	}
	return (0);
}

/*
 * Revoke is called on a tty when a terminal session ends.
The vnode
 * is orphaned by setting v_op to deadfs so we need to let go of it
 * as well so that we create a new one next time around.
 *
 */
static int
devfs_revoke(struct vop_revoke_args *ap)
{
	struct vnode *vp = ap->a_vp, *vp2;
	struct cdev *dev;
	struct cdev_priv *cdp;
	struct devfs_dirent *de;
	u_int i;

	KASSERT((ap->a_flags & REVOKEALL) != 0, ("devfs_revoke !REVOKEALL"));

	dev = vp->v_rdev;
	cdp = cdev2priv(dev);

	/* Pin the cdev_priv while we iterate its dirents. */
	dev_lock();
	cdp->cdp_inuse++;
	dev_unlock();

	vhold(vp);
	vgone(vp);
	vdrop(vp);

	VOP_UNLOCK(vp,0);
 loop:
	for (;;) {
		mtx_lock(&devfs_de_interlock);
		dev_lock();
		vp2 = NULL;
		/* Gone over every alias vnode of this device. */
		for (i = 0; i <= cdp->cdp_maxdirent; i++) {
			de = cdp->cdp_dirents[i];
			if (de == NULL)
				continue;

			vp2 = de->de_vnode;
			if (vp2 != NULL) {
				dev_unlock();
				VI_LOCK(vp2);
				mtx_unlock(&devfs_de_interlock);
				if (vget(vp2, LK_EXCLUSIVE | LK_INTERLOCK,
				    curthread))
					goto loop;
				vhold(vp2);
				vgone(vp2);
				vdrop(vp2);
				vput(vp2);
				break;
			}
		}
		if (vp2 != NULL) {
			/* Killed one alias; rescan for more. */
			continue;
		}
		dev_unlock();
		mtx_unlock(&devfs_de_interlock);
		break;
	}
	dev_lock();
	cdp->cdp_inuse--;
	if (!(cdp->cdp_flags & CDP_ACTIVE) && cdp->cdp_inuse == 0) {
		/* Last user of a destroyed device: finish teardown. */
		TAILQ_REMOVE(&cdevp_list, cdp, cdp_list);
		dev_unlock();
		dev_rel(&cdp->cdp_c);
	} else
		dev_unlock();

	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	return (0);
}

/*
 * VOP_IOCTL for non-VCHR devfs vnodes: only the devfs rule-set
 * ioctls are supported.
 */
static int
devfs_rioctl(struct vop_ioctl_args *ap)
{
	struct vnode *vp;
	struct devfs_mount *dmp;
	int error;

	vp = ap->a_vp;
	vn_lock(vp, LK_SHARED | LK_RETRY);
	if (vp->v_iflag & VI_DOOMED) {
		VOP_UNLOCK(vp, 0);
		return (EBADF);
	}
	dmp = VFSTODEVFS(vp->v_mount);
	sx_xlock(&dmp->dm_lock);
	VOP_UNLOCK(vp, 0);
	DEVFS_DMP_HOLD(dmp);
	devfs_populate(dmp);
	if (DEVFS_DMP_DROP(dmp)) {
		sx_xunlock(&dmp->dm_lock);
		devfs_unmount_final(dmp);
		return (ENOENT);
	}
	error = devfs_rules_ioctl(dmp, ap->a_command, ap->a_data, ap->a_td);
	sx_xunlock(&dmp->dm_lock);
	return (error);
}

/*
 * VOP_READ for non-VCHR devfs vnodes: only directories are readable,
 * via VOP_READDIR.
 */
static int
devfs_rread(struct vop_read_args *ap)
{

	if (ap->a_vp->v_type != VDIR)
		return (EINVAL);
	return (VOP_READDIR(ap->a_vp, ap->a_uio, ap->a_cred, NULL, NULL,
	    NULL));
}

/*
 * VOP_SETATTR: apply ownership, mode and timestamp changes to the
 * devfs dirent (or cdev for character devices).
 */
static int
devfs_setattr(struct
vop_setattr_args *ap)
{
	struct devfs_dirent *de;
	struct vattr *vap;
	struct vnode *vp;
	struct thread *td;
	int c, error;
	uid_t uid;
	gid_t gid;

	vap = ap->a_vap;
	vp = ap->a_vp;
	td = curthread;
	/* Reject attributes devfs cannot change. */
	if ((vap->va_type != VNON) ||
	    (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) ||
	    (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) ||
	    (vap->va_flags != VNOVAL && vap->va_flags != 0) ||
	    (vap->va_rdev != VNOVAL) ||
	    ((int)vap->va_bytes != VNOVAL) ||
	    (vap->va_gen != VNOVAL)) {
		return (EINVAL);
	}

	error = devfs_populate_vp(vp);
	if (error != 0)
		return (error);

	de = vp->v_data;
	if (vp->v_type == VDIR)
		de = de->de_dir;

	/* c tracks whether any attribute actually changed. */
	c = 0;
	if (vap->va_uid == (uid_t)VNOVAL)
		uid = de->de_uid;
	else
		uid = vap->va_uid;
	if (vap->va_gid == (gid_t)VNOVAL)
		gid = de->de_gid;
	else
		gid = vap->va_gid;
	if (uid != de->de_uid || gid != de->de_gid) {
		if ((ap->a_cred->cr_uid != de->de_uid) || uid != de->de_uid ||
		    (gid != de->de_gid && !groupmember(gid, ap->a_cred))) {
			error = priv_check(td, PRIV_VFS_CHOWN);
			if (error != 0)
				goto ret;
		}
		de->de_uid = uid;
		de->de_gid = gid;
		c = 1;
	}

	if (vap->va_mode != (mode_t)VNOVAL) {
		if (ap->a_cred->cr_uid != de->de_uid) {
			error = priv_check(td, PRIV_VFS_ADMIN);
			if (error != 0)
				goto ret;
		}
		de->de_mode = vap->va_mode;
		c = 1;
	}

	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
		error = vn_utimes_perm(vp, vap, ap->a_cred, td);
		if (error != 0)
			goto ret;
		if (vap->va_atime.tv_sec != VNOVAL) {
			if (vp->v_type == VCHR)
				vp->v_rdev->si_atime = vap->va_atime;
			else
				de->de_atime = vap->va_atime;
		}
		if (vap->va_mtime.tv_sec != VNOVAL) {
			if (vp->v_type == VCHR)
				vp->v_rdev->si_mtime = vap->va_mtime;
			else
				de->de_mtime = vap->va_mtime;
		}
		c = 1;
	}

	if (c) {
		if (vp->v_type == VCHR)
			vfs_timestamp(&vp->v_rdev->si_ctime);
		else
			/*
			 * NOTE(review): the non-VCHR branch stamps de_mtime
			 * rather than de_ctime here — confirm whether this
			 * is intentional.
			 */
			vfs_timestamp(&de->de_mtime);
	}

ret:
	sx_xunlock(&VFSTODEVFS(vp->v_mount)->dm_lock);
	return (error);
}

#ifdef MAC
/*
 * VOP_SETLABEL: relabel the vnode and push the label into the dirent.
 */
static int
devfs_setlabel(struct vop_setlabel_args *ap)
{
	struct vnode *vp;
	struct devfs_dirent *de;

	vp = ap->a_vp;
	de = vp->v_data;
	mac_vnode_relabel(ap->a_cred, vp, ap->a_label);
	mac_devfs_update(vp->v_mount, de, vp);

	return (0);
}
#endif

/*
 * fileops stat: delegate to the generic vnode stat.
 */
static int
devfs_stat_f(struct file *fp, struct stat *sb, struct ucred *cred,
    struct thread *td)
{

	return (vnops.fo_stat(fp, sb, cred, td));
}

/*
 * VOP_SYMLINK: create a user symlink dirent (privileged), covering
 * any identically-named non-user entry.
 */
static int
devfs_symlink(struct vop_symlink_args *ap)
{
	int i, error;
	struct devfs_dirent *dd;
	struct devfs_dirent *de, *de_covered, *de_dotdot;
	struct devfs_mount *dmp;

	error = priv_check(curthread, PRIV_DEVFS_SYMLINK);
	if (error)
		return(error);
	dmp = VFSTODEVFS(ap->a_dvp->v_mount);
	if (devfs_populate_vp(ap->a_dvp) != 0)
		return (ENOENT);

	dd = ap->a_dvp->v_data;
	de = devfs_newdirent(ap->a_cnp->cn_nameptr, ap->a_cnp->cn_namelen);
	de->de_flags = DE_USER;
	de->de_uid = 0;
	de->de_gid = 0;
	de->de_mode = 0755;
	de->de_inode = alloc_unr(devfs_inos);
	de->de_dir = dd;
	de->de_dirent->d_type = DT_LNK;
	/* Copy the target including the terminating NUL. */
	i = strlen(ap->a_target) + 1;
	de->de_symlink = malloc(i, M_DEVFS, M_WAITOK);
	bcopy(ap->a_target, de->de_symlink, i);
#ifdef MAC
	mac_devfs_create_symlink(ap->a_cnp->cn_cred, dmp->dm_mount, dd, de);
#endif
	de_covered = devfs_find(dd, de->de_dirent->d_name,
	    de->de_dirent->d_namlen, 0);
	if (de_covered != NULL) {
		/* A user-created entry of the same name already exists. */
		if ((de_covered->de_flags & DE_USER) != 0) {
			devfs_delete(dmp, de, DEVFS_DEL_NORECURSE);
			sx_xunlock(&dmp->dm_lock);
			return (EEXIST);
		}
		KASSERT((de_covered->de_flags & DE_COVERED) == 0,
		    ("devfs_symlink: entry %p already covered", de_covered));
		de_covered->de_flags |= DE_COVERED;
	}

	de_dotdot = TAILQ_FIRST(&dd->de_dlist);		/* "." */
	de_dotdot = TAILQ_NEXT(de_dotdot, de_list);	/* ".."
*/ TAILQ_INSERT_AFTER(&dd->de_dlist, de_dotdot, de, de_list); devfs_dir_ref_de(dmp, dd); devfs_rules_apply(dmp, de); return (devfs_allocv(de, ap->a_dvp->v_mount, LK_EXCLUSIVE, ap->a_vpp)); } static int devfs_truncate_f(struct file *fp, off_t length, struct ucred *cred, struct thread *td) { return (vnops.fo_truncate(fp, length, cred, td)); } static int devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct thread *td) { struct cdev *dev; int error, ioflag, ref; ssize_t resid; struct cdevsw *dsw; struct file *fpop; if (uio->uio_resid > DEVFS_IOSIZE_MAX) return (EINVAL); fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error != 0) { error = vnops.fo_write(fp, uio, cred, flags, td); return (error); } KASSERT(uio->uio_td == td, ("uio_td %p is not td %p", uio->uio_td, td)); ioflag = fp->f_flag & (O_NONBLOCK | O_DIRECT | O_FSYNC); if (ioflag & O_DIRECT) ioflag |= IO_DIRECT; foffset_lock_uio(fp, uio, flags | FOF_NOLOCK); resid = uio->uio_resid; error = dsw->d_write(dev, uio, ioflag); if (uio->uio_resid != resid || (error == 0 && resid != 0)) { devfs_timestamp(&dev->si_ctime); dev->si_mtime = dev->si_ctime; } td->td_fpop = fpop; dev_relthread(dev, ref); foffset_unlock_uio(fp, uio, flags | FOF_NOLOCK | FOF_NEXTOFF); return (error); } static int devfs_mmap_f(struct file *fp, vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, vm_prot_t cap_maxprot, int flags, vm_ooffset_t foff, struct thread *td) { struct cdev *dev; struct cdevsw *dsw; struct mount *mp; struct vnode *vp; struct file *fpop; vm_object_t object; vm_prot_t maxprot; int error, ref; vp = fp->f_vnode; /* * Ensure that file and memory protections are * compatible. 
*/ mp = vp->v_mount; if (mp != NULL && (mp->mnt_flag & MNT_NOEXEC) != 0) { maxprot = VM_PROT_NONE; if ((prot & VM_PROT_EXECUTE) != 0) return (EACCES); } else maxprot = VM_PROT_EXECUTE; if ((fp->f_flag & FREAD) != 0) maxprot |= VM_PROT_READ; else if ((prot & VM_PROT_READ) != 0) return (EACCES); /* * If we are sharing potential changes via MAP_SHARED and we * are trying to get write permission although we opened it * without asking for it, bail out. * * Note that most character devices always share mappings. * The one exception is that D_MMAP_ANON devices * (i.e. /dev/zero) permit private writable mappings. * * Rely on vm_mmap_cdev() to fail invalid MAP_PRIVATE requests * as well as updating maxprot to permit writing for * D_MMAP_ANON devices rather than doing that here. */ if ((flags & MAP_SHARED) != 0) { if ((fp->f_flag & FWRITE) != 0) maxprot |= VM_PROT_WRITE; else if ((prot & VM_PROT_WRITE) != 0) return (EACCES); } maxprot &= cap_maxprot; fpop = td->td_fpop; error = devfs_fp_check(fp, &dev, &dsw, &ref); if (error != 0) return (error); error = vm_mmap_cdev(td, size, prot, &maxprot, &flags, dev, dsw, &foff, &object); td->td_fpop = fpop; dev_relthread(dev, ref); if (error != 0) return (error); error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object, foff, FALSE, td); if (error != 0) vm_object_deallocate(object); return (error); } dev_t dev2udev(struct cdev *x) { if (x == NULL) return (NODEV); return (cdev2priv(x)->cdp_inode); } static struct fileops devfs_ops_f = { .fo_read = devfs_read_f, .fo_write = devfs_write_f, .fo_truncate = devfs_truncate_f, .fo_ioctl = devfs_ioctl_f, .fo_poll = devfs_poll_f, .fo_kqfilter = devfs_kqfilter_f, .fo_stat = devfs_stat_f, .fo_close = devfs_close_f, .fo_chmod = vn_chmod, .fo_chown = vn_chown, .fo_sendfile = vn_sendfile, .fo_seek = vn_seek, .fo_fill_kinfo = vn_fill_kinfo, .fo_mmap = devfs_mmap_f, .fo_flags = DFLAG_PASSABLE | DFLAG_SEEKABLE }; /* Vops for non-CHR vnodes in /dev. 
 */
static struct vop_vector devfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		devfs_access,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_rioctl,
	.vop_lookup =		devfs_lookup,
	.vop_mknod =		devfs_mknod,
	.vop_pathconf =		devfs_pathconf,
	.vop_read =		devfs_rread,
	.vop_readdir =		devfs_readdir,
	.vop_readlink =		devfs_readlink,
	.vop_reclaim =		devfs_reclaim,
	.vop_remove =		devfs_remove,
	.vop_revoke =		devfs_revoke,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_symlink =		devfs_symlink,
	.vop_vptocnp =		devfs_vptocnp,
};

/*
 * Vops for VCHR vnodes in /dev.  Operations that make no sense on a
 * character device are wired to VOP_PANIC; read/write/poll of a dead
 * device go through the dead_* handlers.
 */
static struct vop_vector devfs_specops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		devfs_access,
	.vop_bmap =		VOP_PANIC,
	.vop_close =		devfs_close,
	.vop_create =		VOP_PANIC,
	.vop_fsync =		vop_stdfsync,
	.vop_getattr =		devfs_getattr,
	.vop_ioctl =		devfs_ioctl,
	.vop_link =		VOP_PANIC,
	.vop_mkdir =		VOP_PANIC,
	.vop_mknod =		VOP_PANIC,
	.vop_open =		devfs_open,
	.vop_pathconf =		devfs_pathconf,
	.vop_poll =		dead_poll,
	.vop_print =		devfs_print,
	.vop_read =		dead_read,
	.vop_readdir =		VOP_PANIC,
	.vop_readlink =		VOP_PANIC,
	.vop_reallocblks =	VOP_PANIC,
	.vop_reclaim =		devfs_reclaim_vchr,
	.vop_remove =		devfs_remove,
	.vop_rename =		VOP_PANIC,
	.vop_revoke =		devfs_revoke,
	.vop_rmdir =		VOP_PANIC,
	.vop_setattr =		devfs_setattr,
#ifdef MAC
	.vop_setlabel =		devfs_setlabel,
#endif
	.vop_strategy =		VOP_PANIC,
	.vop_symlink =		VOP_PANIC,
	.vop_vptocnp =		devfs_vptocnp,
	.vop_write =		dead_write,
};

/*
 * Our calling convention to the device drivers used to be that we passed
 * vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_
 * flags instead since that's what open(), close() and ioctl() takes and
 * we don't really want vnode.h in device drivers.
 * We solved the source compatibility by redefining some vnode flags to
 * be the same as the fcntl ones and by sending down the bitwise OR of
 * the respective fcntl/vnode flags.  These CTASSERTS make sure nobody
 * pulls the rug out under this.
 */
CTASSERT(O_NONBLOCK == IO_NDELAY);
CTASSERT(O_FSYNC == IO_SYNC);
Index: head/sys/fs/ext2fs/ext2_dinode.h
===================================================================
--- head/sys/fs/ext2fs/ext2_dinode.h	(revision 326267)
+++ head/sys/fs/ext2fs/ext2_dinode.h	(revision 326268)
@@ -1,141 +1,143 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2009 Aditya Sarawgi
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _FS_EXT2FS_EXT2_DINODE_H_
#define	_FS_EXT2FS_EXT2_DINODE_H_

/*
 * Special inode numbers
 * The root inode is the root of the file system.  Inode 0 can't be used for
 * normal purposes and bad blocks are normally linked to inode 1, thus
 * the root inode is 2.
 * Inode 3 to 10 are reserved in ext2fs.
*/ #define EXT2_BADBLKINO ((ino_t)1) #define EXT2_ROOTINO ((ino_t)2) #define EXT2_ACLIDXINO ((ino_t)3) #define EXT2_ACLDATAINO ((ino_t)4) #define EXT2_BOOTLOADERINO ((ino_t)5) #define EXT2_UNDELDIRINO ((ino_t)6) #define EXT2_RESIZEINO ((ino_t)7) #define EXT2_JOURNALINO ((ino_t)8) #define EXT2_EXCLUDEINO ((ino_t)9) #define EXT2_REPLICAINO ((ino_t)10) #define EXT2_FIRSTINO ((ino_t)11) /* * Inode flags * The system supports EXT2_IMMUTABLE, EXT2_APPEND and EXT2_NODUMP flags. * The current implementation also uses EXT3_INDEX, EXT4_EXTENTS and * EXT4_HUGE_FILE with some restrictions imposed by the lack of write * support. */ #define EXT2_SECRM 0x00000001 /* Secure deletion */ #define EXT2_UNRM 0x00000002 /* Undelete */ #define EXT2_COMPR 0x00000004 /* Compress file */ #define EXT2_SYNC 0x00000008 /* Synchronous updates */ #define EXT2_IMMUTABLE 0x00000010 /* Immutable file */ #define EXT2_APPEND 0x00000020 /* Writes to file may only append */ #define EXT2_NODUMP 0x00000040 /* Do not dump file */ #define EXT2_NOATIME 0x00000080 /* Do not update atime */ #define EXT3_INDEX 0x00001000 /* Hash-indexed directory */ #define EXT4_IMAGIC 0x00002000 /* AFS directory */ #define EXT4_JOURNAL_DATA 0x00004000 /* File data should be journaled */ #define EXT4_NOTAIL 0x00008000 /* File tail should not be merged */ #define EXT4_DIRSYNC 0x00010000 /* Dirsync behaviour */ #define EXT4_TOPDIR 0x00020000 /* Top of directory hierarchies*/ #define EXT4_HUGE_FILE 0x00040000 /* Set to each huge file */ #define EXT4_EXTENTS 0x00080000 /* Inode uses extents */ #define EXT4_EA_INODE 0x00200000 /* Inode used for large EA */ #define EXT4_EOFBLOCKS 0x00400000 /* Blocks allocated beyond EOF */ #define EXT4_INLINE_DATA 0x10000000 /* Inode has inline data */ #define EXT4_PROJINHERIT 0x20000000 /* Children inherit project ID */ /* * Definitions for nanosecond timestamps. * Ext3 inode versioning, 2006-12-13. 
 */
/*
 * The low EXT3_EPOCH_BITS of an "extra" time field extend the epoch of
 * the 32-bit seconds counter; the remaining bits hold nanoseconds.
 */
#define	EXT3_EPOCH_BITS	2
#define	EXT3_EPOCH_MASK	((1 << EXT3_EPOCH_BITS) - 1)
#define	EXT3_NSEC_MASK	(~0UL << EXT3_EPOCH_BITS)

/* True if the filesystem carries the extra-isize / huge-file features. */
#define	E2DI_HAS_XTIME(ip)	(EXT2_HAS_RO_COMPAT_FEATURE(ip->i_e2fs,	\
				    EXT2F_ROCOMPAT_EXTRA_ISIZE))
#define	E2DI_HAS_HUGE_FILE(ip)	(EXT2_HAS_RO_COMPAT_FEATURE(ip->i_e2fs,	\
				    EXT2F_ROCOMPAT_HUGE_FILE))

/*
 * Constants relative to the data blocks
 */
#define	EXT2_NDIR_BLOCKS		12
#define	EXT2_IND_BLOCK			EXT2_NDIR_BLOCKS
#define	EXT2_DIND_BLOCK			(EXT2_IND_BLOCK + 1)
#define	EXT2_TIND_BLOCK			(EXT2_DIND_BLOCK + 1)
#define	EXT2_N_BLOCKS			(EXT2_TIND_BLOCK + 1)
#define	EXT2_MAXSYMLINKLEN		(EXT2_N_BLOCKS * sizeof(uint32_t))

/*
 * Structure of an inode on the disk
 */
struct ext2fs_dinode {
	uint16_t	e2di_mode;	/*   0: IFMT, permissions; see below. */
	uint16_t	e2di_uid;	/*   2: Owner UID */
	uint32_t	e2di_size;	/*   4: Size (in bytes) */
	uint32_t	e2di_atime;	/*   8: Access time */
	uint32_t	e2di_ctime;	/*  12: Change time */
	uint32_t	e2di_mtime;	/*  16: Modification time */
	uint32_t	e2di_dtime;	/*  20: Deletion time */
	uint16_t	e2di_gid;	/*  24: Owner GID */
	uint16_t	e2di_nlink;	/*  26: File link count */
	uint32_t	e2di_nblock;	/*  28: Blocks count */
	uint32_t	e2di_flags;	/*  32: Status flags (chflags) */
	uint32_t	e2di_version;	/*  36: Low 32 bits inode version */
	uint32_t	e2di_blocks[EXT2_N_BLOCKS]; /* 40: disk blocks */
	uint32_t	e2di_gen;	/* 100: generation number */
	uint32_t	e2di_facl;	/* 104: Low EA block */
	uint32_t	e2di_size_high;	/* 108: Upper bits of file size */
	uint32_t	e2di_faddr;	/* 112: Fragment address (obsolete) */
	uint16_t	e2di_nblock_high; /* 116: Blocks count bits 47:32 */
	uint16_t	e2di_facl_high;	/* 118: File EA bits 47:32 */
	uint16_t	e2di_uid_high;	/* 120: Owner UID top 16 bits */
	uint16_t	e2di_gid_high;	/* 122: Owner GID top 16 bits */
	uint16_t	e2di_chksum_lo;	/* 124: Lower inode checksum */
	uint16_t	e2di_lx_reserved; /* 126: Unused */
	uint16_t	e2di_extra_isize; /* 128: Size of this inode */
	uint16_t	e2di_chksum_hi;	/* 130: High inode checksum */
	uint32_t	e2di_ctime_extra; /* 132: Extra change time */
	uint32_t	e2di_mtime_extra; /* 136: Extra modification time */
	uint32_t	e2di_atime_extra; /* 140: Extra access time */
	uint32_t	e2di_crtime;	/* 144: Creation (birth)time */
	uint32_t	e2di_crtime_extra; /* 148: Extra creation (birth)time */
	uint32_t	e2di_version_hi; /* 152: High bits of inode version */
	uint32_t	e2di_projid;	/* 156: Project ID */
};

#endif	/* !_FS_EXT2FS_EXT2_DINODE_H_ */
Index: head/sys/fs/ext2fs/ext2_dir.h
===================================================================
--- head/sys/fs/ext2fs/ext2_dir.h	(revision 326267)
+++ head/sys/fs/ext2fs/ext2_dir.h	(revision 326268)
@@ -1,102 +1,104 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2009 Aditya Sarawgi
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _FS_EXT2FS_EXT2_DIR_H_
#define	_FS_EXT2FS_EXT2_DIR_H_

/*
 * Structure of a directory entry
 */
#define	EXT2FS_MAXNAMLEN	255

/* Original (rev 0) on-disk directory entry: 16-bit name length, no type. */
struct ext2fs_direct {
	uint32_t e2d_ino;		/* inode number of entry */
	uint16_t e2d_reclen;		/* length of this record */
	uint16_t e2d_namlen;		/* length of string in e2d_name */
	char e2d_name[EXT2FS_MAXNAMLEN];/* name with length<=EXT2FS_MAXNAMLEN */
};

/* State of the free-slot search performed during directory lookup. */
enum slotstatus {
	NONE,
	COMPACT,
	FOUND
};

struct ext2fs_searchslot {
	enum slotstatus slotstatus;
	doff_t	slotoffset;		/* offset of area with free space */
	int	slotsize;		/* size of area at slotoffset */
	int	slotfreespace;		/* amount of space free in slot */
	int	slotneeded;		/* sizeof the entry we are seeking */
};

/*
 * The new version of the directory entry.  Since EXT2 structures are
 * stored in intel byte order, and the name_len field could never be
 * bigger than 255 chars, it's safe to reclaim the extra byte for the
 * file_type field.
 */
struct ext2fs_direct_2 {
	uint32_t e2d_ino;		/* inode number of entry */
	uint16_t e2d_reclen;		/* length of this record */
	uint8_t	e2d_namlen;		/* length of string in e2d_name */
	uint8_t	e2d_type;		/* file type */
	char	e2d_name[EXT2FS_MAXNAMLEN];	/* name with
						 * length<=EXT2FS_MAXNAMLEN */
};

/*
 * Maximal count of links to a file
 */
#define	EXT2_LINK_MAX	32000
#define	EXT4_LINK_MAX	65000

/*
 * Ext2 directory file types.  Only the low 3 bits are used.  The
 * other bits are reserved for now.
 */
#define	EXT2_FT_UNKNOWN		0
#define	EXT2_FT_REG_FILE	1
#define	EXT2_FT_DIR		2
#define	EXT2_FT_CHRDEV		3
#define	EXT2_FT_BLKDEV		4
#define	EXT2_FT_FIFO		5
#define	EXT2_FT_SOCK		6
#define	EXT2_FT_SYMLINK		7
#define	EXT2_FT_MAX		8

/*
 * EXT2_DIR_PAD defines the directory entries boundaries
 *
 * NOTE: It must be a multiple of 4
 */
#define	EXT2_DIR_PAD		 	4
#define	EXT2_DIR_ROUND 			(EXT2_DIR_PAD - 1)
/* Record length for a name of name_len bytes: 8-byte header, rounded up. */
#define	EXT2_DIR_REC_LEN(name_len)	(((name_len) + 8 + EXT2_DIR_ROUND) & \
					    ~EXT2_DIR_ROUND)
#endif	/* !_FS_EXT2FS_EXT2_DIR_H_ */
Index: head/sys/fs/ext2fs/ext2_extents.c
===================================================================
--- head/sys/fs/ext2fs/ext2_extents.c	(revision 326267)
+++ head/sys/fs/ext2fs/ext2_extents.c	(revision 326268)
@@ -1,1581 +1,1583 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2010 Zheng Liu <lz@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_EXT2EXTENTS, "ext2_extents", "EXT2 extents"); #ifdef EXT2FS_DEBUG static void ext4_ext_print_extent(struct ext4_extent *ep) { printf(" ext %p => (blk %u len %u start %lu)\n", ep, ep->e_blk, ep->e_len, (uint64_t)ep->e_start_hi << 32 | ep->e_start_lo); } static void ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp); static void ext4_ext_print_index(struct inode *ip, struct ext4_extent_index *ex, int do_walk) { struct m_ext2fs *fs; struct buf *bp; int error; fs = ip->i_e2fs; printf(" index %p => (blk %u pblk %lu)\n", ex, ex->ei_blk, (uint64_t)ex->ei_leaf_hi << 32 | ex->ei_leaf_lo); if(!do_walk) return; if ((error = bread(ip->i_devvp, fsbtodb(fs, ((uint64_t)ex->ei_leaf_hi << 32 | ex->ei_leaf_lo)), (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { brelse(bp); return; } ext4_ext_print_header(ip, (struct ext4_extent_header *)bp->b_data); brelse(bp); } static void ext4_ext_print_header(struct inode *ip, struct ext4_extent_header *ehp) { int i; printf("header %p => (magic 0x%x entries %d max %d depth %d gen %d)\n", ehp, ehp->eh_magic, ehp->eh_ecount, ehp->eh_max, ehp->eh_depth, ehp->eh_gen); for (i = 0; i < ehp->eh_ecount; i++) if (ehp->eh_depth != 0) ext4_ext_print_index(ip, (struct ext4_extent_index *)(ehp + 1 + i), 1); else ext4_ext_print_extent((struct ext4_extent 
*)(ehp + 1 + i)); } static void ext4_ext_print_path(struct inode *ip, struct ext4_extent_path *path) { int k, l; l = path->ep_depth printf("ip=%d, Path:\n", ip->i_number); for (k = 0; k <= l; k++, path++) { if (path->ep_index) { ext4_ext_print_index(ip, path->ep_index, 0); } else if (path->ep_ext) { ext4_ext_print_extent(path->ep_ext); } } } void ext4_ext_print_extent_tree_status(struct inode * ip) { struct m_ext2fs *fs; struct ext4_extent_header *ehp; fs = ip->i_e2fs; ehp = (struct ext4_extent_header *)(char *)ip->i_db; printf("Extent status:ip=%d\n", ip->i_number); if (!(ip->i_flag & IN_E4EXTENTS)) return; ext4_ext_print_header(ip, ehp); return; } #endif static inline struct ext4_extent_header * ext4_ext_inode_header(struct inode *ip) { return ((struct ext4_extent_header *)ip->i_db); } static inline struct ext4_extent_header * ext4_ext_block_header(char *bdata) { return ((struct ext4_extent_header *)bdata); } static inline unsigned short ext4_ext_inode_depth(struct inode *ip) { struct ext4_extent_header *ehp; ehp = (struct ext4_extent_header *)ip->i_data; return (ehp->eh_depth); } static inline e4fs_daddr_t ext4_ext_index_pblock(struct ext4_extent_index *index) { e4fs_daddr_t blk; blk = index->ei_leaf_lo; blk |= (e4fs_daddr_t)index->ei_leaf_hi << 32; return (blk); } static inline void ext4_index_store_pblock(struct ext4_extent_index *index, e4fs_daddr_t pb) { index->ei_leaf_lo = pb & 0xffffffff; index->ei_leaf_hi = (pb >> 32) & 0xffff; } static inline e4fs_daddr_t ext4_ext_extent_pblock(struct ext4_extent *extent) { e4fs_daddr_t blk; blk = extent->e_start_lo; blk |= (e4fs_daddr_t)extent->e_start_hi << 32; return (blk); } static inline void ext4_ext_store_pblock(struct ext4_extent *ex, e4fs_daddr_t pb) { ex->e_start_lo = pb & 0xffffffff; ex->e_start_hi = (pb >> 32) & 0xffff; } int ext4_ext_in_cache(struct inode *ip, daddr_t lbn, struct ext4_extent *ep) { struct ext4_extent_cache *ecp; int ret = EXT4_EXT_CACHE_NO; ecp = &ip->i_ext_cache; if (ecp->ec_type == 
EXT4_EXT_CACHE_NO)
		return (ret);

	if (lbn >= ecp->ec_blk && lbn < ecp->ec_blk + ecp->ec_len) {
		ep->e_blk = ecp->ec_blk;
		ep->e_start_lo = ecp->ec_start & 0xffffffff;
		ep->e_start_hi = ecp->ec_start >> 32 & 0xffff;
		ep->e_len = ecp->ec_len;
		ret = ecp->ec_type;
	}
	return (ret);
}

/*
 * Sanity-check an on-disk extent header; log via ext2_fserr() and
 * return EIO on corruption.
 */
static int
ext4_ext_check_header(struct inode *ip, struct ext4_extent_header *eh)
{
	struct m_ext2fs *fs;
	char *error_msg;

	fs = ip->i_e2fs;

	if (eh->eh_magic != EXT4_EXT_MAGIC) {
		error_msg = "invalid magic";
		goto corrupted;
	}
	if (eh->eh_max == 0) {
		error_msg = "invalid eh_max";
		goto corrupted;
	}
	if (eh->eh_ecount > eh->eh_max) {
		error_msg = "invalid eh_entries";
		goto corrupted;
	}

	return (0);

corrupted:
	ext2_fserr(fs, ip->i_uid, error_msg);
	return (EIO);
}

/*
 * Binary-search the index entries of a non-leaf node for the last index
 * whose starting block is <= blk; result goes into path->ep_index.
 */
static void
ext4_ext_binsearch_index(struct ext4_extent_path *path, int blk)
{
	struct ext4_extent_header *eh;
	struct ext4_extent_index *r, *l, *m;

	eh = path->ep_header;

	KASSERT(eh->eh_ecount <= eh->eh_max && eh->eh_ecount > 0,
	    ("ext4_ext_binsearch_index: bad args"));

	l = EXT_FIRST_INDEX(eh) + 1;
	r = EXT_FIRST_INDEX(eh) + eh->eh_ecount - 1;
	while (l <= r) {
		m = l + (r - l) / 2;
		if (blk < m->ei_blk)
			r = m - 1;
		else
			l = m + 1;
	}

	path->ep_index = l - 1;
}

/*
 * Binary-search a leaf node for the last extent whose starting block is
 * <= blk; result goes into path->ep_ext (NULL-search skipped when empty).
 */
static void
ext4_ext_binsearch_ext(struct ext4_extent_path *path, int blk)
{
	struct ext4_extent_header *eh;
	struct ext4_extent *r, *l, *m;

	eh = path->ep_header;

	KASSERT(eh->eh_ecount <= eh->eh_max,
	    ("ext4_ext_binsearch_ext: bad args"));

	if (eh->eh_ecount == 0)
		return;

	l = EXT_FIRST_EXTENT(eh) + 1;
	r = EXT_FIRST_EXTENT(eh) + eh->eh_ecount - 1;
	while (l <= r) {
		m = l + (r - l) / 2;
		if (blk < m->e_blk)
			r = m - 1;
		else
			l = m + 1;
	}

	path->ep_ext = l - 1;
}

/*
 * Copy a node's buffer contents into a private malloc'ed snapshot held
 * in the path level (ep_data), remembering the block number in ep_blk.
 */
static int
ext4_ext_fill_path_bdata(struct ext4_extent_path *path,
    struct buf *bp, uint64_t blk)
{

	KASSERT(path->ep_data == NULL,
	    ("ext4_ext_fill_path_bdata: bad ep_data"));

	/* NOTE(review): M_WAITOK malloc does not fail; NULL check is moot. */
	path->ep_data = malloc(bp->b_bufsize, M_EXT2EXTENTS, M_WAITOK);
	if (!path->ep_data)
		return (ENOMEM);

	memcpy(path->ep_data, bp->b_data, bp->b_bufsize);
	path->ep_blk = blk;

	return (0);
}

/* Copy the cached node snapshot back into a buffer for writing. */
static void
ext4_ext_fill_path_buf(struct ext4_extent_path *path, struct buf *bp)
{

	KASSERT(path->ep_data != NULL,
	    ("ext4_ext_fill_path_buf: bad ep_data"));

	memcpy(bp->b_data, path->ep_data, bp->b_bufsize);
}

/* Release every per-level node snapshot held by a path array. */
static void
ext4_ext_drop_refs(struct ext4_extent_path *path)
{
	int depth, i;

	if (!path)
		return;

	depth = path->ep_depth;
	for (i = 0; i <= depth; i++, path++)
		if (path->ep_data) {
			free(path->ep_data, M_EXT2EXTENTS);
			path->ep_data = NULL;
		}
}

/* Free a path array (and its node snapshots). */
void
ext4_ext_path_free(struct ext4_extent_path *path)
{

	if (!path)
		return;

	ext4_ext_drop_refs(path);
	free(path, M_EXT2EXTENTS);
}

/*
 * Walk the extent tree from the in-inode root down to the leaf covering
 * logical block 'block', recording each visited level in *ppath
 * (allocated here when *ppath is NULL).
 */
int
ext4_ext_find_extent(struct inode *ip, daddr_t block,
    struct ext4_extent_path **ppath)
{
	struct m_ext2fs *fs;
	struct ext4_extent_header *eh;
	struct ext4_extent_path *path;
	struct buf *bp;
	uint64_t blk;
	int error, depth, i, ppos, alloc;

	fs = ip->i_e2fs;
	eh = ext4_ext_inode_header(ip);
	depth = ext4_ext_inode_depth(ip);
	ppos = 0;
	alloc = 0;

	error = ext4_ext_check_header(ip, eh);
	if (error)
		return (error);

	if (ppath == NULL)
		return (EINVAL);

	path = *ppath;
	if (path == NULL) {
		path = malloc(EXT4_EXT_DEPTH_MAX *
		    sizeof(struct ext4_extent_path),
		    M_EXT2EXTENTS, M_WAITOK | M_ZERO);
		if (!path)
			return (ENOMEM);

		*ppath = path;
		alloc = 1;
	}

	path[0].ep_header = eh;
	path[0].ep_data = NULL;

	/* Walk through the tree.
 */
	i = depth;
	while (i) {
		/* Descend one index level toward 'block'. */
		ext4_ext_binsearch_index(&path[ppos], block);
		blk = ext4_ext_index_pblock(path[ppos].ep_index);
		path[ppos].ep_depth = i;
		path[ppos].ep_ext = NULL;

		error = bread(ip->i_devvp, fsbtodb(ip->i_e2fs, blk),
		    ip->i_e2fs->e2fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto error;
		}

		ppos++;
		if (ppos > depth) {
			ext2_fserr(fs, ip->i_uid,
			    "ppos > depth => extent corrupted");
			error = EIO;
			brelse(bp);
			goto error;
		}

		ext4_ext_fill_path_bdata(&path[ppos], bp, blk);
		bqrelse(bp);

		eh = ext4_ext_block_header(path[ppos].ep_data);
		error = ext4_ext_check_header(ip, eh);
		if (error)
			goto error;

		path[ppos].ep_header = eh;

		i--;
	}

	error = ext4_ext_check_header(ip, eh);
	if (error)
		goto error;

	/* Find extent. */
	path[ppos].ep_depth = i;
	path[ppos].ep_header = eh;
	path[ppos].ep_ext = NULL;
	path[ppos].ep_index = NULL;
	ext4_ext_binsearch_ext(&path[ppos], block);
	return (0);

error:
	ext4_ext_drop_refs(path);
	if (alloc)
		free(path, M_EXT2EXTENTS);

	*ppath = NULL;

	return (error);
}

/* Number of leaf extents that fit in the in-inode root node. */
static inline int
ext4_ext_space_root(struct inode *ip)
{
	int size;

	size = sizeof(ip->i_data);
	size -= sizeof(struct ext4_extent_header);
	size /= sizeof(struct ext4_extent);

	return (size);
}

/* Number of leaf extents that fit in a full on-disk block. */
static inline int
ext4_ext_space_block(struct inode *ip)
{
	struct m_ext2fs *fs;
	int size;

	fs = ip->i_e2fs;

	size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) /
	    sizeof(struct ext4_extent);

	return (size);
}

/* Number of index entries that fit in a full on-disk block. */
static inline int
ext4_ext_space_block_index(struct inode *ip)
{
	struct m_ext2fs *fs;
	int size;

	fs = ip->i_e2fs;

	size = (fs->e2fs_bsize - sizeof(struct ext4_extent_header)) /
	    sizeof(struct ext4_extent_index);

	return (size);
}

/*
 * Initialize an empty extent tree in the inode's block array and mark
 * the inode as extent-based; flush the change via ext2_update().
 */
void
ext4_ext_tree_init(struct inode *ip)
{
	struct ext4_extent_header *ehp;

	ip->i_flag |= IN_E4EXTENTS;

	memset(ip->i_data, 0, EXT2_NDADDR + EXT2_NIADDR);
	ehp = (struct ext4_extent_header *)ip->i_data;
	ehp->eh_magic = EXT4_EXT_MAGIC;
	ehp->eh_max = ext4_ext_space_root(ip);
	ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO;
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	ext2_update(ip->i_vnode, 1);
}

/* Remember one extent mapping in the inode's single-entry cache. */
static inline void
ext4_ext_put_in_cache(struct inode *ip, uint32_t blk,
    uint32_t len, uint32_t start, int type)
{

	KASSERT(len != 0, ("ext4_ext_put_in_cache: bad input"));

	ip->i_ext_cache.ec_type = type;
	ip->i_ext_cache.ec_blk = blk;
	ip->i_ext_cache.ec_len = len;
	ip->i_ext_cache.ec_start = start;
}

/*
 * Choose a preferred physical block for a new allocation at logical
 * block 'block': near the nearest known extent if the path has one,
 * otherwise inside the inode's block group.
 */
static e4fs_daddr_t
ext4_ext_blkpref(struct inode *ip, struct ext4_extent_path *path,
    e4fs_daddr_t block)
{
	struct m_ext2fs *fs;
	struct ext4_extent *ex;
	e4fs_daddr_t bg_start;
	int depth;

	fs = ip->i_e2fs;
	if (path) {
		depth = path->ep_depth;
		ex = path[depth].ep_ext;
		if (ex) {
			e4fs_daddr_t pblk = ext4_ext_extent_pblock(ex);
			e2fs_daddr_t blk = ex->e_blk;

			if (block > blk)
				return (pblk + (block - blk));
			else
				return (pblk - (blk - block));
		}

		/* Try to get block from index itself. */
		if (path[depth].ep_data)
			return (path[depth].ep_blk);
	}

	/* Use inode's group. */
	bg_start = (ip->i_block_group * EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) +
	    fs->e2fs->e2fs_first_dblock;

	return (bg_start + block);
}

/*
 * Two extents can be merged when they are logically and physically
 * contiguous and the combined length does not exceed EXT4_MAX_LEN.
 */
static int inline
ext4_can_extents_be_merged(struct ext4_extent *ex1,
    struct ext4_extent *ex2)
{

	if (ex1->e_blk + ex1->e_len != ex2->e_blk)
		return (0);

	if (ex1->e_len + ex2->e_len > EXT4_MAX_LEN)
		return (0);

	if (ext4_ext_extent_pblock(ex1) + ex1->e_len ==
	    ext4_ext_extent_pblock(ex2))
		return (1);

	return (0);
}

/*
 * Return the first logical block of the leaf to the right of the one
 * referenced by 'path', or EXT4_MAX_BLOCKS when none exists.
 */
static unsigned
ext4_ext_next_leaf_block(struct inode *ip, struct ext4_extent_path *path)
{
	int depth = path->ep_depth;

	/* Empty tree */
	if (depth == 0)
		return (EXT4_MAX_BLOCKS);

	/* Go to indexes.
 */
	depth--;

	while (depth >= 0) {
		/* A following sibling index means a leaf exists to the right. */
		if (path[depth].ep_index !=
		    EXT_LAST_INDEX(path[depth].ep_header))
			return (path[depth].ep_index[1].ei_blk);

		depth--;
	}

	return (EXT4_MAX_BLOCKS);
}

/*
 * Write the node referenced by one path level back to disk: block-backed
 * levels are copied into a buffer and bwrite()n, the in-inode root is
 * flushed via ext2_update().
 */
static int
ext4_ext_dirty(struct inode *ip, struct ext4_extent_path *path)
{
	struct m_ext2fs *fs;
	struct buf *bp;
	uint64_t blk;
	int error;

	fs = ip->i_e2fs;

	if (!path)
		return (EINVAL);

	if (path->ep_data) {
		blk = path->ep_blk;
		bp = getblk(ip->i_devvp, fsbtodb(fs, blk),
		    fs->e2fs_bsize, 0, 0, 0);
		if (!bp)
			return (EIO);
		ext4_ext_fill_path_buf(path, bp);
		error = bwrite(bp);
	} else {
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		error = ext2_update(ip->i_vnode, 1);
	}

	return (error);
}

/*
 * Insert a new index entry (lblk -> blk) into the index node referenced
 * by 'path', keeping entries sorted; EIO on structural corruption.
 */
static int
ext4_ext_insert_index(struct inode *ip, struct ext4_extent_path *path,
    uint32_t lblk, e4fs_daddr_t blk)
{
	struct m_ext2fs *fs;
	struct ext4_extent_index *idx;
	int len;

	fs = ip->i_e2fs;

	if (lblk == path->ep_index->ei_blk) {
		ext2_fserr(fs, ip->i_uid,
		    "lblk == index blk => extent corrupted");
		return (EIO);
	}

	if (path->ep_header->eh_ecount >= path->ep_header->eh_max) {
		ext2_fserr(fs, ip->i_uid,
		    "ecout > maxcount => extent corrupted");
		return (EIO);
	}

	if (lblk > path->ep_index->ei_blk) {
		/* Insert after. */
		idx = path->ep_index + 1;
	} else {
		/* Insert before. */
		idx = path->ep_index;
	}

	/* Shift the tail of the node right to open a slot at idx. */
	len = EXT_LAST_INDEX(path->ep_header) - idx + 1;
	if (len > 0)
		memmove(idx + 1, idx, len * sizeof(struct ext4_extent_index));

	if (idx > EXT_MAX_INDEX(path->ep_header)) {
		ext2_fserr(fs, ip->i_uid,
		    "index is out of range => extent corrupted");
		return (EIO);
	}

	idx->ei_blk = lblk;
	ext4_index_store_pblock(idx, blk);
	path->ep_header->eh_ecount++;

	return (ext4_ext_dirty(ip, path));
}

/*
 * Allocate one metadata block for the tree and account it against the
 * inode's block count.
 */
static e4fs_daddr_t
ext4_ext_alloc_meta(struct inode *ip)
{
	e4fs_daddr_t blk = ext2_alloc_meta(ip);

	if (blk) {
		ip->i_blocks += btodb(ip->i_e2fs->e2fs_bsize);
		ip->i_flag |= IN_CHANGE | IN_UPDATE;
		ext2_update(ip->i_vnode, 1);
	}

	return (blk);
}

/*
 * Free 'count' consecutive filesystem blocks starting at 'blk' and
 * de-account them from the inode (clamped at zero).
 */
static void
ext4_ext_blkfree(struct inode *ip, uint64_t blk, int count, int flags)
{
	struct m_ext2fs *fs;
	int i, blocksreleased;

	fs = ip->i_e2fs;
	blocksreleased = count;

	for(i = 0; i < count; i++)
		ext2_blkfree(ip, blk + i, fs->e2fs_bsize);

	if (ip->i_blocks >= blocksreleased)
		ip->i_blocks -= (btodb(fs->e2fs_bsize)*blocksreleased);
	else
		ip->i_blocks = 0;

	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	ext2_update(ip->i_vnode, 1);
}

/*
 * Split the tree at level 'at' to make room for 'newext': move the
 * extents to the right of the insertion point into newly allocated
 * node blocks and stitch them in with intermediate indexes.
 */
static int
ext4_ext_split(struct inode *ip, struct ext4_extent_path *path,
    struct ext4_extent *newext, int at)
{
	struct m_ext2fs *fs;
	struct buf *bp;
	int depth = ext4_ext_inode_depth(ip);
	struct ext4_extent_header *neh;
	struct ext4_extent_index *fidx;
	struct ext4_extent *ex;
	int i = at, k, m, a;
	e4fs_daddr_t newblk, oldblk;
	uint32_t border;
	e4fs_daddr_t *ablks = NULL;
	int error = 0;

	fs = ip->i_e2fs;
	bp = NULL;

	/*
	 * We will split at current extent for now.
	 */
	if (path[depth].ep_ext > EXT_MAX_EXTENT(path[depth].ep_header)) {
		ext2_fserr(fs, ip->i_uid,
		    "extent is out of range => extent corrupted");
		return (EIO);
	}

	if (path[depth].ep_ext != EXT_MAX_EXTENT(path[depth].ep_header))
		border = path[depth].ep_ext[1].e_blk;
	else
		border = newext->e_blk;

	/* Allocate new blocks.
 */
	ablks = malloc(sizeof(e4fs_daddr_t) * depth,
	    M_EXT2EXTENTS, M_WAITOK | M_ZERO);
	if (!ablks)
		return (ENOMEM);
	for (a = 0; a < depth - at; a++) {
		newblk = ext4_ext_alloc_meta(ip);
		if (newblk == 0)
			goto cleanup;
		ablks[a] = newblk;
	}

	/* Initialize the new leaf node in the last allocated block. */
	newblk = ablks[--a];
	bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0);
	if (!bp) {
		error = EIO;
		goto cleanup;
	}

	neh = ext4_ext_block_header(bp->b_data);
	neh->eh_ecount = 0;
	neh->eh_max = ext4_ext_space_block(ip);
	neh->eh_magic = EXT4_EXT_MAGIC;
	neh->eh_depth = 0;
	ex = EXT_FIRST_EXTENT(neh);

	if (path[depth].ep_header->eh_ecount !=
	    path[depth].ep_header->eh_max) {
		ext2_fserr(fs, ip->i_uid,
		    "extents count out of range => extent corrupted");
		error = EIO;
		goto cleanup;
	}

	/* Start copy from next extent. */
	m = 0;
	path[depth].ep_ext++;
	while (path[depth].ep_ext <= EXT_MAX_EXTENT(path[depth].ep_header)) {
		path[depth].ep_ext++;
		m++;
	}
	if (m) {
		memmove(ex, path[depth].ep_ext - m,
		    sizeof(struct ext4_extent) * m);
		neh->eh_ecount = neh->eh_ecount + m;
	}

	bwrite(bp);
	bp = NULL;

	/* Fix old leaf. */
	if (m) {
		path[depth].ep_header->eh_ecount =
		    path[depth].ep_header->eh_ecount - m;
		ext4_ext_dirty(ip, path + depth);
	}

	/* Create intermediate indexes. */
	k = depth - at - 1;
	KASSERT(k >= 0, ("ext4_ext_split: negative k"));

	/* Insert new index into current index block. */
	i = depth - 1;
	while (k--) {
		oldblk = newblk;
		newblk = ablks[--a];
		error = bread(ip->i_devvp, fsbtodb(fs, newblk),
		    (int)fs->e2fs_bsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			goto cleanup;
		}

		/* New index node pointing at the node built one level below. */
		neh = (struct ext4_extent_header *)bp->b_data;
		neh->eh_ecount = 1;
		neh->eh_magic = EXT4_EXT_MAGIC;
		neh->eh_max = ext4_ext_space_block_index(ip);
		neh->eh_depth = depth - i;
		fidx = EXT_FIRST_INDEX(neh);
		fidx->ei_blk = border;
		ext4_index_store_pblock(fidx, oldblk);

		m = 0;
		path[i].ep_index++;
		while (path[i].ep_index <= EXT_MAX_INDEX(path[i].ep_header)) {
			path[i].ep_index++;
			m++;
		}
		if (m) {
			memmove(++fidx, path[i].ep_index - m,
			    sizeof(struct ext4_extent_index) * m);
			neh->eh_ecount = neh->eh_ecount + m;
		}

		bwrite(bp);
		bp = NULL;

		/* Fix old index. */
		if (m) {
			path[i].ep_header->eh_ecount =
			    path[i].ep_header->eh_ecount - m;
			ext4_ext_dirty(ip, path + i);
		}

		i--;
	}

	error = ext4_ext_insert_index(ip, path + at, border, newblk);

cleanup:
	if (bp)
		brelse(bp);

	if (error) {
		/* Release any metadata blocks allocated before the failure. */
		for (i = 0; i < depth; i++) {
			if (!ablks[i])
				continue;
			ext4_ext_blkfree(ip, ablks[i], 1, 0);
		}
	}

	free(ablks, M_EXT2EXTENTS);

	return (error);
}

/*
 * Grow the tree by one level: copy the in-inode root into a freshly
 * allocated block and leave the root as a single index pointing at it.
 */
static int
ext4_ext_grow_indepth(struct inode *ip, struct ext4_extent_path *path,
    struct ext4_extent *newext)
{
	struct m_ext2fs *fs;
	struct ext4_extent_path *curpath;
	struct ext4_extent_header *neh;
	struct ext4_extent_index *fidx;
	struct buf *bp;
	e4fs_daddr_t newblk;
	int error = 0;

	fs = ip->i_e2fs;
	curpath = path;

	newblk = ext4_ext_alloc_meta(ip);
	if (newblk == 0)
		return (error);

	bp = getblk(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, 0, 0, 0);
	if (!bp)
		return (EIO);

	/* Move top-level index/leaf into new block.
 */
	memmove(bp->b_data, curpath->ep_header, sizeof(ip->i_data));

	/* Set size of new block */
	neh = ext4_ext_block_header(bp->b_data);
	neh->eh_magic = EXT4_EXT_MAGIC;

	if (ext4_ext_inode_depth(ip))
		neh->eh_max = ext4_ext_space_block_index(ip);
	else
		neh->eh_max = ext4_ext_space_block(ip);

	error = bwrite(bp);
	if (error)
		goto out;

	bp = NULL;

	/* Turn the root into a single index pointing at the copied node. */
	curpath->ep_header->eh_magic = EXT4_EXT_MAGIC;
	curpath->ep_header->eh_max = ext4_ext_space_root(ip);
	curpath->ep_header->eh_ecount = 1;
	curpath->ep_index = EXT_FIRST_INDEX(curpath->ep_header);
	curpath->ep_index->ei_blk = EXT_FIRST_EXTENT(path[0].ep_header)->e_blk;
	ext4_index_store_pblock(curpath->ep_index, newblk);

	neh = ext4_ext_inode_header(ip);
	fidx = EXT_FIRST_INDEX(neh);
	neh->eh_depth = path->ep_depth + 1;
	ext4_ext_dirty(ip, curpath);
out:
	brelse(bp);

	return (error);
}

/*
 * Make room for 'newext' somewhere in the tree: split at the deepest
 * level that still has a free index slot, or grow the tree in depth
 * when it is completely full; 'path' is re-looked-up afterwards.
 */
static int
ext4_ext_create_new_leaf(struct inode *ip, struct ext4_extent_path *path,
    struct ext4_extent *newext)
{
	struct m_ext2fs *fs;
	struct ext4_extent_path *curpath;
	int depth, i, error;

	fs = ip->i_e2fs;

repeat:
	i = depth = ext4_ext_inode_depth(ip);

	/* Look for free index entry int the tree */
	curpath = path + depth;
	while (i > 0 && !EXT_HAS_FREE_INDEX(curpath)) {
		i--;
		curpath--;
	}

	/*
	 * We use already allocated block for index block,
	 * so subsequent data blocks should be contiguous.
	 */
	if (EXT_HAS_FREE_INDEX(curpath)) {
		error = ext4_ext_split(ip, path, newext, i);
		if (error)
			goto out;

		/* Refill path. */
		ext4_ext_drop_refs(path);
		error = ext4_ext_find_extent(ip, newext->e_blk, &path);
		if (error)
			goto out;
	} else {
		/* Tree is full, do grow in depth. */
		error = ext4_ext_grow_indepth(ip, path, newext);
		if (error)
			goto out;

		/* Refill path. */
		ext4_ext_drop_refs(path);
		error = ext4_ext_find_extent(ip, newext->e_blk, &path);
		if (error)
			goto out;

		/* Check and split tree if required. */
		depth = ext4_ext_inode_depth(ip);
		if (path[depth].ep_header->eh_ecount ==
		    path[depth].ep_header->eh_max)
			goto repeat;
	}

out:
	return (error);
}

/*
 * After modifying the first extent of a leaf, propagate its starting
 * block up into the parent indexes so the tree stays consistent.
 */
static int
ext4_ext_correct_indexes(struct inode *ip, struct ext4_extent_path *path)
{
	struct ext4_extent_header *eh;
	struct ext4_extent *ex;
	int32_t border;
	int depth, k;

	depth = ext4_ext_inode_depth(ip);
	eh = path[depth].ep_header;
	ex = path[depth].ep_ext;

	if (ex == NULL || eh == NULL)
		return (EIO);

	if (!depth)
		return (0);

	/* We will correct tree if first leaf got modified only. */
	if (ex != EXT_FIRST_EXTENT(eh))
		return (0);

	k = depth - 1;
	border = path[depth].ep_ext->e_blk;
	path[k].ep_index->ei_blk = border;
	ext4_ext_dirty(ip, path + k);
	while (k--) {
		/* Change all left-side indexes. */
		if (path[k+1].ep_index != EXT_FIRST_INDEX(path[k+1].ep_header))
			break;

		path[k].ep_index->ei_blk = border;
		ext4_ext_dirty(ip, path + k);
	}

	return (0);
}

/*
 * Insert 'newext' into the leaf referenced by 'path', merging with
 * neighbours where possible and creating a new leaf when the current
 * one is full.
 */
static int
ext4_ext_insert_extent(struct inode *ip, struct ext4_extent_path *path,
    struct ext4_extent *newext)
{
	struct m_ext2fs *fs;
	struct ext4_extent_header * eh;
	struct ext4_extent *ex, *nex, *nearex;
	struct ext4_extent_path *npath;
	int depth, len, error, next;

	fs = ip->i_e2fs;
	depth = ext4_ext_inode_depth(ip);
	ex = path[depth].ep_ext;
	npath = NULL;

	if (newext->e_len == 0 || path[depth].ep_header == NULL)
		return (EINVAL);

	/* Insert block into found extent.
*/ if (ex && ext4_can_extents_be_merged(ex, newext)) { ex->e_len = ex->e_len + newext->e_len; eh = path[depth].ep_header; nearex = ex; goto merge; } repeat: depth = ext4_ext_inode_depth(ip); eh = path[depth].ep_header; if (eh->eh_ecount < eh->eh_max) goto has_space; /* Try next leaf */ nex = EXT_LAST_EXTENT(eh); next = ext4_ext_next_leaf_block(ip, path); if (newext->e_blk > nex->e_blk && next != EXT4_MAX_BLOCKS) { KASSERT(npath == NULL, ("ext4_ext_insert_extent: bad path")); error = ext4_ext_find_extent(ip, next, &npath); if (error) goto cleanup; if (npath->ep_depth != path->ep_depth) { error = EIO; goto cleanup; } eh = npath[depth].ep_header; if (eh->eh_ecount < eh->eh_max) { path = npath; goto repeat; } } /* * There is no free space in the found leaf, * try to add a new leaf to the tree. */ error = ext4_ext_create_new_leaf(ip, path, newext); if (error) goto cleanup; depth = ext4_ext_inode_depth(ip); eh = path[depth].ep_header; has_space: nearex = path[depth].ep_ext; if (!nearex) { /* Create new extent in the leaf. */ path[depth].ep_ext = EXT_FIRST_EXTENT(eh); } else if (newext->e_blk > nearex->e_blk) { if (nearex != EXT_LAST_EXTENT(eh)) { len = EXT_MAX_EXTENT(eh) - nearex; len = (len - 1) * sizeof(struct ext4_extent); len = len < 0 ? 0 : len; memmove(nearex + 2, nearex + 1, len); } path[depth].ep_ext = nearex + 1; } else { len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); len = len < 0 ? 0 : len; memmove(nearex + 1, nearex, len); path[depth].ep_ext = nearex; } eh->eh_ecount = eh->eh_ecount + 1; nearex = path[depth].ep_ext; nearex->e_blk = newext->e_blk; nearex->e_start_lo = newext->e_start_lo; nearex->e_start_hi = newext->e_start_hi; nearex->e_len = newext->e_len; merge: /* Try to merge extents to the right. */ while (nearex < EXT_LAST_EXTENT(eh)) { if (!ext4_can_extents_be_merged(nearex, nearex + 1)) break; /* Merge with next extent. 
*/ nearex->e_len = nearex->e_len + nearex[1].e_len; if (nearex + 1 < EXT_LAST_EXTENT(eh)) { len = (EXT_LAST_EXTENT(eh) - nearex - 1) * sizeof(struct ext4_extent); memmove(nearex + 1, nearex + 2, len); } eh->eh_ecount = eh->eh_ecount - 1; KASSERT(eh->eh_ecount != 0, ("ext4_ext_insert_extent: bad ecount")); } /* * Try to merge extents to the left, * start from inexes correction. */ error = ext4_ext_correct_indexes(ip, path); if (error) goto cleanup; ext4_ext_dirty(ip, path + depth); cleanup: if (npath) { ext4_ext_drop_refs(npath); free(npath, M_EXT2EXTENTS); } ip->i_ext_cache.ec_type = EXT4_EXT_CACHE_NO; return (error); } static e4fs_daddr_t ext4_new_blocks(struct inode *ip, daddr_t lbn, e4fs_daddr_t pref, struct ucred *cred, unsigned long *count, int *perror) { struct m_ext2fs *fs; struct ext2mount *ump; e4fs_daddr_t newblk; fs = ip->i_e2fs; ump = ip->i_ump; /* * We will allocate only single block for now. */ if (*count > 1) return (0); EXT2_LOCK(ip->i_ump); *perror = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newblk); if (*perror) return (0); if (newblk) { ip->i_flag |= IN_CHANGE | IN_UPDATE; ext2_update(ip->i_vnode, 1); } return (newblk); } int ext4_ext_get_blocks(struct inode *ip, e4fs_daddr_t iblk, unsigned long max_blocks, struct ucred *cred, struct buf **bpp, int *pallocated, uint32_t *nb) { struct m_ext2fs *fs; struct buf *bp = NULL; struct ext4_extent_path *path; struct ext4_extent newex, *ex; e4fs_daddr_t bpref, newblk = 0; unsigned long allocated = 0; int error = 0, depth; fs = ip->i_e2fs; *pallocated = 0; path = NULL; if(bpp) *bpp = NULL; /* Check cache. */ if ((bpref = ext4_ext_in_cache(ip, iblk, &newex))) { if (bpref == EXT4_EXT_CACHE_IN) { /* Block is already allocated. 
*/ newblk = iblk - newex.e_blk + ext4_ext_extent_pblock(&newex); allocated = newex.e_len - (iblk - newex.e_blk); goto out; } else { error = EIO; goto out2; } } error = ext4_ext_find_extent(ip, iblk, &path); if (error) { goto out2; } depth = ext4_ext_inode_depth(ip); if (path[depth].ep_ext == NULL && depth != 0) { error = EIO; goto out2; } if ((ex = path[depth].ep_ext)) { uint64_t lblk = ex->e_blk; uint16_t e_len = ex->e_len; e4fs_daddr_t e_start = ext4_ext_extent_pblock(ex); if (e_len > EXT4_MAX_LEN) goto out2; /* If we found extent covers block, simply return it. */ if (iblk >= lblk && iblk < lblk + e_len) { newblk = iblk - lblk + e_start; allocated = e_len - (iblk - lblk); ext4_ext_put_in_cache(ip, lblk, e_len, e_start, EXT4_EXT_CACHE_IN); goto out; } } /* Allocate the new block. */ if (S_ISREG(ip->i_mode) && (!ip->i_next_alloc_block)) { ip->i_next_alloc_goal = 0; } bpref = ext4_ext_blkpref(ip, path, iblk); allocated = max_blocks; newblk = ext4_new_blocks(ip, iblk, bpref, cred, &allocated, &error); if (!newblk) goto out2; /* Try to insert new extent into found leaf and return. */ newex.e_blk = iblk; ext4_ext_store_pblock(&newex, newblk); newex.e_len = allocated; error = ext4_ext_insert_extent(ip, path, &newex); if (error) goto out2; newblk = ext4_ext_extent_pblock(&newex); ext4_ext_put_in_cache(ip, iblk, allocated, newblk, EXT4_EXT_CACHE_IN); *pallocated = 1; out: if (allocated > max_blocks) allocated = max_blocks; if (bpp) { error = bread(ip->i_devvp, fsbtodb(fs, newblk), fs->e2fs_bsize, cred, &bp); if (error) { brelse(bp); } else { *bpp = bp; } } out2: if (path) { ext4_ext_drop_refs(path); free(path, M_EXT2EXTENTS); } if (nb) *nb = newblk; return (error); } static inline uint16_t ext4_ext_get_actual_len(struct ext4_extent *ext) { return (ext->e_len <= EXT_INIT_MAX_LEN ? 
ext->e_len : (ext->e_len - EXT_INIT_MAX_LEN)); } static inline struct ext4_extent_header * ext4_ext_header(struct inode *ip) { return (struct ext4_extent_header *)ip->i_db; } static int ext4_remove_blocks(struct inode *ip, struct ext4_extent *ex, unsigned long from, unsigned long to) { unsigned long num, start; if (from >= ex->e_blk && to == ex->e_blk + ext4_ext_get_actual_len(ex) - 1) { /* Tail cleanup. */ num = ex->e_blk + ext4_ext_get_actual_len(ex) - from; start = ext4_ext_extent_pblock(ex) + ext4_ext_get_actual_len(ex) - num; ext4_ext_blkfree(ip, start, num, 0); } return (0); } static int ext4_ext_rm_index(struct inode *ip, struct ext4_extent_path *path) { e4fs_daddr_t leaf; /* Free index block. */ path--; leaf = ext4_ext_index_pblock(path->ep_index); KASSERT(path->ep_header->eh_ecount != 0, ("ext4_ext_rm_index: bad ecount")); path->ep_header->eh_ecount--; ext4_ext_dirty(ip, path); ext4_ext_blkfree(ip, leaf, 1, 0); return (0); } static int ext4_ext_rm_leaf(struct inode *ip, struct ext4_extent_path *path, uint64_t start) { struct m_ext2fs *fs; int depth; struct ext4_extent_header *eh; unsigned int a, b, block, num; unsigned long ex_blk; unsigned short ex_len; struct ext4_extent *ex; int error, correct_index; fs = ip->i_e2fs; depth = ext4_ext_inode_depth(ip); correct_index = 0; if (!path[depth].ep_header) { if (path[depth].ep_data == NULL) return (EINVAL); path[depth].ep_header = (struct ext4_extent_header* )path[depth].ep_data; } eh = path[depth].ep_header; if (!eh) { ext2_fserr(fs, ip->i_uid, "bad header => extent corrupted"); return (EIO); } ex = EXT_LAST_EXTENT(eh); ex_blk = ex->e_blk; ex_len = ext4_ext_get_actual_len(ex); while (ex >= EXT_FIRST_EXTENT(eh) && ex_blk + ex_len > start) { path[depth].ep_ext = ex; a = ex_blk > start ? ex_blk : start; b = (uint64_t)ex_blk + ex_len - 1 < EXT4_MAX_BLOCKS ? ex_blk + ex_len - 1 : EXT4_MAX_BLOCKS; if (a != ex_blk && b != ex_blk + ex_len - 1) return (EINVAL); else if (a != ex_blk) { /* Remove tail of the extent. 
*/ block = ex_blk; num = a - block; } else if (b != ex_blk + ex_len - 1) { /* Remove head of the extent, not implemented. */ return (EINVAL); } else { /* Remove whole extent. */ block = ex_blk; num = 0; } if (ex == EXT_FIRST_EXTENT(eh)) correct_index = 1; error = ext4_remove_blocks(ip, ex, a, b); if (error) goto out; if (num == 0) { ext4_ext_store_pblock(ex, 0); eh->eh_ecount--; } ex->e_blk = block; ex->e_len = num; ext4_ext_dirty(ip, path + depth); ex--; ex_blk = ex->e_blk; ex_len = ext4_ext_get_actual_len(ex); }; if (correct_index && eh->eh_ecount) error = ext4_ext_correct_indexes(ip, path); /* * If this leaf is free, we should * remove it from index block above. */ if (error == 0 && eh->eh_ecount == 0 && path[depth].ep_data != NULL) error = ext4_ext_rm_index(ip, path + depth); out: return (error); } static struct buf * ext4_read_extent_tree_block(struct inode *ip, e4fs_daddr_t pblk, int depth, int flags) { struct m_ext2fs *fs; struct ext4_extent_header *eh; struct buf *bp; int error; fs = ip->i_e2fs; error = bread(ip->i_devvp, fsbtodb(fs, pblk), fs->e2fs_bsize, NOCRED, &bp); if (error) { brelse(bp); return (NULL); } eh = ext4_ext_block_header(bp->b_data); if (eh->eh_depth != depth) { ext2_fserr(fs, ip->i_uid, "unexpected eh_depth"); goto err; } error = ext4_ext_check_header(ip, eh); if (error) goto err; return (bp); err: brelse(bp); return (NULL); } static int inline ext4_ext_more_to_rm(struct ext4_extent_path *path) { KASSERT(path->ep_index != NULL, ("ext4_ext_more_to_rm: bad index from path")); if (path->ep_index < EXT_FIRST_INDEX(path->ep_header)) return (0); if (path->ep_header->eh_ecount == path->index_count) return (0); return (1); } int ext4_ext_remove_space(struct inode *ip, off_t length, int flags, struct ucred *cred, struct thread *td) { struct buf *bp; struct ext4_extent_header *ehp; struct ext4_extent_path *path; int depth; int i, error; ehp = (struct ext4_extent_header *)ip->i_db; depth = ext4_ext_inode_depth(ip); error = ext4_ext_check_header(ip, 
ehp); if(error) return (error); path = malloc(sizeof(struct ext4_extent_path) * (depth + 1), M_EXT2EXTENTS, M_WAITOK | M_ZERO); if (!path) return (ENOMEM); i = 0; path[0].ep_header = ehp; path[0].ep_depth = depth; while (i >= 0 && error == 0) { if (i == depth) { /* This is leaf. */ error = ext4_ext_rm_leaf(ip, path, length); if (error) break; free(path[i].ep_data, M_EXT2EXTENTS); path[i].ep_data = NULL; i--; continue; } /* This is index. */ if (!path[i].ep_header) path[i].ep_header = (struct ext4_extent_header *)path[i].ep_data; if (!path[i].ep_index) { /* This level hasn't touched yet. */ path[i].ep_index = EXT_LAST_INDEX(path[i].ep_header); path[i].index_count = path[i].ep_header->eh_ecount + 1; } else { /* We've already was here, see at next index. */ path[i].ep_index--; } if (ext4_ext_more_to_rm(path + i)) { memset(path + i + 1, 0, sizeof(*path)); bp = ext4_read_extent_tree_block(ip, ext4_ext_index_pblock(path[i].ep_index), path[0].ep_depth - (i + 1), 0); if (!bp) { error = EIO; break; } ext4_ext_fill_path_bdata(&path[i+1], bp, ext4_ext_index_pblock(path[i].ep_index)); brelse(bp); path[i].index_count = path[i].ep_header->eh_ecount; i++; } else { if (path[i].ep_header->eh_ecount == 0 && i > 0) { /* Index is empty, remove it. */ error = ext4_ext_rm_index(ip, path + i); } free(path[i].ep_data, M_EXT2EXTENTS); path[i].ep_data = NULL; i--; } } if (path->ep_header->eh_ecount == 0) { /* * Truncate the tree to zero. */ ext4_ext_header(ip)->eh_depth = 0; ext4_ext_header(ip)->eh_max = ext4_ext_space_root(ip); ext4_ext_dirty(ip, path); } ext4_ext_drop_refs(path); free(path, M_EXT2EXTENTS); return (error); } Index: head/sys/fs/ext2fs/ext2_extents.h =================================================================== --- head/sys/fs/ext2fs/ext2_extents.h (revision 326267) +++ head/sys/fs/ext2fs/ext2_extents.h (revision 326268) @@ -1,126 +1,128 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2012, 2010 Zheng Liu * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _FS_EXT2FS_EXT2_EXTENTS_H_ #define _FS_EXT2FS_EXT2_EXTENTS_H_ #include #define EXT4_EXT_MAGIC 0xf30a #define EXT4_MAX_BLOCKS 0xffffffff #define EXT_INIT_MAX_LEN (1UL << 15) #define EXT4_MAX_LEN (EXT_INIT_MAX_LEN - 1) #define EXT4_EXT_DEPTH_MAX 5 #define EXT4_EXT_CACHE_NO 0 #define EXT4_EXT_CACHE_GAP 1 #define EXT4_EXT_CACHE_IN 2 /* * Ext4 file system extent on disk. */ struct ext4_extent { uint32_t e_blk; /* first logical block */ uint16_t e_len; /* number of blocks */ uint16_t e_start_hi; /* high 16 bits of physical block */ uint32_t e_start_lo; /* low 32 bits of physical block */ }; /* * Extent index on disk. 
*/ struct ext4_extent_index { uint32_t ei_blk; /* indexes logical blocks */ uint32_t ei_leaf_lo; /* points to physical block of the * next level */ uint16_t ei_leaf_hi; /* high 16 bits of physical block */ uint16_t ei_unused; }; /* * Extent tree header. */ struct ext4_extent_header { uint16_t eh_magic; /* magic number: 0xf30a */ uint16_t eh_ecount; /* number of valid entries */ uint16_t eh_max; /* capacity of store in entries */ uint16_t eh_depth; /* the depth of extent tree */ uint32_t eh_gen; /* generation of extent tree */ }; /* * Save cached extent. */ struct ext4_extent_cache { daddr_t ec_start; /* extent start */ uint32_t ec_blk; /* logical block */ uint32_t ec_len; uint32_t ec_type; }; /* * Save path to some extent. */ struct ext4_extent_path { int index_count; uint16_t ep_depth; uint64_t ep_blk; char *ep_data; struct ext4_extent *ep_ext; struct ext4_extent_index *ep_index; struct ext4_extent_header *ep_header; }; #define EXT_FIRST_EXTENT(hdr) ((struct ext4_extent *)(((char *)(hdr)) + \ sizeof(struct ext4_extent_header))) #define EXT_FIRST_INDEX(hdr) ((struct ext4_extent_index *)(((char *)(hdr)) + \ sizeof(struct ext4_extent_header))) #define EXT_LAST_EXTENT(hdr) (EXT_FIRST_EXTENT((hdr)) + (hdr)->eh_ecount - 1) #define EXT_LAST_INDEX(hdr) (EXT_FIRST_INDEX((hdr)) + (hdr)->eh_ecount - 1) #define EXT4_EXTENT_TAIL_OFFSET(hdr) (sizeof(struct ext4_extent_header) + \ (sizeof(struct ext4_extent) * (hdr)->eh_max)) #define EXT_HAS_FREE_INDEX(path) \ ((path)->ep_header->eh_ecount < (path)->ep_header->eh_max) #define EXT_MAX_EXTENT(hdr) (EXT_FIRST_EXTENT(hdr) + ((hdr)->eh_max) - 1) #define EXT_MAX_INDEX(hdr) (EXT_FIRST_INDEX((hdr)) + (hdr)->eh_max - 1) struct inode; struct m_ext2fs; void ext4_ext_tree_init(struct inode *ip); int ext4_ext_in_cache(struct inode *, daddr_t, struct ext4_extent *); void ext4_ext_put_cache(struct inode *, struct ext4_extent *, int); int ext4_ext_find_extent(struct inode *, daddr_t, struct ext4_extent_path **); void ext4_ext_path_free(struct 
ext4_extent_path *path); int ext4_ext_remove_space(struct inode *ip, off_t length, int flags, struct ucred *cred, struct thread *td); int ext4_ext_get_blocks(struct inode *ip, int64_t iblock, unsigned long max_blocks, struct ucred *cred, struct buf **bpp, int *allocate, uint32_t *); #ifdef EXT2FS_DEBUG void ext4_ext_print_extent_tree_status(struct inode * ip); #endif #endif /* !_FS_EXT2FS_EXT2_EXTENTS_H_ */ Index: head/sys/fs/ext2fs/ext2_hash.c =================================================================== --- head/sys/fs/ext2fs/ext2_hash.c (revision 326267) +++ head/sys/fs/ext2fs/ext2_hash.c (revision 326268) @@ -1,316 +1,318 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010, 2013 Zheng Liu * Copyright (c) 2012, Vyacheslav Matyushin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * The following notice applies to the code in ext2_half_md4(): * * Copyright (C) 1990-2, RSA Data Security, Inc. All rights reserved. * * License to copy and use this software is granted provided that it * is identified as the "RSA Data Security, Inc. MD4 Message-Digest * Algorithm" in all material mentioning or referencing this software * or this function. * * License is also granted to make and use derivative works provided * that such works are identified as "derived from the RSA Data * Security, Inc. MD4 Message-Digest Algorithm" in all material * mentioning or referencing the derived work. * * RSA Data Security, Inc. makes no representations concerning either * the merchantability of this software or the suitability of this * software for any particular purpose. It is provided "as is" * without express or implied warranty of any kind. * * These notices must be retained in any copies of any part of this * documentation and/or software. */ #include #include #include #include #include #include #include #include #include #include /* F, G, and H are MD4 functions */ #define F(x, y, z) (((x) & (y)) | ((~x) & (z))) #define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) #define H(x, y, z) ((x) ^ (y) ^ (z)) /* ROTATE_LEFT rotates x left n bits */ #define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) /* * FF, GG, and HH are transformations for rounds 1, 2, and 3. * Rotation is separated from addition to prevent recomputation. 
*/ #define FF(a, b, c, d, x, s) { \ (a) += F ((b), (c), (d)) + (x); \ (a) = ROTATE_LEFT ((a), (s)); \ } #define GG(a, b, c, d, x, s) { \ (a) += G ((b), (c), (d)) + (x) + (uint32_t)0x5A827999; \ (a) = ROTATE_LEFT ((a), (s)); \ } #define HH(a, b, c, d, x, s) { \ (a) += H ((b), (c), (d)) + (x) + (uint32_t)0x6ED9EBA1; \ (a) = ROTATE_LEFT ((a), (s)); \ } /* * MD4 basic transformation. It transforms state based on block. * * This is a half md4 algorithm since Linux uses this algorithm for dir * index. This function is derived from the RSA Data Security, Inc. MD4 * Message-Digest Algorithm and was modified as necessary. * * The return value of this function is uint32_t in Linux, but actually we don't * need to check this value, so in our version this function doesn't return any * value. */ static void ext2_half_md4(uint32_t hash[4], uint32_t data[8]) { uint32_t a = hash[0], b = hash[1], c = hash[2], d = hash[3]; /* Round 1 */ FF(a, b, c, d, data[0], 3); FF(d, a, b, c, data[1], 7); FF(c, d, a, b, data[2], 11); FF(b, c, d, a, data[3], 19); FF(a, b, c, d, data[4], 3); FF(d, a, b, c, data[5], 7); FF(c, d, a, b, data[6], 11); FF(b, c, d, a, data[7], 19); /* Round 2 */ GG(a, b, c, d, data[1], 3); GG(d, a, b, c, data[3], 5); GG(c, d, a, b, data[5], 9); GG(b, c, d, a, data[7], 13); GG(a, b, c, d, data[0], 3); GG(d, a, b, c, data[2], 5); GG(c, d, a, b, data[4], 9); GG(b, c, d, a, data[6], 13); /* Round 3 */ HH(a, b, c, d, data[3], 3); HH(d, a, b, c, data[7], 9); HH(c, d, a, b, data[2], 11); HH(b, c, d, a, data[6], 15); HH(a, b, c, d, data[1], 3); HH(d, a, b, c, data[5], 9); HH(c, d, a, b, data[0], 11); HH(b, c, d, a, data[4], 15); hash[0] += a; hash[1] += b; hash[2] += c; hash[3] += d; } /* * Tiny Encryption Algorithm. 
*/ static void ext2_tea(uint32_t hash[4], uint32_t data[8]) { uint32_t tea_delta = 0x9E3779B9; uint32_t sum; uint32_t x = hash[0], y = hash[1]; int n = 16; int i = 1; while (n-- > 0) { sum = i * tea_delta; x += ((y << 4) + data[0]) ^ (y + sum) ^ ((y >> 5) + data[1]); y += ((x << 4) + data[2]) ^ (x + sum) ^ ((x >> 5) + data[3]); i++; } hash[0] += x; hash[1] += y; } static uint32_t ext2_legacy_hash(const char *name, int len, int unsigned_char) { uint32_t h0, h1 = 0x12A3FE2D, h2 = 0x37ABE8F9; uint32_t multi = 0x6D22F5; const unsigned char *uname = (const unsigned char *)name; const signed char *sname = (const signed char *)name; int val, i; for (i = 0; i < len; i++) { if (unsigned_char) val = (u_int)*uname++; else val = (int)*sname++; h0 = h2 + (h1 ^ (val * multi)); if (h0 & 0x80000000) h0 -= 0x7FFFFFFF; h2 = h1; h1 = h0; } return (h1 << 1); } static void ext2_prep_hashbuf(const char *src, int slen, uint32_t *dst, int dlen, int unsigned_char) { uint32_t padding = slen | (slen << 8) | (slen << 16) | (slen << 24); uint32_t buf_val; const unsigned char *ubuf = (const unsigned char *)src; const signed char *sbuf = (const signed char *)src; int len, i; int buf_byte; if (slen > dlen) len = dlen; else len = slen; buf_val = padding; for (i = 0; i < len; i++) { if (unsigned_char) buf_byte = (u_int)ubuf[i]; else buf_byte = (int)sbuf[i]; if ((i % 4) == 0) buf_val = padding; buf_val <<= 8; buf_val += buf_byte; if ((i % 4) == 3) { *dst++ = buf_val; dlen -= sizeof(uint32_t); buf_val = padding; } } dlen -= sizeof(uint32_t); if (dlen >= 0) *dst++ = buf_val; dlen -= sizeof(uint32_t); while (dlen >= 0) { *dst++ = padding; dlen -= sizeof(uint32_t); } } int ext2_htree_hash(const char *name, int len, uint32_t *hash_seed, int hash_version, uint32_t *hash_major, uint32_t *hash_minor) { uint32_t hash[4]; uint32_t data[8]; uint32_t major = 0, minor = 0; int unsigned_char = 0; if (!name || !hash_major) return (-1); if (len < 1 || len > 255) goto error; hash[0] = 0x67452301; hash[1] = 
0xEFCDAB89; hash[2] = 0x98BADCFE; hash[3] = 0x10325476; if (hash_seed) memcpy(hash, hash_seed, sizeof(hash)); switch (hash_version) { case EXT2_HTREE_TEA_UNSIGNED: unsigned_char = 1; /* FALLTHROUGH */ case EXT2_HTREE_TEA: while (len > 0) { ext2_prep_hashbuf(name, len, data, 16, unsigned_char); ext2_tea(hash, data); len -= 16; name += 16; } major = hash[0]; minor = hash[1]; break; case EXT2_HTREE_LEGACY_UNSIGNED: unsigned_char = 1; /* FALLTHROUGH */ case EXT2_HTREE_LEGACY: major = ext2_legacy_hash(name, len, unsigned_char); break; case EXT2_HTREE_HALF_MD4_UNSIGNED: unsigned_char = 1; /* FALLTHROUGH */ case EXT2_HTREE_HALF_MD4: while (len > 0) { ext2_prep_hashbuf(name, len, data, 32, unsigned_char); ext2_half_md4(hash, data); len -= 32; name += 32; } major = hash[1]; minor = hash[2]; break; default: goto error; } major &= ~1; if (major == (EXT2_HTREE_EOF << 1)) major = (EXT2_HTREE_EOF - 1) << 1; *hash_major = major; if (hash_minor) *hash_minor = minor; return (0); error: *hash_major = 0; if (hash_minor) *hash_minor = 0; return (-1); } Index: head/sys/fs/ext2fs/ext2_htree.c =================================================================== --- head/sys/fs/ext2fs/ext2_htree.c (revision 326267) +++ head/sys/fs/ext2fs/ext2_htree.c (revision 326268) @@ -1,901 +1,903 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010, 2012 Zheng Liu * Copyright (c) 2012, Vyacheslav Matyushin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static void ext2_append_entry(char *block, uint32_t blksize, struct ext2fs_direct_2 *last_entry, struct ext2fs_direct_2 *new_entry); static int ext2_htree_append_block(struct vnode *vp, char *data, struct componentname *cnp, uint32_t blksize); static int ext2_htree_check_next(struct inode *ip, uint32_t hash, const char *name, struct ext2fs_htree_lookup_info *info); static int ext2_htree_cmp_sort_entry(const void *e1, const void *e2); static int ext2_htree_find_leaf(struct inode *ip, const char *name, int namelen, uint32_t *hash, uint8_t *hash_version, struct ext2fs_htree_lookup_info *info); static uint32_t ext2_htree_get_block(struct ext2fs_htree_entry *ep); static uint16_t ext2_htree_get_count(struct ext2fs_htree_entry *ep); static uint32_t ext2_htree_get_hash(struct ext2fs_htree_entry *ep); static uint16_t ext2_htree_get_limit(struct ext2fs_htree_entry *ep); static void ext2_htree_insert_entry_to_level(struct ext2fs_htree_lookup_level *level, uint32_t hash, uint32_t blk); static void 
ext2_htree_insert_entry(struct ext2fs_htree_lookup_info *info, uint32_t hash, uint32_t blk); static uint32_t ext2_htree_node_limit(struct inode *ip); static void ext2_htree_set_block(struct ext2fs_htree_entry *ep, uint32_t blk); static void ext2_htree_set_count(struct ext2fs_htree_entry *ep, uint16_t cnt); static void ext2_htree_set_hash(struct ext2fs_htree_entry *ep, uint32_t hash); static void ext2_htree_set_limit(struct ext2fs_htree_entry *ep, uint16_t limit); static int ext2_htree_split_dirblock(char *block1, char *block2, uint32_t blksize, uint32_t *hash_seed, uint8_t hash_version, uint32_t *split_hash, struct ext2fs_direct_2 *entry); static void ext2_htree_release(struct ext2fs_htree_lookup_info *info); static uint32_t ext2_htree_root_limit(struct inode *ip, int len); static int ext2_htree_writebuf(struct ext2fs_htree_lookup_info *info); int ext2_htree_has_idx(struct inode *ip) { if (EXT2_HAS_COMPAT_FEATURE(ip->i_e2fs, EXT2F_COMPAT_DIRHASHINDEX) && ip->i_flag & IN_E3INDEX) return (1); else return (0); } static int ext2_htree_check_next(struct inode *ip, uint32_t hash, const char *name, struct ext2fs_htree_lookup_info *info) { struct vnode *vp = ITOV(ip); struct ext2fs_htree_lookup_level *level; struct buf *bp; uint32_t next_hash; int idx = info->h_levels_num - 1; int levels = 0; do { level = &info->h_levels[idx]; level->h_entry++; if (level->h_entry < level->h_entries + ext2_htree_get_count(level->h_entries)) break; if (idx == 0) return (0); idx--; levels++; } while (1); next_hash = ext2_htree_get_hash(level->h_entry); if ((hash & 1) == 0) { if (hash != (next_hash & ~1)) return (0); } while (levels > 0) { levels--; if (ext2_blkatoff(vp, ext2_htree_get_block(level->h_entry) * ip->i_e2fs->e2fs_bsize, NULL, &bp) != 0) return (0); level = &info->h_levels[idx + 1]; brelse(level->h_bp); level->h_bp = bp; level->h_entry = level->h_entries = ((struct ext2fs_htree_node *)bp->b_data)->h_entries; } return (1); } static uint32_t ext2_htree_get_block(struct 
ext2fs_htree_entry *ep) { return (ep->h_blk & 0x00FFFFFF); } static void ext2_htree_set_block(struct ext2fs_htree_entry *ep, uint32_t blk) { ep->h_blk = blk; } static uint16_t ext2_htree_get_count(struct ext2fs_htree_entry *ep) { return (((struct ext2fs_htree_count *)(ep))->h_entries_num); } static void ext2_htree_set_count(struct ext2fs_htree_entry *ep, uint16_t cnt) { ((struct ext2fs_htree_count *)(ep))->h_entries_num = cnt; } static uint32_t ext2_htree_get_hash(struct ext2fs_htree_entry *ep) { return (ep->h_hash); } static uint16_t ext2_htree_get_limit(struct ext2fs_htree_entry *ep) { return (((struct ext2fs_htree_count *)(ep))->h_entries_max); } static void ext2_htree_set_hash(struct ext2fs_htree_entry *ep, uint32_t hash) { ep->h_hash = hash; } static void ext2_htree_set_limit(struct ext2fs_htree_entry *ep, uint16_t limit) { ((struct ext2fs_htree_count *)(ep))->h_entries_max = limit; } static void ext2_htree_release(struct ext2fs_htree_lookup_info *info) { u_int i; for (i = 0; i < info->h_levels_num; i++) { struct buf *bp = info->h_levels[i].h_bp; if (bp != NULL) brelse(bp); } } static uint32_t ext2_htree_root_limit(struct inode *ip, int len) { uint32_t space; space = ip->i_e2fs->e2fs_bsize - EXT2_DIR_REC_LEN(1) - EXT2_DIR_REC_LEN(2) - len; return (space / sizeof(struct ext2fs_htree_entry)); } static uint32_t ext2_htree_node_limit(struct inode *ip) { struct m_ext2fs *fs; uint32_t space; fs = ip->i_e2fs; space = fs->e2fs_bsize - EXT2_DIR_REC_LEN(0); return (space / sizeof(struct ext2fs_htree_entry)); } static int ext2_htree_find_leaf(struct inode *ip, const char *name, int namelen, uint32_t *hash, uint8_t *hash_ver, struct ext2fs_htree_lookup_info *info) { struct vnode *vp; struct ext2fs *fs; struct m_ext2fs *m_fs; struct buf *bp = NULL; struct ext2fs_htree_root *rootp; struct ext2fs_htree_entry *entp, *start, *end, *middle, *found; struct ext2fs_htree_lookup_level *level_info; uint32_t hash_major = 0, hash_minor = 0; uint32_t levels, cnt; uint8_t hash_version; 
if (name == NULL || info == NULL) return (-1); vp = ITOV(ip); fs = ip->i_e2fs->e2fs; m_fs = ip->i_e2fs; if (ext2_blkatoff(vp, 0, NULL, &bp) != 0) return (-1); info->h_levels_num = 1; info->h_levels[0].h_bp = bp; rootp = (struct ext2fs_htree_root *)bp->b_data; if (rootp->h_info.h_hash_version != EXT2_HTREE_LEGACY && rootp->h_info.h_hash_version != EXT2_HTREE_HALF_MD4 && rootp->h_info.h_hash_version != EXT2_HTREE_TEA) goto error; hash_version = rootp->h_info.h_hash_version; if (hash_version <= EXT2_HTREE_TEA) hash_version += m_fs->e2fs_uhash; *hash_ver = hash_version; ext2_htree_hash(name, namelen, fs->e3fs_hash_seed, hash_version, &hash_major, &hash_minor); *hash = hash_major; if ((levels = rootp->h_info.h_ind_levels) > 1) goto error; entp = (struct ext2fs_htree_entry *)(((char *)&rootp->h_info) + rootp->h_info.h_info_len); if (ext2_htree_get_limit(entp) != ext2_htree_root_limit(ip, rootp->h_info.h_info_len)) goto error; while (1) { cnt = ext2_htree_get_count(entp); if (cnt == 0 || cnt > ext2_htree_get_limit(entp)) goto error; start = entp + 1; end = entp + cnt - 1; while (start <= end) { middle = start + (end - start) / 2; if (ext2_htree_get_hash(middle) > hash_major) end = middle - 1; else start = middle + 1; } found = start - 1; level_info = &(info->h_levels[info->h_levels_num - 1]); level_info->h_bp = bp; level_info->h_entries = entp; level_info->h_entry = found; if (levels == 0) return (0); levels--; if (ext2_blkatoff(vp, ext2_htree_get_block(found) * m_fs->e2fs_bsize, NULL, &bp) != 0) goto error; entp = ((struct ext2fs_htree_node *)bp->b_data)->h_entries; info->h_levels_num++; info->h_levels[info->h_levels_num - 1].h_bp = bp; } error: ext2_htree_release(info); return (-1); } /* * Try to lookup a directory entry in HTree index */ int ext2_htree_lookup(struct inode *ip, const char *name, int namelen, struct buf **bpp, int *entryoffp, doff_t *offp, doff_t *prevoffp, doff_t *endusefulp, struct ext2fs_searchslot *ss) { struct vnode *vp; struct 
ext2fs_htree_lookup_info info; struct ext2fs_htree_entry *leaf_node; struct m_ext2fs *m_fs; struct buf *bp; uint32_t blk; uint32_t dirhash; uint32_t bsize; uint8_t hash_version; int search_next; int found = 0; m_fs = ip->i_e2fs; bsize = m_fs->e2fs_bsize; vp = ITOV(ip); /* TODO: print error msg because we don't lookup '.' and '..' */ memset(&info, 0, sizeof(info)); if (ext2_htree_find_leaf(ip, name, namelen, &dirhash, &hash_version, &info)) return (-1); do { leaf_node = info.h_levels[info.h_levels_num - 1].h_entry; blk = ext2_htree_get_block(leaf_node); if (ext2_blkatoff(vp, blk * bsize, NULL, &bp) != 0) { ext2_htree_release(&info); return (-1); } *offp = blk * bsize; *entryoffp = 0; *prevoffp = blk * bsize; *endusefulp = blk * bsize; if (ss->slotstatus == NONE) { ss->slotoffset = -1; ss->slotfreespace = 0; } if (ext2_search_dirblock(ip, bp->b_data, &found, name, namelen, entryoffp, offp, prevoffp, endusefulp, ss) != 0) { brelse(bp); ext2_htree_release(&info); return (-1); } if (found) { *bpp = bp; ext2_htree_release(&info); return (0); } brelse(bp); search_next = ext2_htree_check_next(ip, dirhash, name, &info); } while (search_next); ext2_htree_release(&info); return (ENOENT); } static int ext2_htree_append_block(struct vnode *vp, char *data, struct componentname *cnp, uint32_t blksize) { struct iovec aiov; struct uio auio; struct inode *dp = VTOI(vp); uint64_t cursize, newsize; int error; cursize = roundup(dp->i_size, blksize); newsize = cursize + blksize; auio.uio_offset = cursize; auio.uio_resid = blksize; aiov.iov_len = blksize; aiov.iov_base = data; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_rw = UIO_WRITE; auio.uio_segflg = UIO_SYSSPACE; error = VOP_WRITE(vp, &auio, IO_SYNC, cnp->cn_cred); if (!error) dp->i_size = newsize; return (error); } static int ext2_htree_writebuf(struct ext2fs_htree_lookup_info *info) { int i, error; for (i = 0; i < info->h_levels_num; i++) { struct buf *bp = info->h_levels[i].h_bp; error = bwrite(bp); if (error) return 
(error); } return (0); } static void ext2_htree_insert_entry_to_level(struct ext2fs_htree_lookup_level *level, uint32_t hash, uint32_t blk) { struct ext2fs_htree_entry *target; int entries_num; target = level->h_entry + 1; entries_num = ext2_htree_get_count(level->h_entries); memmove(target + 1, target, (char *)(level->h_entries + entries_num) - (char *)target); ext2_htree_set_block(target, blk); ext2_htree_set_hash(target, hash); ext2_htree_set_count(level->h_entries, entries_num + 1); } /* * Insert an index entry to the index node. */ static void ext2_htree_insert_entry(struct ext2fs_htree_lookup_info *info, uint32_t hash, uint32_t blk) { struct ext2fs_htree_lookup_level *level; level = &info->h_levels[info->h_levels_num - 1]; ext2_htree_insert_entry_to_level(level, hash, blk); } /* * Compare two entry sort descriptors by name hash value. * This is used together with qsort. */ static int ext2_htree_cmp_sort_entry(const void *e1, const void *e2) { const struct ext2fs_htree_sort_entry *entry1, *entry2; entry1 = (const struct ext2fs_htree_sort_entry *)e1; entry2 = (const struct ext2fs_htree_sort_entry *)e2; if (entry1->h_hash < entry2->h_hash) return (-1); if (entry1->h_hash > entry2->h_hash) return (1); return (0); } /* * Append an entry to the end of the directory block. */ static void ext2_append_entry(char *block, uint32_t blksize, struct ext2fs_direct_2 *last_entry, struct ext2fs_direct_2 *new_entry) { uint16_t entry_len; entry_len = EXT2_DIR_REC_LEN(last_entry->e2d_namlen); last_entry->e2d_reclen = entry_len; last_entry = (struct ext2fs_direct_2 *)((char *)last_entry + entry_len); new_entry->e2d_reclen = block + blksize - (char *)last_entry; memcpy(last_entry, new_entry, EXT2_DIR_REC_LEN(new_entry->e2d_namlen)); } /* * Move half of entries from the old directory block to the new one. 
 */
static int
ext2_htree_split_dirblock(char *block1, char *block2, uint32_t blksize,
    uint32_t *hash_seed, uint8_t hash_version,
    uint32_t *split_hash, struct ext2fs_direct_2 *entry)
{
	int entry_cnt = 0;
	int size = 0;
	int i, k;
	uint32_t offset;
	uint16_t entry_len = 0;
	uint32_t entry_hash;
	struct ext2fs_direct_2 *ep, *last;
	char *dest;
	struct ext2fs_htree_sort_entry *sort_info;

	ep = (struct ext2fs_direct_2 *)block1;
	dest = block2;
	/* The sort-descriptor array grows downward from the end of block2. */
	sort_info = (struct ext2fs_htree_sort_entry *)
	    ((char *)block2 + blksize);

	/*
	 * Calculate name hash value for the entry which is to be added.
	 */
	ext2_htree_hash(entry->e2d_name, entry->e2d_namlen, hash_seed,
	    hash_version, &entry_hash, NULL);

	/*
	 * Fill in directory entry sort descriptors.
	 */
	while ((char *)ep < block1 + blksize) {
		if (ep->e2d_ino && ep->e2d_namlen) {
			entry_cnt++;
			sort_info--;
			sort_info->h_size = ep->e2d_reclen;
			sort_info->h_offset = (char *)ep - block1;
			ext2_htree_hash(ep->e2d_name, ep->e2d_namlen,
			    hash_seed, hash_version,
			    &sort_info->h_hash, NULL);
		}
		ep = (struct ext2fs_direct_2 *)
		    ((char *)ep + ep->e2d_reclen);
	}

	/*
	 * Sort directory entry descriptors by name hash value.
	 */
	qsort(sort_info, entry_cnt, sizeof(struct ext2fs_htree_sort_entry),
	    ext2_htree_cmp_sort_entry);

	/*
	 * Count the number of entries to move to directory block 2
	 * (walk from the highest hash downward until half the block
	 * size has been accumulated).
	 */
	for (i = entry_cnt - 1; i >= 0; i--) {
		if (sort_info[i].h_size + size > blksize / 2)
			break;
		size += sort_info[i].h_size;
	}

	*split_hash = sort_info[i + 1].h_hash;

	/*
	 * Set collision bit.
	 */
	if (*split_hash == sort_info[i].h_hash)
		*split_hash += 1;

	/*
	 * Move half of directory entries from block 1 to block 2.
	 */
	for (k = i + 1; k < entry_cnt; k++) {
		ep = (struct ext2fs_direct_2 *)((char *)block1 +
		    sort_info[k].h_offset);
		entry_len = EXT2_DIR_REC_LEN(ep->e2d_namlen);
		memcpy(dest, ep, entry_len);
		((struct ext2fs_direct_2 *)dest)->e2d_reclen = entry_len;
		/* Mark directory entry as unused. */
		ep->e2d_ino = 0;
		dest += entry_len;
	}

	/* Back up to the start of the last entry copied into block 2. */
	dest -= entry_len;

	/* Shrink directory entries in block 1: compact live entries. */
	last = (struct ext2fs_direct_2 *)block1;
	entry_len = 0;
	for (offset = 0; offset < blksize; ) {
		ep = (struct ext2fs_direct_2 *)(block1 + offset);
		offset += ep->e2d_reclen;
		if (ep->e2d_ino) {
			last = (struct ext2fs_direct_2 *)
			    ((char *)last + entry_len);
			entry_len = EXT2_DIR_REC_LEN(ep->e2d_namlen);
			memcpy((void *)last, (void *)ep, entry_len);
			last->e2d_reclen = entry_len;
		}
	}

	if (entry_hash >= *split_hash) {
		/* Add entry to block 2. */
		ext2_append_entry(block2, blksize,
		    (struct ext2fs_direct_2 *)dest, entry);

		/* Adjust length field of last entry of block 1. */
		last->e2d_reclen = block1 + blksize - (char *)last;
	} else {
		/* Add entry to block 1. */
		ext2_append_entry(block1, blksize, last, entry);

		/* Adjust length field of last entry of block 2. */
		((struct ext2fs_direct_2 *)dest)->e2d_reclen =
		    block2 + blksize - dest;
	}

	return (0);
}

/*
 * Create an HTree index for a directory
 */
int
ext2_htree_create_index(struct vnode *vp, struct componentname *cnp,
    struct ext2fs_direct_2 *new_entry)
{
	struct buf *bp = NULL;
	struct inode *dp;
	struct ext2fs *fs;
	struct m_ext2fs *m_fs;
	struct ext2fs_direct_2 *ep, *dotdot;
	struct ext2fs_htree_root *root;
	struct ext2fs_htree_lookup_info info;
	uint32_t blksize, dirlen, split_hash;
	uint8_t hash_version;
	char *buf1 = NULL;
	char *buf2 = NULL;
	int error = 0;

	dp = VTOI(vp);
	fs = dp->i_e2fs->e2fs;
	m_fs = dp->i_e2fs;
	blksize = m_fs->e2fs_bsize;

	buf1 = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);
	buf2 = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO);

	if ((error = ext2_blkatoff(vp, 0, NULL, &bp)) != 0)
		goto out;

	/*
	 * Copy everything after the ".." record out of block 0 into
	 * buf1, then give the final entry in buf1 the rest of the block.
	 */
	root = (struct ext2fs_htree_root *)bp->b_data;
	dotdot = (struct ext2fs_direct_2 *)((char *)&(root->h_dotdot));
	ep = (struct ext2fs_direct_2 *)((char *)dotdot + dotdot->e2d_reclen);
	dirlen = (char *)root + blksize - (char *)ep;
	memcpy(buf1, ep, dirlen);
	ep = (struct ext2fs_direct_2 *)buf1;
	while ((char *)ep < buf1 + dirlen)
		ep = (struct ext2fs_direct_2 *)
		    ((char *)ep + ep->e2d_reclen);
	ep->e2d_reclen = buf1 + blksize - (char *)ep;

	dp->i_flag
|= IN_E3INDEX;

	/*
	 * Initialize index root.
	 */
	dotdot->e2d_reclen = blksize - EXT2_DIR_REC_LEN(1);
	memset(&root->h_info, 0, sizeof(root->h_info));
	root->h_info.h_hash_version = fs->e3fs_def_hash_version;
	root->h_info.h_info_len = sizeof(root->h_info);
	/* Logical block 1 will hold the first leaf after the split below. */
	ext2_htree_set_block(root->h_entries, 1);
	ext2_htree_set_count(root->h_entries, 1);
	ext2_htree_set_limit(root->h_entries,
	    ext2_htree_root_limit(dp, sizeof(root->h_info)));

	memset(&info, 0, sizeof(info));
	info.h_levels_num = 1;
	info.h_levels[0].h_entries = root->h_entries;
	info.h_levels[0].h_entry = root->h_entries;

	hash_version = root->h_info.h_hash_version;
	if (hash_version <= EXT2_HTREE_TEA)
		/* Select the unsigned hash variant when the fs requests it. */
		hash_version += m_fs->e2fs_uhash;
	/* Split the copied entries into blocks 1 and 2, adding new_entry. */
	ext2_htree_split_dirblock(buf1, buf2, blksize, fs->e3fs_hash_seed,
	    hash_version, &split_hash, new_entry);
	ext2_htree_insert_entry(&info, split_hash, 2);

	/*
	 * Write directory block 0.
	 */
	if (DOINGASYNC(vp)) {
		bdwrite(bp);
		error = 0;
	} else {
		error = bwrite(bp);
	}
	dp->i_flag |= IN_CHANGE | IN_UPDATE;
	if (error)
		goto out;

	/*
	 * Write directory block 1.
	 */
	error = ext2_htree_append_block(vp, buf1, cnp, blksize);
	if (error)
		goto out1;

	/*
	 * Write directory block 2.
	 */
	error = ext2_htree_append_block(vp, buf2, cnp, blksize);

	free(buf1, M_TEMP);
	free(buf2, M_TEMP);
	return (error);
out:
	if (bp != NULL)
		brelse(bp);
out1:
	free(buf1, M_TEMP);
	free(buf2, M_TEMP);
	return (error);
}

/*
 * Add an entry to the directory using htree index.
*/ int ext2_htree_add_entry(struct vnode *dvp, struct ext2fs_direct_2 *entry, struct componentname *cnp) { struct ext2fs_htree_entry *entries, *leaf_node; struct ext2fs_htree_lookup_info info; struct buf *bp = NULL; struct ext2fs *fs; struct m_ext2fs *m_fs; struct inode *ip; uint16_t ent_num; uint32_t dirhash, split_hash; uint32_t blksize, blknum; uint64_t cursize, dirsize; uint8_t hash_version; char *newdirblock = NULL; char *newidxblock = NULL; struct ext2fs_htree_node *dst_node; struct ext2fs_htree_entry *dst_entries; struct ext2fs_htree_entry *root_entires; struct buf *dst_bp = NULL; int error, write_bp = 0, write_dst_bp = 0, write_info = 0; ip = VTOI(dvp); m_fs = ip->i_e2fs; fs = m_fs->e2fs; blksize = m_fs->e2fs_bsize; if (ip->i_count != 0) return ext2_add_entry(dvp, entry); /* Target directory block is full, split it */ memset(&info, 0, sizeof(info)); error = ext2_htree_find_leaf(ip, entry->e2d_name, entry->e2d_namlen, &dirhash, &hash_version, &info); if (error) return (error); entries = info.h_levels[info.h_levels_num - 1].h_entries; ent_num = ext2_htree_get_count(entries); if (ent_num == ext2_htree_get_limit(entries)) { /* Split the index node. 
*/ root_entires = info.h_levels[0].h_entries; newidxblock = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO); dst_node = (struct ext2fs_htree_node *)newidxblock; memset(&dst_node->h_fake_dirent, 0, sizeof(dst_node->h_fake_dirent)); dst_node->h_fake_dirent.e2d_reclen = blksize; cursize = roundup(ip->i_size, blksize); dirsize = cursize + blksize; blknum = dirsize / blksize - 1; error = ext2_htree_append_block(dvp, newidxblock, cnp, blksize); if (error) goto finish; error = ext2_blkatoff(dvp, cursize, NULL, &dst_bp); if (error) goto finish; dst_node = (struct ext2fs_htree_node *)dst_bp->b_data; dst_entries = dst_node->h_entries; if (info.h_levels_num == 2) { uint16_t src_ent_num, dst_ent_num; if (ext2_htree_get_count(root_entires) == ext2_htree_get_limit(root_entires)) { /* Directory index is full */ error = EIO; goto finish; } src_ent_num = ent_num / 2; dst_ent_num = ent_num - src_ent_num; split_hash = ext2_htree_get_hash(entries + src_ent_num); /* Move half of index entries to the new index node */ memcpy(dst_entries, entries + src_ent_num, dst_ent_num * sizeof(struct ext2fs_htree_entry)); ext2_htree_set_count(entries, src_ent_num); ext2_htree_set_count(dst_entries, dst_ent_num); ext2_htree_set_limit(dst_entries, ext2_htree_node_limit(ip)); if (info.h_levels[1].h_entry >= entries + src_ent_num) { struct buf *tmp = info.h_levels[1].h_bp; info.h_levels[1].h_bp = dst_bp; dst_bp = tmp; info.h_levels[1].h_entry = info.h_levels[1].h_entry - (entries + src_ent_num) + dst_entries; info.h_levels[1].h_entries = dst_entries; } ext2_htree_insert_entry_to_level(&info.h_levels[0], split_hash, blknum); /* Write new index node to disk */ error = bwrite(dst_bp); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (error) goto finish; write_dst_bp = 1; } else { /* Create second level for htree index */ struct ext2fs_htree_root *idx_root; memcpy(dst_entries, entries, ent_num * sizeof(struct ext2fs_htree_entry)); ext2_htree_set_limit(dst_entries, ext2_htree_node_limit(ip)); idx_root = (struct 
ext2fs_htree_root *) info.h_levels[0].h_bp->b_data; idx_root->h_info.h_ind_levels = 1; ext2_htree_set_count(entries, 1); ext2_htree_set_block(entries, blknum); info.h_levels_num = 2; info.h_levels[1].h_entries = dst_entries; info.h_levels[1].h_entry = info.h_levels[0].h_entry - info.h_levels[0].h_entries + dst_entries; info.h_levels[1].h_bp = dst_bp; dst_bp = NULL; } } leaf_node = info.h_levels[info.h_levels_num - 1].h_entry; blknum = ext2_htree_get_block(leaf_node); error = ext2_blkatoff(dvp, blknum * blksize, NULL, &bp); if (error) goto finish; /* Split target directory block */ newdirblock = malloc(blksize, M_TEMP, M_WAITOK | M_ZERO); ext2_htree_split_dirblock((char *)bp->b_data, newdirblock, blksize, fs->e3fs_hash_seed, hash_version, &split_hash, entry); cursize = roundup(ip->i_size, blksize); dirsize = cursize + blksize; blknum = dirsize / blksize - 1; /* Add index entry for the new directory block */ ext2_htree_insert_entry(&info, split_hash, blknum); /* Write the new directory block to the end of the directory */ error = ext2_htree_append_block(dvp, newdirblock, cnp, blksize); if (error) goto finish; /* Write the target directory block */ error = bwrite(bp); ip->i_flag |= IN_CHANGE | IN_UPDATE; if (error) goto finish; write_bp = 1; /* Write the index block */ error = ext2_htree_writebuf(&info); if (!error) write_info = 1; finish: if (dst_bp != NULL && !write_dst_bp) brelse(dst_bp); if (bp != NULL && !write_bp) brelse(bp); if (newdirblock != NULL) free(newdirblock, M_TEMP); if (newidxblock != NULL) free(newidxblock, M_TEMP); if (!write_info) ext2_htree_release(&info); return (error); } Index: head/sys/fs/ext2fs/ext2fs.h =================================================================== --- head/sys/fs/ext2fs/ext2fs.h (revision 326267) +++ head/sys/fs/ext2fs/ext2fs.h (revision 326268) @@ -1,407 +1,409 @@ /*- * modified for EXT2FS support in Lites 1.1 * * Aug 1995, Godmar Back (gback@cs.utah.edu) * University of Utah, Department of Computer Science * * 
$FreeBSD$ */ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2009 Aditya Sarawgi * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * */ #ifndef _FS_EXT2FS_EXT2FS_H_ #define _FS_EXT2FS_EXT2FS_H_ #include /* * Super block for an ext2fs file system. 
 */
/*
 * NOTE(review): this mirrors the on-disk superblock layout; nearby code
 * (e2fs_cgload comment) states EXT2FS metadata is little-endian.
 */
struct ext2fs {
	uint32_t  e2fs_icount;		/* Inode count */
	uint32_t  e2fs_bcount;		/* blocks count */
	uint32_t  e2fs_rbcount;		/* reserved blocks count */
	uint32_t  e2fs_fbcount;		/* free blocks count */
	uint32_t  e2fs_ficount;		/* free inodes count */
	uint32_t  e2fs_first_dblock;	/* first data block */
	uint32_t  e2fs_log_bsize;	/* block size = 1024*(2^e2fs_log_bsize) */
	uint32_t  e2fs_log_fsize;	/* fragment size */
	uint32_t  e2fs_bpg;		/* blocks per group */
	uint32_t  e2fs_fpg;		/* frags per group */
	uint32_t  e2fs_ipg;		/* inodes per group */
	uint32_t  e2fs_mtime;		/* mount time */
	uint32_t  e2fs_wtime;		/* write time */
	uint16_t  e2fs_mnt_count;	/* mount count */
	uint16_t  e2fs_max_mnt_count;	/* max mount count */
	uint16_t  e2fs_magic;		/* magic number */
	uint16_t  e2fs_state;		/* file system state */
	uint16_t  e2fs_beh;		/* behavior on errors */
	uint16_t  e2fs_minrev;		/* minor revision level */
	uint32_t  e2fs_lastfsck;	/* time of last fsck */
	uint32_t  e2fs_fsckintv;	/* max time between fscks */
	uint32_t  e2fs_creator;		/* creator OS */
	uint32_t  e2fs_rev;		/* revision level */
	uint16_t  e2fs_ruid;		/* default uid for reserved blocks */
	uint16_t  e2fs_rgid;		/* default gid for reserved blocks */
	/* EXT2_DYNAMIC_REV superblocks */
	uint32_t  e2fs_first_ino;	/* first non-reserved inode */
	uint16_t  e2fs_inode_size;	/* size of inode structure */
	uint16_t  e2fs_block_group_nr;	/* block grp number of this sblk*/
	uint32_t  e2fs_features_compat;	/* compatible feature set */
	uint32_t  e2fs_features_incompat; /* incompatible feature set */
	uint32_t  e2fs_features_rocompat; /* RO-compatible feature set */
	uint8_t	  e2fs_uuid[16];	/* 128-bit uuid for volume */
	char	  e2fs_vname[16];	/* volume name */
	char	  e2fs_fsmnt[64];	/* name mounted on */
	uint32_t  e2fs_algo;		/* For compression */
	uint8_t	  e2fs_prealloc;	/* # of blocks for old prealloc */
	uint8_t	  e2fs_dir_prealloc;	/* # of blocks for old prealloc dirs */
	uint16_t  e2fs_reserved_ngdb;	/* # of reserved gd blocks for resize */
	char	  e3fs_journal_uuid[16]; /* uuid of journal superblock */
	uint32_t  e3fs_journal_inum;	/* inode number of journal file */
	uint32_t  e3fs_journal_dev;	/* device number of journal file */
	uint32_t  e3fs_last_orphan;	/* start of list of inodes to delete */
	uint32_t  e3fs_hash_seed[4];	/* HTREE hash seed */
	char	  e3fs_def_hash_version;/* Default hash version to use */
	char	  e3fs_jnl_backup_type;
	uint16_t  e3fs_desc_size;	/* size of group descriptor */
	uint32_t  e3fs_default_mount_opts;
	uint32_t  e3fs_first_meta_bg;	/* First metablock block group */
	uint32_t  e3fs_mkfs_time;	/* when the fs was created */
	uint32_t  e3fs_jnl_blks[17];	/* backup of the journal inode */
	uint32_t  e4fs_bcount_hi;	/* high bits of blocks count */
	uint32_t  e4fs_rbcount_hi;	/* high bits of reserved blocks count */
	uint32_t  e4fs_fbcount_hi;	/* high bits of free blocks count */
	uint16_t  e4fs_min_extra_isize;	/* all inodes have some bytes */
	uint16_t  e4fs_want_extra_isize;/* inodes must reserve some bytes */
	uint32_t  e4fs_flags;		/* miscellaneous flags */
	uint16_t  e4fs_raid_stride;	/* RAID stride */
	uint16_t  e4fs_mmpintv;		/* seconds to wait in MMP checking */
	uint64_t  e4fs_mmpblk;		/* block for multi-mount protection */
	uint32_t  e4fs_raid_stripe_wid;	/* blocks on data disks (N * stride) */
	uint8_t	  e4fs_log_gpf;		/* FLEX_BG group size */
	uint8_t	  e4fs_chksum_type;	/* metadata checksum algorithm used */
	uint8_t	  e4fs_encrypt;		/* versioning level for encryption */
	uint8_t	  e4fs_reserved_pad;
	uint64_t  e4fs_kbytes_written;	/* number of lifetime kilobytes */
	uint32_t  e4fs_snapinum;	/* inode number of active snapshot */
	uint32_t  e4fs_snapid;		/* sequential ID of active snapshot */
	uint64_t  e4fs_snaprbcount;	/* reserved blocks for active snapshot */
	uint32_t  e4fs_snaplist;	/* inode number for on-disk snapshot */
	uint32_t  e4fs_errcount;	/* number of file system errors */
	uint32_t  e4fs_first_errtime;	/* first time an error happened */
	uint32_t  e4fs_first_errino;	/* inode involved in first error */
	uint64_t  e4fs_first_errblk;	/* block involved of first error */
	uint8_t	  e4fs_first_errfunc[32];/* function where error happened */
	uint32_t  e4fs_first_errline;	/* line number where error happened */
	uint32_t  e4fs_last_errtime;	/* most recent time of an error */
	uint32_t  e4fs_last_errino;	/* inode involved in last error */
	uint32_t  e4fs_last_errline;	/* line number where error happened */
	uint64_t  e4fs_last_errblk;	/* block involved of last error */
	uint8_t	  e4fs_last_errfunc[32]; /* function where error happened */
	uint8_t	  e4fs_mount_opts[64];
	uint32_t  e4fs_usrquota_inum;	/* inode for tracking user quota */
	uint32_t  e4fs_grpquota_inum;	/* inode for tracking group quota */
	uint32_t  e4fs_overhead_clusters;/* overhead blocks/clusters */
	uint32_t  e4fs_backup_bgs[2];	/* groups with sparse_super2 SBs */
	uint8_t	  e4fs_encrypt_algos[4];/* encryption algorithms in use */
	uint8_t	  e4fs_encrypt_pw_salt[16];/* salt used for string2key */
	uint32_t  e4fs_lpf_ino;		/* location of the lost+found inode */
	uint32_t  e4fs_proj_quota_inum;	/* inode for tracking project quota */
	uint32_t  e4fs_chksum_seed;	/* checksum seed */
	uint32_t  e4fs_reserved[98];	/* padding to the end of the block */
	uint32_t  e4fs_sbchksum;	/* superblock checksum */
};

/*
 * The path name on which the file system is mounted is maintained
 * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in
 * the super block for this name.
 */
#define	MAXMNTLEN	512

/*
 * In-Memory Superblock
 */
struct m_ext2fs {
	struct ext2fs * e2fs;
	char	e2fs_fsmnt[MAXMNTLEN];/* name mounted on */
	char	e2fs_ronly;	/* mounted read-only flag */
	char	e2fs_fmod;	/* super block modified flag */
	uint32_t e2fs_bsize;	/* Block size */
	uint32_t e2fs_bshift;	/* calc of logical block no */
	uint32_t e2fs_bpg;	/* Number of blocks per group */
	int64_t	 e2fs_qbmask;	/* = s_blocksize -1 */
	uint32_t e2fs_fsbtodb;	/* Shift to get disk block */
	uint32_t e2fs_ipg;	/* Number of inodes per group */
	uint32_t e2fs_ipb;	/* Number of inodes per block */
	uint32_t e2fs_itpg;	/* Number of inode table per group */
	uint32_t e2fs_fsize;	/* Size of fragments per block */
	uint32_t e2fs_fpb;	/* Number of fragments per block */
	uint32_t e2fs_fpg;	/* Number of fragments per group */
	uint32_t e2fs_gdbcount;	/* Number of group descriptors */
	uint32_t e2fs_gcount;	/* Number of groups */
	uint32_t e2fs_isize;	/* Size of inode */
	uint32_t e2fs_total_dir; /* Total number of directories */
	uint8_t	*e2fs_contigdirs; /* (u) # of contig. allocated dirs */
	char	e2fs_wasvalid;	/* valid at mount time */
	off_t	e2fs_maxfilesize;
	struct ext2_gd *e2fs_gd; /* Group Descriptors */
	int32_t	e2fs_contigsumsize;	/* size of cluster summary array */
	int32_t	*e2fs_maxcluster;	/* max cluster in each cyl group */
	struct csum *e2fs_clustersum;	/* cluster summary in each cyl group */
	int32_t	e2fs_uhash;	/* 3 if hash should be signed, 0 if not */
};

/* cluster summary information */
struct csum {
	int8_t	cs_init;	/* cluster summary has been initialized */
	int32_t	*cs_sum;	/* cluster summary array */
};

/*
 * The second extended file system magic number
 */
#define	E2FS_MAGIC	0xEF53

/*
 * Revision levels
 */
#define	E2FS_REV0	0	/* The good old (original) format */
#define	E2FS_REV1	1	/* V2 format w/ dynamic inode sizes */

#define	E2FS_REV0_INODE_SIZE 128

/*
 * compatible/incompatible features
 */
#define	EXT2F_COMPAT_PREALLOC		0x0001
#define	EXT2F_COMPAT_IMAGIC_INODES	0x0002
#define	EXT2F_COMPAT_HASJOURNAL		0x0004
#define	EXT2F_COMPAT_EXT_ATTR		0x0008
#define	EXT2F_COMPAT_RESIZE		0x0010
#define	EXT2F_COMPAT_DIRHASHINDEX	0x0020
#define	EXT2F_COMPAT_LAZY_BG		0x0040
#define	EXT2F_COMPAT_EXCLUDE_BITMAP	0x0100
#define	EXT2F_COMPAT_SPARSESUPER2	0x0200

#define	EXT2F_ROCOMPAT_SPARSESUPER	0x0001
#define	EXT2F_ROCOMPAT_LARGEFILE	0x0002
#define	EXT2F_ROCOMPAT_BTREE_DIR	0x0004
#define	EXT2F_ROCOMPAT_HUGE_FILE	0x0008
#define	EXT2F_ROCOMPAT_GDT_CSUM		0x0010
#define	EXT2F_ROCOMPAT_DIR_NLINK	0x0020
#define	EXT2F_ROCOMPAT_EXTRA_ISIZE	0x0040
#define	EXT2F_ROCOMPAT_HAS_SNAPSHOT	0x0080
#define	EXT2F_ROCOMPAT_QUOTA		0x0100
#define	EXT2F_ROCOMPAT_BIGALLOC		0x0200
#define	EXT2F_ROCOMPAT_METADATA_CKSUM	0x0400
#define	EXT2F_ROCOMPAT_REPLICA		0x0800
#define	EXT2F_ROCOMPAT_READONLY		0x1000
#define	EXT2F_ROCOMPAT_PROJECT		0x2000

#define	EXT2F_INCOMPAT_COMP		0x0001
#define	EXT2F_INCOMPAT_FTYPE		0x0002
#define	EXT2F_INCOMPAT_RECOVER		0x0004
#define	EXT2F_INCOMPAT_JOURNAL_DEV	0x0008
#define	EXT2F_INCOMPAT_META_BG		0x0010
#define	EXT2F_INCOMPAT_EXTENTS		0x0040
#define	EXT2F_INCOMPAT_64BIT		0x0080
#define	EXT2F_INCOMPAT_MMP		0x0100
#define	EXT2F_INCOMPAT_FLEX_BG		0x0200
#define	EXT2F_INCOMPAT_EA_INODE		0x0400
#define	EXT2F_INCOMPAT_DIRDATA		0x1000
#define	EXT2F_INCOMPAT_CSUM_SEED	0x2000
#define	EXT2F_INCOMPAT_LARGEDIR		0x4000
#define	EXT2F_INCOMPAT_INLINE_DATA	0x8000
#define	EXT2F_INCOMPAT_ENCRYPT		0x10000

/* Maps a feature-flag bit to its printable name. */
struct ext2_feature {
	int mask;
	const char *name;
};

static const struct ext2_feature compat[] = {
	{ EXT2F_COMPAT_PREALLOC, "dir_prealloc" },
	{ EXT2F_COMPAT_IMAGIC_INODES, "imagic_inodes" },
	{ EXT2F_COMPAT_HASJOURNAL, "has_journal" },
	{ EXT2F_COMPAT_EXT_ATTR, "ext_attr" },
	{ EXT2F_COMPAT_RESIZE, "resize_inode" },
	{ EXT2F_COMPAT_DIRHASHINDEX, "dir_index" },
	{ EXT2F_COMPAT_EXCLUDE_BITMAP, "snapshot_bitmap" },
	{ EXT2F_COMPAT_SPARSESUPER2, "sparse_super2" }
};

static const struct ext2_feature ro_compat[] = {
	{ EXT2F_ROCOMPAT_SPARSESUPER, "sparse_super" },
	{ EXT2F_ROCOMPAT_LARGEFILE, "large_file" },
	{ EXT2F_ROCOMPAT_BTREE_DIR, "btree_dir" },
	{ EXT2F_ROCOMPAT_HUGE_FILE, "huge_file" },
	{ EXT2F_ROCOMPAT_GDT_CSUM, "uninit_groups" },
	{ EXT2F_ROCOMPAT_DIR_NLINK, "dir_nlink" },
	{ EXT2F_ROCOMPAT_EXTRA_ISIZE, "extra_isize" },
	{ EXT2F_ROCOMPAT_HAS_SNAPSHOT, "snapshot" },
	{ EXT2F_ROCOMPAT_QUOTA, "quota" },
	{ EXT2F_ROCOMPAT_BIGALLOC, "bigalloc" },
	{ EXT2F_ROCOMPAT_METADATA_CKSUM, "metadata_csum" },
	{ EXT2F_ROCOMPAT_REPLICA, "replica" },
	{ EXT2F_ROCOMPAT_READONLY, "ro" },
	{ EXT2F_ROCOMPAT_PROJECT, "project" }
};

static const struct ext2_feature incompat[] = {
	{ EXT2F_INCOMPAT_COMP, "compression" },
	{ EXT2F_INCOMPAT_FTYPE, "filetype" },
	{ EXT2F_INCOMPAT_RECOVER, "needs_recovery" },
	{ EXT2F_INCOMPAT_JOURNAL_DEV, "journal_dev" },
	{ EXT2F_INCOMPAT_META_BG, "meta_bg" },
	{ EXT2F_INCOMPAT_EXTENTS, "extents" },
	{ EXT2F_INCOMPAT_64BIT, "64bit" },
	{ EXT2F_INCOMPAT_MMP, "mmp" },
	{ EXT2F_INCOMPAT_FLEX_BG, "flex_bg" },
	{ EXT2F_INCOMPAT_EA_INODE, "ea_inode" },
	{ EXT2F_INCOMPAT_DIRDATA, "dirdata" },
	{ EXT2F_INCOMPAT_CSUM_SEED, "metadata_csum_seed" },
	{ EXT2F_INCOMPAT_LARGEDIR, "large_dir" },
	{ EXT2F_INCOMPAT_INLINE_DATA, "inline_data" },
	{ EXT2F_INCOMPAT_ENCRYPT, "encrypt" }
};

/*
 * Features supported in this implementation
 *
 * We support the following REV1 features:
 * - EXT2F_ROCOMPAT_SPARSESUPER
 * - EXT2F_ROCOMPAT_LARGEFILE
 * - EXT2F_ROCOMPAT_EXTRA_ISIZE
 * - EXT2F_INCOMPAT_FTYPE
 *
 * We partially (read-only) support the following EXT4 features:
 * - EXT2F_ROCOMPAT_HUGE_FILE
 * - EXT2F_INCOMPAT_EXTENTS
 *
 * We do not support these EXT4 features but they are irrelevant
 * for read-only support:
 * - EXT2F_INCOMPAT_RECOVER
 * - EXT2F_INCOMPAT_FLEX_BG
 * - EXT2F_INCOMPAT_META_BG
 */
#define	EXT2F_COMPAT_SUPP		EXT2F_COMPAT_DIRHASHINDEX
#define	EXT2F_ROCOMPAT_SUPP		(EXT2F_ROCOMPAT_SPARSESUPER | \
					 EXT2F_ROCOMPAT_LARGEFILE | \
					 EXT2F_ROCOMPAT_GDT_CSUM | \
					 EXT2F_ROCOMPAT_DIR_NLINK | \
					 EXT2F_ROCOMPAT_HUGE_FILE | \
					 EXT2F_ROCOMPAT_EXTRA_ISIZE)
#define	EXT2F_INCOMPAT_SUPP		EXT2F_INCOMPAT_FTYPE
#define	EXT4F_RO_INCOMPAT_SUPP		(EXT2F_INCOMPAT_EXTENTS | \
					 EXT2F_INCOMPAT_RECOVER | \
					 EXT2F_INCOMPAT_FLEX_BG | \
					 EXT2F_INCOMPAT_META_BG )

/* Assume that user mode programs are passing in an ext2fs superblock, not
 * a kernel struct super_block.  This will allow us to call the feature-test
 * macros from user land.
 */
#define	EXT2_SB(sb)	(sb)

/*
 * Feature set definitions
 */
#define	EXT2_HAS_COMPAT_FEATURE(sb,mask)			\
	( EXT2_SB(sb)->e2fs->e2fs_features_compat & htole32(mask) )
#define	EXT2_HAS_RO_COMPAT_FEATURE(sb,mask)			\
	( EXT2_SB(sb)->e2fs->e2fs_features_rocompat & htole32(mask) )
#define	EXT2_HAS_INCOMPAT_FEATURE(sb,mask)			\
	( EXT2_SB(sb)->e2fs->e2fs_features_incompat & htole32(mask) )

/*
 * File clean flags
 */
#define	E2FS_ISCLEAN	0x0001	/* Unmounted cleanly */
#define	E2FS_ERRORS	0x0002	/* Errors detected */

/*
 * Filesystem miscellaneous flags
 */
#define	E2FS_SIGNED_HASH	0x0001
#define	E2FS_UNSIGNED_HASH	0x0002

#define	EXT2_BG_INODE_UNINIT	0x0001	/* Inode table/bitmap not in use */
#define	EXT2_BG_BLOCK_UNINIT	0x0002	/* Block bitmap not in use */
#define	EXT2_BG_INODE_ZEROED	0x0004	/* On-disk itable initialized to zero */

/* ext2 file system block group descriptor */
struct ext2_gd {
	uint32_t ext2bgd_b_bitmap;	/* blocks bitmap block */
	uint32_t ext2bgd_i_bitmap;	/* inodes bitmap block */
	uint32_t ext2bgd_i_tables;	/* inodes table block */
	uint16_t ext2bgd_nbfree;	/* number of free blocks */
	uint16_t ext2bgd_nifree;	/* number of free inodes */
	uint16_t ext2bgd_ndirs;		/* number of directories */
	uint16_t ext4bgd_flags;		/* block group flags */
	uint32_t ext4bgd_x_bitmap;	/* snapshot exclusion bitmap loc. */
	uint16_t ext4bgd_b_bmap_csum;	/* block bitmap checksum */
	uint16_t ext4bgd_i_bmap_csum;	/* inode bitmap checksum */
	uint16_t ext4bgd_i_unused;	/* unused inode count */
	uint16_t ext4bgd_csum;		/* group descriptor checksum */
};

/* EXT2FS metadata is stored in little-endian byte order. These macros
 * help reading it.
 */
#define	e2fs_cgload(old, new, size)	memcpy((new), (old), (size));
#define	e2fs_cgsave(old, new, size)	memcpy((new), (old), (size));

/*
 * Macro-instructions used to manage several block sizes
 */
#define	EXT2_MIN_BLOCK_LOG_SIZE	10
#define	EXT2_BLOCK_SIZE(s)	((s)->e2fs_bsize)
#define	EXT2_ADDR_PER_BLOCK(s)	(EXT2_BLOCK_SIZE(s) / sizeof(uint32_t))
#define	EXT2_INODE_SIZE(s)	(EXT2_SB(s)->e2fs_isize)

/*
 * Macro-instructions used to manage fragments
 */
#define	EXT2_MIN_FRAG_SIZE	1024
#define	EXT2_MAX_FRAG_SIZE	4096
#define	EXT2_MIN_FRAG_LOG_SIZE	10
#define	EXT2_FRAG_SIZE(s)	(EXT2_SB(s)->e2fs_fsize)
#define	EXT2_FRAGS_PER_BLOCK(s)	(EXT2_SB(s)->e2fs_fpb)

/*
 * Macro-instructions used to manage group descriptors
 */
#define	EXT2_BLOCKS_PER_GROUP(s)	(EXT2_SB(s)->e2fs_bpg)

#endif	/* !_FS_EXT2FS_EXT2FS_H_ */
Index: head/sys/fs/ext2fs/htree.h
===================================================================
--- head/sys/fs/ext2fs/htree.h	(revision 326267)
+++ head/sys/fs/ext2fs/htree.h	(revision 326268)
@@ -1,100 +1,102 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2010, 2012 Zheng Liu
 * Copyright (c) 2012, Vyacheslav Matyushin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _FS_EXT2FS_HTREE_H_
#define	_FS_EXT2FS_HTREE_H_

/* EXT3 HTree directory indexing */

#define	EXT2_HTREE_LEGACY		0
#define	EXT2_HTREE_HALF_MD4		1
#define	EXT2_HTREE_TEA			2
#define	EXT2_HTREE_LEGACY_UNSIGNED	3
#define	EXT2_HTREE_HALF_MD4_UNSIGNED	4
#define	EXT2_HTREE_TEA_UNSIGNED		5

#define	EXT2_HTREE_EOF 0x7FFFFFFF

/* Directory-entry header shape used as placeholder in index blocks. */
struct ext2fs_fake_direct {
	uint32_t e2d_ino;	/* inode number of entry */
	uint16_t e2d_reclen;	/* length of this record */
	uint8_t	e2d_namlen;	/* length of string in d_name */
	uint8_t	e2d_type;	/* file type */
};

/* Overlays entry 0 of an index node: capacity and in-use count. */
struct ext2fs_htree_count {
	uint16_t h_entries_max;
	uint16_t h_entries_num;
};

/* One index entry: name hash and the block it maps to. */
struct ext2fs_htree_entry {
	uint32_t h_hash;
	uint32_t h_blk;
};

struct ext2fs_htree_root_info {
	uint32_t h_reserved1;
	uint8_t	h_hash_version;
	uint8_t	h_info_len;
	uint8_t	h_ind_levels;
	uint8_t	h_reserved2;
};

/* Layout of directory block 0 when the HTree index is in use. */
struct ext2fs_htree_root {
	struct ext2fs_fake_direct h_dot;
	char	h_dot_name[4];
	struct ext2fs_fake_direct h_dotdot;
	char	h_dotdot_name[4];
	struct ext2fs_htree_root_info h_info;
	struct ext2fs_htree_entry h_entries[0];
};

/* Layout of a non-root index node. */
struct ext2fs_htree_node {
	struct ext2fs_fake_direct h_fake_dirent;
	struct ext2fs_htree_entry h_entries[0];
};

/* One level of an in-progress index lookup. */
struct ext2fs_htree_lookup_level {
	struct buf *h_bp;
	struct ext2fs_htree_entry *h_entries;
	struct ext2fs_htree_entry *h_entry;
};

/* Full lookup path; at most two index levels are supported. */
struct ext2fs_htree_lookup_info {
	struct ext2fs_htree_lookup_level h_levels[2];
	uint32_t h_levels_num;
};

/* Sort descriptor used when splitting a directory block by hash. */
struct ext2fs_htree_sort_entry {
	uint16_t h_offset;
	uint16_t h_size;
	uint32_t h_hash;
};

#endif	/* !_FS_EXT2FS_HTREE_H_ */
Index: head/sys/fs/msdosfs/denode.h
===================================================================
--- head/sys/fs/msdosfs/denode.h	(revision 326267)
+++ head/sys/fs/msdosfs/denode.h	(revision 326268)
@@ -1,284 +1,286 @@
/* $FreeBSD$ */
/*	$NetBSD: denode.h,v 1.25 1997/11/17 15:36:28 ws Exp $	*/
/*-
+ * SPDX-License-Identifier: BSD-4-Clause
+ *
 * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
 * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
 * All rights reserved.
 * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by TooLs GmbH.
 * 4. The name of TooLs GmbH may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #ifndef _FS_MSDOSFS_DENODE_H_ #define _FS_MSDOSFS_DENODE_H_ /* * This is the pc filesystem specific portion of the vnode structure. * * To describe a file uniquely the de_dirclust, de_diroffset, and * de_StartCluster fields are used. * * de_dirclust contains the cluster number of the directory cluster * containing the entry for a file or directory. * de_diroffset is the index into the cluster for the entry describing * a file or directory. * de_StartCluster is the number of the first cluster of the file or directory. * * Now to describe the quirks of the pc filesystem. * - Clusters 0 and 1 are reserved. * - The first allocatable cluster is 2. * - The root directory is of fixed size and all blocks that make it up * are contiguous. * - Cluster 0 refers to the root directory when it is found in the * startcluster field of a directory entry that points to another directory. * - Cluster 0 implies a 0 length file when found in the start cluster field * of a directory entry that points to a file. 
* - You can't use the cluster number 0 to derive the address of the root * directory. * - Multiple directory entries can point to a directory. The entry in the * parent directory points to a child directory. Any directories in the * child directory contain a ".." entry that points back to the parent. * The child directory itself contains a "." entry that points to itself. * - The root directory does not contain a "." or ".." entry. * - Directory entries for directories are never changed once they are created * (except when removed). The size stays 0, and the last modification time * is never changed. This is because so many directory entries can point to * the physical clusters that make up a directory. It would lead to an * update nightmare. * - The length field in a directory entry pointing to a directory contains 0 * (always). The only way to find the end of a directory is to follow the * cluster chain until the "last cluster" marker is found. * * My extensions to make this house of cards work. These apply only to the in * memory copy of the directory entry. * - A reference count for each denode will be kept since dos doesn't keep such * things. */ /* * Internal pseudo-offset for (nonexistent) directory entry for the root * dir in the root dir */ #define MSDOSFSROOT_OFS 0x1fffffff /* * The FAT cache structure. fc_fsrcn is the filesystem relative cluster * number that corresponds to the file relative cluster number in this * structure (fc_frcn). */ struct fatcache { u_long fc_frcn; /* file relative cluster number */ u_long fc_fsrcn; /* filesystem relative cluster number */ }; /* * The FAT entry cache as it stands helps make extending files a "quick" * operation by avoiding having to scan the FAT to discover the last * cluster of the file. The cache also helps sequential reads by * remembering the last cluster read from the file. This also prevents us * from having to rescan the FAT to find the next cluster to read. 
This * cache is probably pretty worthless if a file is opened by multiple * processes. */ #define FC_SIZE 3 /* number of entries in the cache */ #define FC_LASTMAP 0 /* entry the last call to pcbmap() resolved * to */ #define FC_LASTFC 1 /* entry for the last cluster in the file */ #define FC_NEXTTOLASTFC 2 /* entry for a close to the last cluster in * the file */ #define FCE_EMPTY 0xffffffff /* doesn't represent an actual cluster # */ /* * Set a slot in the FAT cache. */ #define fc_setcache(dep, slot, frcn, fsrcn) \ (dep)->de_fc[(slot)].fc_frcn = (frcn); \ (dep)->de_fc[(slot)].fc_fsrcn = (fsrcn); /* * This is the in memory variant of a dos directory entry. It is usually * contained within a vnode. */ struct denode { struct vnode *de_vnode; /* addr of vnode we are part of */ u_long de_flag; /* flag bits */ u_long de_dirclust; /* cluster of the directory file containing this entry */ u_long de_diroffset; /* offset of this entry in the directory cluster */ u_long de_fndoffset; /* offset of found dir entry */ int de_fndcnt; /* number of slots before de_fndoffset */ long de_refcnt; /* reference count */ struct msdosfsmount *de_pmp; /* addr of our mount struct */ u_char de_Name[12]; /* name, from DOS directory entry */ u_char de_Attributes; /* attributes, from directory entry */ u_char de_LowerCase; /* NT VFAT lower case flags */ u_char de_CHun; /* Hundredth of second of CTime*/ u_short de_CTime; /* creation time */ u_short de_CDate; /* creation date */ u_short de_ADate; /* access date */ u_short de_MTime; /* modification time */ u_short de_MDate; /* modification date */ u_long de_StartCluster; /* starting cluster of file */ u_long de_FileSize; /* size of file in bytes */ struct fatcache de_fc[FC_SIZE]; /* FAT cache */ u_quad_t de_modrev; /* Revision level for lease. */ uint64_t de_inode; /* Inode number (really byte offset of direntry) */ }; /* * Values for the de_flag field of the denode. 
*/ #define DE_UPDATE 0x0004 /* Modification time update request */ #define DE_CREATE 0x0008 /* Creation time update */ #define DE_ACCESS 0x0010 /* Access time update */ #define DE_MODIFIED 0x0020 /* Denode has been modified */ #define DE_RENAME 0x0040 /* Denode is in the process of being renamed */ /* * Transfer directory entries between internal and external form. * dep is a struct denode * (internal form), * dp is a struct direntry * (external form). */ #define DE_INTERNALIZE32(dep, dp) \ ((dep)->de_StartCluster |= getushort((dp)->deHighClust) << 16) #define DE_INTERNALIZE(dep, dp) \ (memcpy((dep)->de_Name, (dp)->deName, 11), \ (dep)->de_Attributes = (dp)->deAttributes, \ (dep)->de_LowerCase = (dp)->deLowerCase, \ (dep)->de_CHun = (dp)->deCHundredth, \ (dep)->de_CTime = getushort((dp)->deCTime), \ (dep)->de_CDate = getushort((dp)->deCDate), \ (dep)->de_ADate = getushort((dp)->deADate), \ (dep)->de_MTime = getushort((dp)->deMTime), \ (dep)->de_MDate = getushort((dp)->deMDate), \ (dep)->de_StartCluster = getushort((dp)->deStartCluster), \ (dep)->de_FileSize = getulong((dp)->deFileSize), \ (FAT32((dep)->de_pmp) ? DE_INTERNALIZE32((dep), (dp)) : 0)) #define DE_EXTERNALIZE(dp, dep) \ (memcpy((dp)->deName, (dep)->de_Name, 11), \ (dp)->deAttributes = (dep)->de_Attributes, \ (dp)->deLowerCase = (dep)->de_LowerCase, \ (dp)->deCHundredth = (dep)->de_CHun, \ putushort((dp)->deCTime, (dep)->de_CTime), \ putushort((dp)->deCDate, (dep)->de_CDate), \ putushort((dp)->deADate, (dep)->de_ADate), \ putushort((dp)->deMTime, (dep)->de_MTime), \ putushort((dp)->deMDate, (dep)->de_MDate), \ putushort((dp)->deStartCluster, (dep)->de_StartCluster), \ putulong((dp)->deFileSize, \ ((dep)->de_Attributes & ATTR_DIRECTORY) ? 
0 : (dep)->de_FileSize), \ putushort((dp)->deHighClust, (dep)->de_StartCluster >> 16)) #if defined(_KERNEL) || defined(MAKEFS) #define VTODE(vp) ((struct denode *)(vp)->v_data) #define DETOV(de) ((de)->de_vnode) #define DETIMES(dep, acc, mod, cre) do { \ if ((dep)->de_flag & DE_UPDATE) { \ (dep)->de_flag |= DE_MODIFIED; \ timespec2fattime((mod), 0, &(dep)->de_MDate, \ &(dep)->de_MTime, NULL); \ (dep)->de_Attributes |= ATTR_ARCHIVE; \ } \ if ((dep)->de_pmp->pm_flags & MSDOSFSMNT_NOWIN95) { \ (dep)->de_flag &= ~(DE_UPDATE | DE_CREATE | DE_ACCESS); \ break; \ } \ if ((dep)->de_flag & DE_ACCESS) { \ uint16_t adate; \ \ timespec2fattime((acc), 0, &adate, NULL, NULL); \ if (adate != (dep)->de_ADate) { \ (dep)->de_flag |= DE_MODIFIED; \ (dep)->de_ADate = adate; \ } \ } \ if ((dep)->de_flag & DE_CREATE) { \ timespec2fattime((cre), 0, &(dep)->de_CDate, \ &(dep)->de_CTime, &(dep)->de_CHun); \ (dep)->de_flag |= DE_MODIFIED; \ } \ (dep)->de_flag &= ~(DE_UPDATE | DE_CREATE | DE_ACCESS); \ } while (0) /* * This overlays the fid structure (see mount.h) */ struct defid { u_short defid_len; /* length of structure */ u_short defid_pad; /* force long alignment */ uint32_t defid_dirclust; /* cluster this dir entry came from */ uint32_t defid_dirofs; /* offset of entry within the cluster */ #if 0 uint32_t defid_gen; /* generation number */ #endif }; extern struct vop_vector msdosfs_vnodeops; int msdosfs_lookup(struct vop_cachedlookup_args *); int msdosfs_inactive(struct vop_inactive_args *); int msdosfs_reclaim(struct vop_reclaim_args *); /* * Internal service routine prototypes. 
*/ int deget(struct msdosfsmount *, u_long, u_long, struct denode **); int uniqdosname(struct denode *, struct componentname *, u_char *); int readep(struct msdosfsmount *pmp, u_long dirclu, u_long dirofs, struct buf **bpp, struct direntry **epp); int readde(struct denode *dep, struct buf **bpp, struct direntry **epp); int deextend(struct denode *dep, u_long length, struct ucred *cred); int fillinusemap(struct msdosfsmount *pmp); void reinsert(struct denode *dep); int dosdirempty(struct denode *dep); int createde(struct denode *dep, struct denode *ddep, struct denode **depp, struct componentname *cnp); int deupdat(struct denode *dep, int waitfor); int removede(struct denode *pdep, struct denode *dep); int detrunc(struct denode *dep, u_long length, int flags, struct ucred *cred); int doscheckpath( struct denode *source, struct denode *target); #endif /* _KERNEL || MAKEFS */ #endif /* !_FS_MSDOSFS_DENODE_H_ */ Index: head/sys/fs/msdosfs/direntry.h =================================================================== --- head/sys/fs/msdosfs/direntry.h (revision 326267) +++ head/sys/fs/msdosfs/direntry.h (revision 326268) @@ -1,163 +1,165 @@ /* $FreeBSD$ */ /* $NetBSD: direntry.h,v 1.14 1997/11/17 15:36:32 ws Exp $ */ /*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #ifndef _FS_MSDOSFS_DIRENTRY_H_ #define _FS_MSDOSFS_DIRENTRY_H_ /* * Structure of a dos directory entry. 
*/ struct direntry { uint8_t deName[11]; /* filename, blank filled */ #define SLOT_EMPTY 0x00 /* slot has never been used */ #define SLOT_E5 0x05 /* the real value is 0xe5 */ #define SLOT_DELETED 0xe5 /* file in this slot deleted */ uint8_t deAttributes; /* file attributes */ #define ATTR_NORMAL 0x00 /* normal file */ #define ATTR_READONLY 0x01 /* file is readonly */ #define ATTR_HIDDEN 0x02 /* file is hidden */ #define ATTR_SYSTEM 0x04 /* file is a system file */ #define ATTR_VOLUME 0x08 /* entry is a volume label */ #define ATTR_DIRECTORY 0x10 /* entry is a directory name */ #define ATTR_ARCHIVE 0x20 /* file is new or modified */ uint8_t deLowerCase; /* NT VFAT lower case flags */ #define LCASE_BASE 0x08 /* filename base in lower case */ #define LCASE_EXT 0x10 /* filename extension in lower case */ uint8_t deCHundredth; /* hundredth of seconds in CTime */ uint8_t deCTime[2]; /* create time */ uint8_t deCDate[2]; /* create date */ uint8_t deADate[2]; /* access date */ uint8_t deHighClust[2]; /* high bytes of cluster number */ uint8_t deMTime[2]; /* last update time */ uint8_t deMDate[2]; /* last update date */ uint8_t deStartCluster[2]; /* starting cluster of file */ uint8_t deFileSize[4]; /* size of file in bytes */ }; /* * Structure of a Win95 long name directory entry */ struct winentry { uint8_t weCnt; #define WIN_LAST 0x40 #define WIN_CNT 0x3f uint8_t wePart1[10]; uint8_t weAttributes; #define ATTR_WIN95 0x0f uint8_t weReserved1; uint8_t weChksum; uint8_t wePart2[12]; uint16_t weReserved2; uint8_t wePart3[4]; }; #define WIN_CHARS 13 /* Number of chars per winentry */ /* * Maximum number of winentries for a filename. */ #define WIN_MAXSUBENTRIES 20 /* * Maximum filename length in Win95 * Note: Must be < sizeof(dirent.d_name) */ #define WIN_MAXLEN 255 /* * This is the format of the contents of the deTime field in the direntry * structure. * We don't use bitfields because we don't know how compilers for * arbitrary machines will lay them out. 
*/ #define DT_2SECONDS_MASK 0x1F /* seconds divided by 2 */ #define DT_2SECONDS_SHIFT 0 #define DT_MINUTES_MASK 0x7E0 /* minutes */ #define DT_MINUTES_SHIFT 5 #define DT_HOURS_MASK 0xF800 /* hours */ #define DT_HOURS_SHIFT 11 /* * This is the format of the contents of the deDate field in the direntry * structure. */ #define DD_DAY_MASK 0x1F /* day of month */ #define DD_DAY_SHIFT 0 #define DD_MONTH_MASK 0x1E0 /* month */ #define DD_MONTH_SHIFT 5 #define DD_YEAR_MASK 0xFE00 /* year - 1980 */ #define DD_YEAR_SHIFT 9 #if defined(_KERNEL) || defined(MAKEFS) struct mbnambuf { size_t nb_len; int nb_last_id; char nb_buf[WIN_MAXLEN + 1]; }; struct dirent; struct msdosfsmount; char *mbnambuf_flush(struct mbnambuf *nbp, struct dirent *dp); void mbnambuf_init(struct mbnambuf *nbp); int mbnambuf_write(struct mbnambuf *nbp, char *name, int id); int dos2unixfn(u_char dn[11], u_char *un, int lower, struct msdosfsmount *pmp); int unix2dosfn(const u_char *un, u_char dn[12], size_t unlen, u_int gen, struct msdosfsmount *pmp); int unix2winfn(const u_char *un, size_t unlen, struct winentry *wep, int cnt, int chksum, struct msdosfsmount *pmp); int winChkName(struct mbnambuf *nbp, const u_char *un, size_t unlen, int chksum, struct msdosfsmount *pmp); int win2unixfn(struct mbnambuf *nbp, struct winentry *wep, int chksum, struct msdosfsmount *pmp); uint8_t winChksum(uint8_t *name); int winSlotCnt(const u_char *un, size_t unlen, struct msdosfsmount *pmp); size_t winLenFixup(const u_char *un, size_t unlen); #endif /* _KERNEL || MAKEFS */ #endif /* !_FS_MSDOSFS_DIRENTRY_H_ */ Index: head/sys/fs/msdosfs/fat.h =================================================================== --- head/sys/fs/msdosfs/fat.h (revision 326267) +++ head/sys/fs/msdosfs/fat.h (revision 326268) @@ -1,107 +1,109 @@ /* $FreeBSD$ */ /* $NetBSD: fat.h,v 1.12 1997/11/17 15:36:36 ws Exp $ */ /*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (C) 1994, 1997 Wolfgang Solfrank. 
* Copyright (C) 1994, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". 
* * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #ifndef _FS_MSDOSFS_FAT_H_ #define _FS_MSDOSFS_FAT_H_ /* * Some useful cluster numbers. */ #define MSDOSFSROOT 0 /* cluster 0 means the root dir */ #define CLUST_FREE 0 /* cluster 0 also means a free cluster */ #define MSDOSFSFREE CLUST_FREE #define CLUST_FIRST 2 /* first legal cluster number */ #define CLUST_RSRVD 0xfffffff6 /* reserved cluster range */ #define CLUST_BAD 0xfffffff7 /* a cluster with a defect */ #define CLUST_EOFS 0xfffffff8 /* start of eof cluster range */ #define CLUST_EOFE 0xffffffff /* end of eof cluster range */ #define FAT12_MASK 0x00000fff /* mask for 12 bit cluster numbers */ #define FAT16_MASK 0x0000ffff /* mask for 16 bit cluster numbers */ #define FAT32_MASK 0x0fffffff /* mask for FAT32 cluster numbers */ /* * MSDOSFS: * Return true if filesystem uses 12 bit FATs. Microsoft Programmer's * Reference says if the maximum cluster number in a filesystem is greater * than 4084 ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK) then we've got a * 16 bit FAT filesystem. While mounting, the result of this test is stored * in pm_fatentrysize. */ #define FAT12(pmp) (pmp->pm_fatmask == FAT12_MASK) #define FAT16(pmp) (pmp->pm_fatmask == FAT16_MASK) #define FAT32(pmp) (pmp->pm_fatmask == FAT32_MASK) #define MSDOSFSEOF(pmp, cn) ((((cn) | ~(pmp)->pm_fatmask) & CLUST_EOFS) == CLUST_EOFS) #if defined(_KERNEL) || defined(MAKEFS) /* * These are the values for the function argument to the function * fatentry(). 
*/ #define FAT_GET 0x0001 /* get a FAT entry */ #define FAT_SET 0x0002 /* set a FAT entry */ #define FAT_GET_AND_SET (FAT_GET | FAT_SET) /* * Flags to extendfile: */ #define DE_CLEAR 1 /* Zero out the blocks allocated */ int pcbmap(struct denode *dep, u_long findcn, daddr_t *bnp, u_long *cnp, int* sp); int clusterfree(struct msdosfsmount *pmp, u_long cn, u_long *oldcnp); int clusteralloc(struct msdosfsmount *pmp, u_long start, u_long count, u_long fillwith, u_long *retcluster, u_long *got); int fatentry(int function, struct msdosfsmount *pmp, u_long cluster, u_long *oldcontents, u_long newcontents); int freeclusterchain(struct msdosfsmount *pmp, u_long startchain); int extendfile(struct denode *dep, u_long count, struct buf **bpp, u_long *ncp, int flags); void fc_purge(struct denode *dep, u_int frcn); int markvoldirty(struct msdosfsmount *pmp, int dirty); #endif /* _KERNEL || MAKEFS */ #endif /* !_FS_MSDOSFS_FAT_H_ */ Index: head/sys/fs/msdosfs/msdosfs_conv.c =================================================================== --- head/sys/fs/msdosfs/msdosfs_conv.c (revision 326267) +++ head/sys/fs/msdosfs/msdosfs_conv.c (revision 326268) @@ -1,1078 +1,1080 @@ /* $FreeBSD$ */ /* $NetBSD: msdosfs_conv.c,v 1.25 1997/11/17 15:36:40 ws Exp $ */ /*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (C) 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. 
* * October 1992 */ #include #include #include #include #include #include #include #include extern struct iconv_functions *msdosfs_iconv; static int mbsadjpos(const char **, size_t, size_t, int, int, void *handle); static u_char * dos2unixchr(u_char *, const u_char **, size_t *, int, struct msdosfsmount *); static uint16_t unix2doschr(const u_char **, size_t *, struct msdosfsmount *); static u_char * win2unixchr(u_char *, uint16_t, struct msdosfsmount *); static uint16_t unix2winchr(const u_char **, size_t *, int, struct msdosfsmount *); /* * 0 - character disallowed in long file name. * 1 - character should be replaced by '_' in DOS file name, * and generation number inserted. * 2 - character ('.' and ' ') should be skipped in DOS file name, * and generation number inserted. */ static const u_char unix2dos[256] = { /* iso8859-1 -> cp850 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 00-07 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 08-0f */ 0, 0, 0, 0, 0, 0, 0, 0, /* 10-17 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 18-1f */ 2, 0x21, 0, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */ 0x28, 0x29, 0, 1, 1, 0x2d, 2, 0, /* 28-2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */ 0x38, 0x39, 0, 1, 0, 1, 0, 0, /* 38-3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 40-47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 48-4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 50-57 */ 0x58, 0x59, 0x5a, 1, 0, 1, 0x5e, 0x5f, /* 58-5f */ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 60-67 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 68-6f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 70-77 */ 0x58, 0x59, 0x5a, 0x7b, 0, 0x7d, 0x7e, 0, /* 78-7f */ 0, 0, 0, 0, 0, 0, 0, 0, /* 80-87 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 88-8f */ 0, 0, 0, 0, 0, 0, 0, 0, /* 90-97 */ 0, 0, 0, 0, 0, 0, 0, 0, /* 98-9f */ 0, 0xad, 0xbd, 0x9c, 0xcf, 0xbe, 0xdd, 0xf5, /* a0-a7 */ 0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee, /* a8-af */ 0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa, /* b0-b7 */ 0xf7, 0xfb, 
0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8, /* b8-bf */ 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, /* c0-c7 */ 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, /* c8-cf */ 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e, /* d0-d7 */ 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1, /* d8-df */ 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, /* e0-e7 */ 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, /* e8-ef */ 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0xf6, /* f0-f7 */ 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0x98, /* f8-ff */ }; static const u_char dos2unix[256] = { /* cp850 -> iso8859-1 */ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 00-07 */ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 08-0f */ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 10-17 */ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 18-1f */ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 28-2f */ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 38-3f */ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 40-47 */ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 48-4f */ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 50-57 */ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 58-5f */ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 60-67 */ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 68-6f */ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 70-77 */ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 78-7f */ 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, /* 80-87 */ 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5, /* 88-8f */ 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, /* 90-97 */ 0xff, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0x3f, /* 98-9f */ 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, /* a0-a7 */ 0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb, /* a8-af */ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xc1, 0xc2, 0xc0, /* b0-b7 */ 0xa9, 0x3f, 0x3f, 0x3f, 
	0x3f, 0xa2, 0xa5, 0x3f,				/* b8-bf */
	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xe3, 0xc3,	/* c0-c7 */
	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xa4,	/* c8-cf */
	0xf0, 0xd0, 0xca, 0xcb, 0xc8, 0x3f, 0xcd, 0xce,	/* d0-d7 */
	0xcf, 0x3f, 0x3f, 0x3f, 0x3f, 0xa6, 0xcc, 0x3f,	/* d8-df */
	0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xfe,	/* e0-e7 */
	0xde, 0xda, 0xdb, 0xd9, 0xfd, 0xdd, 0xaf, 0x3f,	/* e8-ef */
	0xad, 0xb1, 0x3f, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8,	/* f0-f7 */
	0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x3f, 0x3f,	/* f8-ff */
};

/*
 * Byte case-folding table: maps each Latin-1 byte to its lower-case
 * equivalent.  Entries 0x41-0x5a (A-Z) map to 0x61-0x7a (a-z) and the
 * accented upper-case range 0xc0-0xde maps to 0xe0-0xfe; 0xd7
 * (multiplication sign) and 0xdf (sharp s) are left unchanged.
 * Everything else maps to itself.
 */
static const u_char
u2l[256] = {
/* tolower */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,	/* 00-07 */
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,	/* 08-0f */
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,	/* 10-17 */
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,	/* 18-1f */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,	/* 20-27 */
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,	/* 28-2f */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	/* 30-37 */
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,	/* 38-3f */
	0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,	/* 40-47 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,	/* 48-4f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,	/* 50-57 */
	0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,	/* 58-5f */
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,	/* 60-67 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,	/* 68-6f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,	/* 70-77 */
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,	/* 78-7f */
	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,	/* 80-87 */
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,	/* 88-8f */
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,	/* 90-97 */
	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,	/* 98-9f */
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,	/* a0-a7 */
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,	/* a8-af */
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,	/* b0-b7 */
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,	/* b8-bf */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,	/* c0-c7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,	/* c8-cf */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7,	/* d0-d7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf,	/* d8-df */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,	/* e0-e7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,	/* e8-ef */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,	/* f0-f7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,	/* f8-ff */
};

/*
 * NOTE(review): despite the "toupper" label, the entries of l2u are
 * identical to u2l above — A-Z and the accented upper-case range are
 * mapped DOWN to lower case, not up.  This matches the values visible
 * here; presumably a deliberate historical quirk of this driver —
 * confirm against upstream before "fixing".
 */
static const u_char
l2u[256] = {
/* toupper */
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,	/* 00-07 */
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,	/* 08-0f */
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,	/* 10-17 */
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,	/* 18-1f */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,	/* 20-27 */
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,	/* 28-2f */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	/* 30-37 */
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,	/* 38-3f */
	0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,	/* 40-47 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,	/* 48-4f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,	/* 50-57 */
	0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,	/* 58-5f */
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,	/* 60-67 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,	/* 68-6f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,	/* 70-77 */
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,	/* 78-7f */
	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,	/* 80-87 */
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,	/* 88-8f */
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,	/* 90-97 */
	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,	/* 98-9f */
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,	/* a0-a7 */
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,	/* a8-af */
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,	/* b0-b7 */
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,	/* b8-bf */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,	/* c0-c7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,	/* c8-cf */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7,	/* d0-d7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf,	/* d8-df */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,	/* e0-e7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,	/* e8-ef */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,	/* f0-f7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,	/* f8-ff */
};

/*
 * DOS filenames are made of 2 parts, the name part and the extension part.
 * The name part is 8 characters long and the extension part is 3
 * characters long.  They may contain trailing blanks if the name or
 * extension are not long enough to fill their respective fields.
 */

/*
 * Convert a DOS filename to a unix filename.  And, return the number of
 * characters in the resulting unix filename excluding the terminating
 * null.
 *
 *	dn	11-byte space-padded 8.3 name; may be rewritten in place
 *		(SLOT_E5 lead byte is restored to 0xe5 below)
 *	un	output buffer for the unix name (NUL-terminated on return)
 *	lower	LCASE_BASE / LCASE_EXT flags selecting case folding
 *	pmp	mount point, carries the kiconv handles and flags
 */
int
dos2unixfn(u_char dn[11], u_char *un, int lower, struct msdosfsmount *pmp)
{
	size_t i;
	int thislong = 0;
	u_char *c, tmpbuf[5];

	/*
	 * If first char of the filename is SLOT_E5 (0x05), then the real
	 * first char of the filename should be 0xe5.  But, they couldn't
	 * just have a 0xe5 mean 0xe5 because that is used to mean a freed
	 * directory slot.  Another dos quirk.
	 */
	if (*dn == SLOT_E5)
		*dn = 0xe5;

	/*
	 * Copy the name portion into the unix filename string.
	 * dos2unixchr() consumes a variable number of input bytes and
	 * decrements i accordingly, so the loop has no increment clause.
	 */
	for (i = 8; i > 0 && *dn != ' ';) {
		c = dos2unixchr(tmpbuf, __DECONST(const u_char **, &dn), &i,
		    lower & LCASE_BASE, pmp);
		while (*c != '\0') {
			*un++ = *c++;
			thislong++;
		}
	}
	dn += i;

	/*
	 * Now, if there is an extension then put in a period and copy in
	 * the extension.
	 */
	if (*dn != ' ') {
		*un++ = '.';
		thislong++;
		for (i = 3; i > 0 && *dn != ' ';) {
			c = dos2unixchr(tmpbuf,
			    __DECONST(const u_char **, &dn), &i,
			    lower & LCASE_EXT, pmp);
			while (*c != '\0') {
				*un++ = *c++;
				thislong++;
			}
		}
	}
	*un++ = 0;
	return (thislong);
}

/*
 * Convert a unix filename to a DOS filename according to Win95 rules.
 * If applicable and gen is not 0, it is inserted into the converted
 * filename as a generation number.
 * Returns
 *	0 if name couldn't be converted
 *	1 if the converted name is the same as the original
 *	  (no long filename entry necessary for Win95)
 *	2 if conversion was successful
 *	3 if conversion was successful and generation number was inserted
 */
int
unix2dosfn(const u_char *un, u_char dn[12], size_t unlen, u_int gen,
    struct msdosfsmount *pmp)
{
	ssize_t i, j;
	int l;
	int conv = 1;	/* running verdict; see return values above */
	const u_char *cp, *dp, *dp1;
	u_char gentext[6], *wcp;
	uint16_t c;

	/*
	 * Fill the dos filename string with blanks.  These are DOS's pad
	 * characters.
	 */
	for (i = 0; i < 11; i++)
		dn[i] = ' ';
	dn[11] = 0;

	/*
	 * The filenames "." and ".." are handled specially, since they
	 * don't follow dos filename rules.
	 */
	if (un[0] == '.' && unlen == 1) {
		dn[0] = '.';
		return gen <= 1;
	}
	if (un[0] == '.' && un[1] == '.' && unlen == 2) {
		dn[0] = '.';
		dn[1] = '.';
		return gen <= 1;
	}

	/*
	 * Filenames with only blanks and dots are not allowed!
	 */
	for (cp = un, i = unlen; --i >= 0; cp++)
		if (*cp != ' ' && *cp != '.')
			break;
	if (i < 0)
		return 0;

	/*
	 * Filenames with some characters are not allowed!
	 * (unix2doschr() returns 0 for a byte that cannot appear in a
	 * DOS name at all.)
	 */
	for (cp = un, i = unlen; i > 0;)
		if (unix2doschr(&cp, (size_t *)&i, pmp) == 0)
			return 0;

	/*
	 * Now find the extension.
	 * Note: dot as first char doesn't start extension
	 *	 and trailing dots and blanks are ignored.
	 * Note(2003/7): It seems recent Windows has a
	 *	 different rule than this code: Windows
	 *	 ignores all dots before the extension, and uses all
	 *	 chars as filename except for the dots.
	 */
	dp = dp1 = NULL;
	for (cp = un + 1, i = unlen - 1; --i >= 0;) {
		switch (*cp++) {
		case '.':
			if (!dp1)
				dp1 = cp;
			break;
		case ' ':
			break;
		default:
			if (dp1)
				dp = dp1;
			dp1 = NULL;
			break;
		}
	}

	/*
	 * Now convert it (this part is for the extension).
	 * As Windows XP does, if it's not an ascii char,
	 * this function should return 2 or 3, so that the caller goes on
	 * to check the Unicode name.
	 */
	if (dp) {
		if (dp1)
			l = dp1 - dp;
		else
			l = unlen - (dp - un);
		for (cp = dp, i = l, j = 8; i > 0 && j < 11; j++) {
			c = unix2doschr(&cp, (size_t *)&i, pmp);
			if (c & 0xff00) {
				/* double-byte DOS char: high byte first */
				dn[j] = c >> 8;
				if (++j < 11) {
					dn[j] = c;
					if (conv != 3)
						conv = 2;
					continue;
				} else {
					conv = 3;
					dn[j-1] = ' ';
					break;
				}
			} else {
				dn[j] = c;
			}
			/* changed or non-ascii byte => name differs */
			if (((dn[j] & 0x80) || *(cp - 1) != dn[j]) &&
			    conv != 3)
				conv = 2;
			/* magic 1: replace with '_'; magic 2: drop char */
			if (dn[j] == 1) {
				conv = 3;
				dn[j] = '_';
			}
			if (dn[j] == 2) {
				conv = 3;
				dn[j--] = ' ';
			}
		}
		if (i > 0)
			conv = 3;
		dp--;
	} else {
		for (dp = cp; *--dp == ' ' || *dp == '.';);
		dp++;
	}

	/*
	 * Now convert the rest of the name (same rules as the extension
	 * loop above, but bounded by the 8-char base name field).
	 */
	for (i = dp - un, j = 0; un < dp && j < 8; j++) {
		c = unix2doschr(&un, &i, pmp);
		if (c & 0xff00) {
			dn[j] = c >> 8;
			if (++j < 8) {
				dn[j] = c;
				if (conv != 3)
					conv = 2;
				continue;
			} else {
				conv = 3;
				dn[j-1] = ' ';
				break;
			}
		} else {
			dn[j] = c;
		}
		if (((dn[j] & 0x80) || *(un - 1) != dn[j]) && conv != 3)
			conv = 2;
		if (dn[j] == 1) {
			conv = 3;
			dn[j] = '_';
		}
		if (dn[j] == 2) {
			conv = 3;
			dn[j--] = ' ';
		}
	}
	if (un < dp)
		conv = 3;
	/*
	 * If we didn't have any chars in filename,
	 * generate a default
	 */
	if (!j)
		dn[0] = '_';

	/*
	 * If there wasn't any char dropped,
	 * there is no place for generation numbers
	 */
	if (conv != 3) {
		if (gen > 1)
			conv = 0;
		goto done;
	}

	/*
	 * Now insert the generation number into the filename part
	 * (rendered right-to-left into gentext as decimal digits).
	 */
	if (gen == 0)
		goto done;
	for (wcp = gentext + sizeof(gentext); wcp > gentext && gen;
	    gen /= 10)
		*--wcp = gen % 10 + '0';
	if (gen) {
		/* generation number didn't fit in 6 digits */
		conv = 0;
		goto done;
	}
	for (i = 8; dn[--i] == ' ';);
	i++;
	if (gentext + sizeof(gentext) - wcp + 1 > 8 - i)
		i = 8 - (gentext + sizeof(gentext) - wcp + 1);
	/*
	 * Correct position to where to insert the generation number
	 */
	cp = dn;
	i -= mbsadjpos((const char**)&cp, i, unlen, 1, pmp->pm_flags,
	    pmp->pm_d2u);
	dn[i++] = '~';
	while (wcp < gentext + sizeof(gentext))
		dn[i++] = *wcp++;
	/*
	 * Tail of the filename should be space
	 */
	while (i < 8)
		dn[i++] = ' ';
	conv = 3;
done:
	/*
	 * The first character cannot be E5,
	 * because that means a deleted entry
	 */
	if (dn[0] == 0xe5)
		dn[0] = SLOT_E5;

	return conv;
}

/*
 * Create a Win95 long name directory entry.
 * Note: assumes that the filename is valid,
 *	 i.e. doesn't consist solely of blanks and dots.
 *
 * Fills winentry slot cnt (counted from 1) of the on-disk LFN chain;
 * returns non-zero while more slots remain, 0 once the terminating
 * (WIN_LAST) slot has been emitted.
 */
int
unix2winfn(const u_char *un, size_t unlen, struct winentry *wep, int cnt,
    int chksum, struct msdosfsmount *pmp)
{
	uint8_t *wcp;
	int i, end;
	uint16_t code;

	/*
	 * Drop trailing blanks and dots
	 */
	unlen = winLenFixup(un, unlen);

	/*
	 * Cut *un for this slot
	 */
	unlen = mbsadjpos((const char **)&un, unlen, (cnt - 1) * WIN_CHARS,
	    2, pmp->pm_flags, pmp->pm_u2w);

	/*
	 * Initialize winentry to some useful default
	 */
	memset(wep, 0xff, sizeof(*wep));
	wep->weCnt = cnt;
	wep->weAttributes = ATTR_WIN95;
	wep->weReserved1 = 0;
	wep->weChksum = chksum;
	wep->weReserved2 = 0;

	/*
	 * Now convert the filename parts.  Each UTF-16 code unit is
	 * stored little-endian across the three wePart arrays.
	 */
	end = 0;
	for (wcp = wep->wePart1, i = sizeof(wep->wePart1)/2;
	    --i >= 0 && !end;) {
		code = unix2winchr(&un, &unlen, 0, pmp);
		*wcp++ = code;
		*wcp++ = code >> 8;
		if (!code)
			end = WIN_LAST;
	}
	for (wcp = wep->wePart2, i = sizeof(wep->wePart2)/2;
	    --i >= 0 && !end;) {
		code = unix2winchr(&un, &unlen, 0, pmp);
		*wcp++ = code;
		*wcp++ = code >> 8;
		if (!code)
			end = WIN_LAST;
	}
	for (wcp = wep->wePart3, i = sizeof(wep->wePart3)/2;
	    --i >= 0 && !end;) {
		code = unix2winchr(&un, &unlen, 0, pmp);
		*wcp++ = code;
		*wcp++ = code >> 8;
		if (!code)
			end = WIN_LAST;
	}
	if (*un == '\0')
		end = WIN_LAST;
	wep->weCnt |= end;
	return !end;
}

/*
 * Compare our filename to the one in the Win95 entry.
 * Returns the checksum or -1 if no match (note: the body below also
 * returns -2 for a name mismatch, as opposed to a flush failure).
 */
int
winChkName(struct mbnambuf *nbp, const u_char *un, size_t unlen,
    int chksum, struct msdosfsmount *pmp)
{
	size_t len;
	uint16_t c1, c2;
	u_char *np;
	struct dirent dirbuf;

	/*
	 * We already have winentry in *nbp.
	 */
	if (!mbnambuf_flush(nbp, &dirbuf) || dirbuf.d_namlen == 0)
		return -1;

#ifdef MSDOSFS_DEBUG
	printf("winChkName(): un=%s:%zu,d_name=%s:%d\n", un, unlen,
	    dirbuf.d_name, dirbuf.d_namlen);
#endif

	/*
	 * Compare the name parts
	 */
	len = dirbuf.d_namlen;
	if (unlen != len)
		return -2;

	for (np = dirbuf.d_name; unlen > 0 && len > 0;) {
		/*
		 * Comparison must be case insensitive, because FAT disallows
		 * to look up or create files in case sensitive even when
		 * it's a long file name.
		 */
		c1 = unix2winchr(__DECONST(const u_char **, &np), &len,
		    LCASE_BASE, pmp);
		c2 = unix2winchr(&un, &unlen, LCASE_BASE, pmp);
		if (c1 != c2)
			return -2;
	}
	return chksum;
}

/*
 * Convert Win95 filename to dirbuf.
 * Returns the checksum or -1 if impossible.
 *
 * Decodes the three UTF-16LE name fragments of one LFN slot into a
 * local multibyte buffer and appends it to the concatenation buffer
 * *nbp at the position given by the slot's sequence number.
 */
int
win2unixfn(struct mbnambuf *nbp, struct winentry *wep, int chksum,
    struct msdosfsmount *pmp)
{
	u_char *c, tmpbuf[5];
	uint8_t *cp;
	uint8_t *np, name[WIN_CHARS * 3 + 1];
	uint16_t code;
	int i;

	if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS) ||
	    !(wep->weCnt&WIN_CNT))
		return -1;

	/*
	 * First compare checksums
	 */
	if (wep->weCnt&WIN_LAST) {
		chksum = wep->weChksum;
	} else if (chksum != wep->weChksum)
		chksum = -1;
	if (chksum == -1)
		return -1;

	/*
	 * Convert the name parts.  Code 0 terminates the name; '/' is
	 * rejected outright; everything else is converted to the local
	 * character set via win2unixchr().
	 */
	np = name;
	for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
		code = (cp[1] << 8) | cp[0];
		switch (code) {
		case 0:
			*np = '\0';
			if (mbnambuf_write(nbp, name,
			    (wep->weCnt & WIN_CNT) - 1) != 0)
				return -1;
			return chksum;
		case '/':
			*np = '\0';
			return -1;
		default:
			c = win2unixchr(tmpbuf, code, pmp);
			while (*c != '\0')
				*np++ = *c++;
			break;
		}
		cp += 2;
	}
	for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
		code = (cp[1] << 8) | cp[0];
		switch (code) {
		case 0:
			*np = '\0';
			if (mbnambuf_write(nbp, name,
			    (wep->weCnt & WIN_CNT) - 1) != 0)
				return -1;
			return chksum;
		case '/':
			*np = '\0';
			return -1;
		default:
			c = win2unixchr(tmpbuf, code, pmp);
			while (*c != '\0')
				*np++ = *c++;
			break;
		}
		cp += 2;
	}
	for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
		code = (cp[1] << 8) | cp[0];
		switch (code) {
		case 0:
			*np = '\0';
			if (mbnambuf_write(nbp, name,
			    (wep->weCnt & WIN_CNT) - 1) != 0)
				return -1;
			return chksum;
		case '/':
			*np = '\0';
			return -1;
		default:
			c = win2unixchr(tmpbuf, code, pmp);
			while (*c != '\0')
				*np++ = *c++;
			break;
		}
		cp += 2;
	}
	*np = '\0';
	if (mbnambuf_write(nbp, name, (wep->weCnt & WIN_CNT) - 1) != 0)
		return -1;
	return chksum;
}

/*
 * Compute the unrolled checksum of a DOS filename for Win95 LFN use.
 * For each of the 11 bytes: rotate the accumulator right by one bit,
 * then add the next name byte (the add is in the for-increment, so it
 * follows the rotate).
 */
uint8_t
winChksum(uint8_t *name)
{
	int i;
	uint8_t s;

	for (s = 0, i = 11; --i >= 0; s += *name++)
		s = (s << 7)|(s >> 1);
	return (s);
}

/*
 * Determine the number of slots necessary for Win95 names
 */
int
winSlotCnt(const u_char *un, size_t unlen, struct msdosfsmount *pmp)
{
	size_t wlen;
	char wn[WIN_MAXLEN * 2 + 1], *wnp;

	unlen = winLenFixup(un, unlen);

	if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
		/*
		 * Convert the whole name to UTF-16 to learn its true
		 * length; a nonzero residue means it didn't fit.
		 */
		wlen = WIN_MAXLEN * 2;
		wnp = wn;
		msdosfs_iconv->conv(pmp->pm_u2w, (const char **)&un, &unlen,
		    &wnp, &wlen);
		if (unlen > 0)
			return 0;
		return howmany(WIN_MAXLEN - wlen/2, WIN_CHARS);
	}

	if (unlen > WIN_MAXLEN)
		return 0;
	return howmany(unlen, WIN_CHARS);
}

/*
 * Determine the number of bytes necessary for Win95 names
 * (strips trailing blanks and dots).
 */
size_t
winLenFixup(const u_char *un, size_t unlen)
{

	for (un += unlen; unlen > 0; unlen--)
		if (*--un != ' ' && *un != '.')
			break;
	return unlen;
}

/*
 * Store an area with multi byte string instr, and returns left
 * byte of instr and moves pointer forward.  The area's size is
 * inlen or outlen.
 */
static int
mbsadjpos(const char **instr, size_t inlen, size_t outlen, int weight,
    int flag, void *handle)
{
	/*
	 * NOTE(review): variable-length array on the kernel stack, sized
	 * by outlen * weight — presumably bounded by the callers' name
	 * length limits; confirm before reuse in new paths.
	 */
	char *outp, outstr[outlen * weight + 1];

	if (flag & MSDOSFSMNT_KICONV && msdosfs_iconv) {
		outp = outstr;
		outlen *= weight;
		/* conv() advances *instr and decrements inlen in place */
		msdosfs_iconv->conv(handle, instr, &inlen, &outp, &outlen);
		return (inlen);
	}

	/* No iconv: one byte per character, simple pointer arithmetic. */
	(*instr) += min(inlen, outlen);
	return (inlen - min(inlen, outlen));
}

/*
 * Convert DOS char to Local char.
 *
 * Consumes one (possibly multi-byte) character from *instr, advancing
 * the pointer and decrementing *ilen, and returns a pointer to the
 * NUL-terminated conversion result stored in outbuf (caller-supplied,
 * at least 5 bytes).  On iconv failure one input byte is consumed and
 * "?" is returned.
 */
static u_char *
dos2unixchr(u_char *outbuf, const u_char **instr, size_t *ilen, int lower,
    struct msdosfsmount *pmp)
{
	u_char c, *outp;
	size_t len, olen;

	outp = outbuf;
	if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
		olen = len = 4;
		if (lower & (LCASE_BASE | LCASE_EXT))
			msdosfs_iconv->convchr_case(pmp->pm_d2u,
			    (const char **)instr, ilen, (char **)&outp,
			    &olen, KICONV_LOWER);
		else
			msdosfs_iconv->convchr(pmp->pm_d2u,
			    (const char **)instr, ilen, (char **)&outp,
			    &olen);
		len -= olen;

		/*
		 * return '?' if failed to convert
		 */
		if (len == 0) {
			(*ilen)--;
			(*instr)++;
			*outp++ = '?';
		}
	} else {
		(*ilen)--;
		c = *(*instr)++;
		c = dos2unix[c];
		if (lower & (LCASE_BASE | LCASE_EXT))
			c = u2l[c];
		*outp++ = c;
		outbuf[1] = '\0';
	}

	*outp = '\0';
	outp = outbuf;
	return (outp);
}

/*
 * Convert Local char to DOS char.
 *
 * Returns the converted character (possibly a two-byte value with the
 * lead byte in the high 8 bits), or one of the magic values used by
 * unix2dosfn(): 0 = char not representable at all, 1 = replace with
 * '_', 2 = drop the character.
 */
static uint16_t
unix2doschr(const u_char **instr, size_t *ilen, struct msdosfsmount *pmp)
{
	u_char c;
	char *up, *outp, unicode[3], outbuf[3];
	uint16_t wc;
	size_t len, ucslen, unixlen, olen;

	if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
		/*
		 * to hide an invisible character, using a unicode filter
		 */
		ucslen = 2;
		len = *ilen;
		up = unicode;
		msdosfs_iconv->convchr(pmp->pm_u2w, (const char **)instr,
		    ilen, &up, &ucslen);
		unixlen = len - *ilen;

		/*
		 * cannot be converted
		 */
		if (unixlen == 0) {
			(*ilen)--;
			(*instr)++;
			return (0);
		}

		/*
		 * return magic number for ascii char
		 */
		if (unixlen == 1) {
			c = *(*instr - 1);
			if (!(c & 0x80)) {
				c = unix2dos[c];
				if (c <= 2)
					return (c);
			}
		}

		/*
		 * now convert using libiconv
		 * (rewind *instr first: the unicode probe above already
		 * consumed the character)
		 */
		*instr -= unixlen;
		*ilen = len;
		olen = len = 2;
		outp = outbuf;
		msdosfs_iconv->convchr_case(pmp->pm_u2d,
		    (const char **)instr, ilen, &outp, &olen,
		    KICONV_FROM_UPPER);
		len -= olen;

		/*
		 * cannot be converted, but has unicode char, should return
		 * magic number
		 */
		if (len == 0) {
			(*ilen) -= unixlen;
			(*instr) += unixlen;
			return (1);
		}

		/* pack the 1 or 2 output bytes into a uint16_t */
		wc = 0;
		while (len--)
			wc |= (*(outp - len - 1) & 0xff) << (len << 3);
		return (wc);
	}

	(*ilen)--;
	c = *(*instr)++;
	c = l2u[c];
	c = unix2dos[c];
	return ((uint16_t)c);
}

/*
 * Convert Windows char to Local char.
 *
 * outbuf is a caller-supplied buffer (at least 5 bytes); the result is
 * NUL-terminated.  Unconvertible characters become "?".
 */
static u_char *
win2unixchr(u_char *outbuf, uint16_t wc, struct msdosfsmount *pmp)
{
	u_char *inp, *outp, inbuf[3];
	size_t ilen, olen, len;

	outp = outbuf;
	if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
		/* feed the code unit to iconv big-endian, as w2u expects */
		inbuf[0] = (u_char)(wc>>8);
		inbuf[1] = (u_char)wc;
		inbuf[2] = '\0';

		ilen = 2;
		olen = len = 4;
		inp = inbuf;
		msdosfs_iconv->convchr(pmp->pm_w2u,
		    __DECONST(const char **, &inp), &ilen, (char **)&outp,
		    &olen);
		len -= olen;

		/*
		 * return '?' if failed to convert
		 */
		if (len == 0)
			*outp++ = '?';
	} else {
		*outp++ = (wc & 0xff00) ? '?' : (u_char)(wc & 0xff);
	}

	*outp = '\0';
	outp = outbuf;
	return (outp);
}

/*
 * Convert Local char to Windows char.
 *
 * Returns the next UTF-16 code unit from *instr (advancing it), 0 at
 * end of the filename.  lower selects LCASE_* case folding.
 */
static uint16_t
unix2winchr(const u_char **instr, size_t *ilen, int lower,
    struct msdosfsmount *pmp)
{
	u_char *outp, outbuf[3];
	uint16_t wc;
	size_t olen;

	if (*ilen == 0)
		return (0);

	if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
		outp = outbuf;
		olen = 2;
		if (lower & (LCASE_BASE | LCASE_EXT))
			msdosfs_iconv->convchr_case(pmp->pm_u2w,
			    (const char **)instr, ilen, (char **)&outp,
			    &olen, KICONV_FROM_LOWER);
		else
			msdosfs_iconv->convchr(pmp->pm_u2w,
			    (const char **)instr, ilen, (char **)&outp,
			    &olen);

		/*
		 * return '0' if end of filename (nothing was produced)
		 */
		if (olen == 2)
			return (0);

		wc = (outbuf[0]<<8) | outbuf[1];
		return (wc);
	}

	(*ilen)--;
	wc = (*instr)[0];
	if (lower & (LCASE_BASE | LCASE_EXT))
		wc = u2l[wc];
	(*instr)++;
	return (wc);
}

/*
 * Initialize the temporary concatenation buffer.
 */
void
mbnambuf_init(struct mbnambuf *nbp)
{

	nbp->nb_len = 0;
	nbp->nb_last_id = -1;
	nbp->nb_buf[sizeof(nbp->nb_buf) - 1] = '\0';
}

/*
 * Fill out our concatenation buffer with the given substring, at the offset
 * specified by its id.  Since this function must be called with ids in
 * descending order, we take advantage of the fact that ASCII substrings are
 * exactly WIN_CHARS in length.  For non-ASCII substrings, we shift all
 * previous (i.e. higher id) substrings upwards to make room for this one.
 * This only penalizes portions of substrings that contain more than
 * WIN_CHARS bytes when they are first encountered.
 */
int
mbnambuf_write(struct mbnambuf *nbp, char *name, int id)
{
	char *slot;
	size_t count, newlen;

	if (nbp->nb_len != 0 && id != nbp->nb_last_id - 1) {
#ifdef MSDOSFS_DEBUG
		printf("msdosfs: non-decreasing id: id %d, last id %d\n",
		    id, nbp->nb_last_id);
#endif
		return (EINVAL);
	}

	/*
	 * Will store this substring in a WIN_CHARS-aligned slot.
*/ slot = &nbp->nb_buf[id * WIN_CHARS]; count = strlen(name); newlen = nbp->nb_len + count; if (newlen > WIN_MAXLEN || newlen > MAXNAMLEN) { #ifdef MSDOSFS_DEBUG printf("msdosfs: file name length %zu too large\n", newlen); #endif return (ENAMETOOLONG); } /* Shift suffix upwards by the amount length exceeds WIN_CHARS. */ if (count > WIN_CHARS && nbp->nb_len != 0) { if ((id * WIN_CHARS + count + nbp->nb_len) > sizeof(nbp->nb_buf)) return (ENAMETOOLONG); memmove(slot + count, slot + WIN_CHARS, nbp->nb_len); } /* Copy in the substring to its slot and update length so far. */ memcpy(slot, name, count); nbp->nb_len = newlen; nbp->nb_last_id = id; return (0); } /* * Take the completed string and use it to setup the struct dirent. * Be sure to always nul-terminate the d_name and then copy the string * from our buffer. Note that this function assumes the full string has * been reassembled in the buffer. If it's called before all substrings * have been written via mbnambuf_write(), the result will be incorrect. */ char * mbnambuf_flush(struct mbnambuf *nbp, struct dirent *dp) { if (nbp->nb_len > sizeof(dp->d_name) - 1) { mbnambuf_init(nbp); return (NULL); } memcpy(dp->d_name, &nbp->nb_buf[0], nbp->nb_len); dp->d_name[nbp->nb_len] = '\0'; dp->d_namlen = nbp->nb_len; mbnambuf_init(nbp); return (dp->d_name); } Index: head/sys/fs/msdosfs/msdosfs_denode.c =================================================================== --- head/sys/fs/msdosfs/msdosfs_denode.c (revision 326267) +++ head/sys/fs/msdosfs/msdosfs_denode.c (revision 326268) @@ -1,616 +1,618 @@ /* $FreeBSD$ */ /* $NetBSD: msdosfs_denode.c,v 1.28 1998/02/10 14:10:00 mrg Exp $ */ /*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". 
 *
 * The author supplies this software to be publicly redistributed on the
 * understanding that the author is not responsible for the correct
 * functioning of this software in any circumstances and is not liable for
 * any damages caused by this software.
 *
 * October 1992
 */

/*
 * NOTE(review): the header names of the following include directives
 * were lost when this file was extracted; restore them from upstream
 * msdosfs_denode.c before compiling.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

static MALLOC_DEFINE(M_MSDOSFSNODE, "msdosfs_node", "MSDOSFS vnode private part");

/*
 * vfs_hash comparison callback: arg points at the 64-bit inode number
 * being looked up; returns 0 (match) when the vnode's denode carries
 * the same de_inode.
 */
static int
de_vncmpf(struct vnode *vp, void *arg)
{
	struct denode *de;
	uint64_t *a;

	a = arg;
	de = VTODE(vp);
	return (de->de_inode != *a);
}

/*
 * If deget() succeeds it returns with the gotten denode locked().
 *
 * pmp	     - address of msdosfsmount structure of the filesystem
 *	       containing the denode of interest.  The address of
 *	       the msdosfsmount structure are used.
 * dirclust  - which cluster bp contains, if dirclust is 0 (root directory)
 *	       diroffset is relative to the beginning of the root directory,
 *	       otherwise it is cluster relative.
 * diroffset - offset past begin of cluster of denode we want
 * depp	     - returns the address of the gotten denode.
 */
int
deget(struct msdosfsmount *pmp, u_long dirclust, u_long diroffset,
    struct denode **depp)
{
	int error;
	uint64_t inode;
	struct mount *mntp = pmp->pm_mountp;
	struct direntry *direntptr;
	struct denode *ldep;
	struct vnode *nvp, *xvp;
	struct buf *bp;

#ifdef MSDOSFS_DEBUG
	printf("deget(pmp %p, dirclust %lu, diroffset %lx, depp %p)\n",
	    pmp, dirclust, diroffset, depp);
#endif

	/*
	 * On FAT32 filesystems, root is a (more or less) normal
	 * directory
	 */
	if (FAT32(pmp) && dirclust == MSDOSFSROOT)
		dirclust = pmp->pm_rootdirblk;

	/*
	 * See if the denode is in the denode cache.  Use the location of
	 * the directory entry to compute the hash value.  For subdir use
	 * address of "." entry.  For root dir (if not FAT32) use cluster
	 * MSDOSFSROOT, offset MSDOSFSROOT_OFS
	 *
	 * NOTE: The check for de_refcnt > 0 below ensures the denode being
	 * examined does not represent an unlinked but still open file.
	 * These files are not to be accessible even when the directory
	 * entry that represented the file happens to be reused while the
	 * deleted file is still open.
	 */
	inode = (uint64_t)pmp->pm_bpcluster * dirclust + diroffset;

	error = vfs_hash_get(mntp, inode, LK_EXCLUSIVE, curthread, &nvp,
	    de_vncmpf, &inode);
	if (error)
		return (error);
	if (nvp != NULL) {
		*depp = VTODE(nvp);
		KASSERT((*depp)->de_dirclust == dirclust,
		    ("wrong dirclust"));
		KASSERT((*depp)->de_diroffset == diroffset,
		    ("wrong diroffset"));
		return (0);
	}
	ldep = malloc(sizeof(struct denode), M_MSDOSFSNODE,
	    M_WAITOK | M_ZERO);

	/*
	 * Directory entry was not in cache, have to create a vnode and
	 * copy it from the passed disk buffer.
	 */
	/* getnewvnode() does a VREF() on the vnode */
	error = getnewvnode("msdosfs", mntp, &msdosfs_vnodeops, &nvp);
	if (error) {
		*depp = NULL;
		free(ldep, M_MSDOSFSNODE);
		return error;
	}
	nvp->v_data = ldep;
	ldep->de_vnode = nvp;
	ldep->de_flag = 0;
	ldep->de_dirclust = dirclust;
	ldep->de_diroffset = diroffset;
	ldep->de_inode = inode;
	lockmgr(nvp->v_vnlock, LK_EXCLUSIVE, NULL);
	fc_purge(ldep, 0);	/* init the FAT cache for this denode */
	error = insmntque(nvp, mntp);
	if (error != 0) {
		/* insmntque() has already vgone'd/released the vnode */
		free(ldep, M_MSDOSFSNODE);
		*depp = NULL;
		return (error);
	}
	error = vfs_hash_insert(nvp, inode, LK_EXCLUSIVE, curthread, &xvp,
	    de_vncmpf, &inode);
	if (error) {
		*depp = NULL;
		return (error);
	}
	if (xvp != NULL) {
		/* lost the race: another thread inserted this denode */
		*depp = xvp->v_data;
		return (0);
	}

	ldep->de_pmp = pmp;
	ldep->de_refcnt = 1;
	/*
	 * Copy the directory entry into the denode area of the vnode.
	 */
	if ((dirclust == MSDOSFSROOT ||
	    (FAT32(pmp) && dirclust == pmp->pm_rootdirblk)) &&
	    diroffset == MSDOSFSROOT_OFS) {
		/*
		 * Directory entry for the root directory.  There isn't one,
		 * so we manufacture one.  We should probably rummage
		 * through the root directory and find a label entry (if it
		 * exists), and then use the time and date from that entry
		 * as the time and date for the root denode.
		 */
		nvp->v_vflag |= VV_ROOT; /* should be further down XXX */

		ldep->de_Attributes = ATTR_DIRECTORY;
		ldep->de_LowerCase = 0;
		if (FAT32(pmp))
			ldep->de_StartCluster = pmp->pm_rootdirblk;
			/* de_FileSize will be filled in further down */
		else {
			ldep->de_StartCluster = MSDOSFSROOT;
			ldep->de_FileSize = pmp->pm_rootdirsize * DEV_BSIZE;
		}
		/*
		 * fill in time and date so that fattime2timespec() doesn't
		 * spit up when called from msdosfs_getattr() with root
		 * denode
		 */
		ldep->de_CHun = 0;
		ldep->de_CTime = 0x0000;	/* 00:00:00	 */
		ldep->de_CDate = (0 << DD_YEAR_SHIFT) |
		    (1 << DD_MONTH_SHIFT) |
		    (1 << DD_DAY_SHIFT);
		/* Jan 1, 1980	 */
		ldep->de_ADate = ldep->de_CDate;
		ldep->de_MTime = ldep->de_CTime;
		ldep->de_MDate = ldep->de_CDate;
		/* leave the other fields as garbage */
	} else {
		error = readep(pmp, dirclust, diroffset, &bp, &direntptr);
		if (error) {
			/*
			 * The denode does not contain anything useful, so
			 * it would be wrong to leave it on its hash chain.
			 * Arrange for vput() to just forget about it.
			 */
			ldep->de_Name[0] = SLOT_DELETED;

			vput(nvp);
			*depp = NULL;
			return (error);
		}
		(void)DE_INTERNALIZE(ldep, direntptr);
		brelse(bp);
	}

	/*
	 * Fill in a few fields of the vnode and finish filling in the
	 * denode.  Then return the address of the found denode.
	 */
	if (ldep->de_Attributes & ATTR_DIRECTORY) {
		/*
		 * Since DOS directory entries that describe directories
		 * have 0 in the filesize field, we take this opportunity
		 * to find out the length of the directory and plug it into
		 * the denode structure.
		 */
		u_long size;

		/*
		 * XXX it sometimes happens that the "." entry has cluster
		 * number 0 when it shouldn't.  Use the actual cluster
		 * number instead of what is written in directory entry.
		 */
		if (diroffset == 0 && ldep->de_StartCluster != dirclust) {
#ifdef MSDOSFS_DEBUG
			printf("deget(): \".\" entry at clust %lu != %lu\n",
			    dirclust, ldep->de_StartCluster);
#endif
			ldep->de_StartCluster = dirclust;
		}

		nvp->v_type = VDIR;
		if (ldep->de_StartCluster != MSDOSFSROOT) {
			/* walk the chain to learn the directory's size */
			error = pcbmap(ldep, 0xffff, 0, &size, 0);
			if (error == E2BIG) {
				ldep->de_FileSize = de_cn2off(pmp, size);
				error = 0;
			} else {
#ifdef MSDOSFS_DEBUG
				printf("deget(): pcbmap returned %d\n",
				    error);
#endif
			}
		}
	} else
		nvp->v_type = VREG;
	ldep->de_modrev = init_va_filerev();
	*depp = ldep;
	return (0);
}

/*
 * Write the denode's cached directory-entry changes back to the slot
 * on disk.  A read-only mount simply clears the pending flags; an
 * unchanged on-disk entry is left alone.  waitfor selects synchronous
 * (bwrite) vs. delayed write-back.
 */
int
deupdat(struct denode *dep, int waitfor)
{
	struct direntry dir;
	struct timespec ts;
	struct buf *bp;
	struct direntry *dirp;
	int error;

	if (DETOV(dep)->v_mount->mnt_flag & MNT_RDONLY) {
		dep->de_flag &= ~(DE_UPDATE | DE_CREATE | DE_ACCESS |
		    DE_MODIFIED);
		return (0);
	}
	getnanotime(&ts);
	DETIMES(dep, &ts, &ts, &ts);
	if ((dep->de_flag & DE_MODIFIED) == 0 && waitfor == 0)
		return (0);
	dep->de_flag &= ~DE_MODIFIED;
	/* the root directory has no on-disk entry to update */
	if (DETOV(dep)->v_vflag & VV_ROOT)
		return (EINVAL);
	if (dep->de_refcnt <= 0)
		return (0);
	error = readde(dep, &bp, &dirp);
	if (error)
		return (error);
	DE_EXTERNALIZE(&dir, dep);
	if (bcmp(dirp, &dir, sizeof(dir)) == 0) {
		/* nothing changed on disk; skip the write if we can */
		if (waitfor == 0 || (bp->b_flags & B_DELWRI) == 0) {
			brelse(bp);
			return (0);
		}
	} else
		*dirp = dir;
	if ((DETOV(dep)->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
		bp->b_flags |= B_CLUSTEROK;
	if (waitfor)
		error = bwrite(bp);
	else if (vm_page_count_severe() || buf_dirty_count_severe())
		bawrite(bp);
	else
		bdwrite(bp);
	return (error);
}

/*
 * Truncate the file described by dep to the length specified by length.
 */
int
detrunc(struct denode *dep, u_long length, int flags, struct ucred *cred)
{
	int error;
	int allerror;
	u_long eofentry;
	u_long chaintofree;
	daddr_t bn;
	int boff;
	int isadir = dep->de_Attributes & ATTR_DIRECTORY;
	struct buf *bp;
	struct msdosfsmount *pmp = dep->de_pmp;

#ifdef MSDOSFS_DEBUG
	printf("detrunc(): file %s, length %lu, flags %x\n",
	    dep->de_Name, length, flags);
#endif

	/*
	 * Disallow attempts to truncate the root directory since it is of
	 * fixed size.  That's just the way dos filesystems are.  We use
	 * the VROOT bit in the vnode because checking for the directory
	 * bit and a startcluster of 0 in the denode is not adequate to
	 * recognize the root directory at this point in a file or
	 * directory's life.
	 */
	if ((DETOV(dep)->v_vflag & VV_ROOT) && !FAT32(pmp)) {
#ifdef MSDOSFS_DEBUG
		printf("detrunc(): can't truncate root directory, clust %ld, offset %ld\n",
		    dep->de_dirclust, dep->de_diroffset);
#endif
		return (EINVAL);
	}

	/* Growing the file is delegated to deextend(). */
	if (dep->de_FileSize < length) {
		vnode_pager_setsize(DETOV(dep), length);
		return deextend(dep, length, cred);
	}

	/*
	 * If the desired length is 0 then remember the starting cluster of
	 * the file and set the StartCluster field in the directory entry
	 * to 0.  If the desired length is not zero, then get the number of
	 * the last cluster in the shortened file.  Then get the number of
	 * the first cluster in the part of the file that is to be freed.
	 * Then set the next cluster pointer in the last cluster of the
	 * file to CLUST_EOFE.
	 */
	if (length == 0) {
		chaintofree = dep->de_StartCluster;
		dep->de_StartCluster = 0;
		eofentry = ~0;
	} else {
		error = pcbmap(dep, de_clcount(pmp, length) - 1, 0,
		    &eofentry, 0);
		if (error) {
#ifdef MSDOSFS_DEBUG
			printf("detrunc(): pcbmap fails %d\n", error);
#endif
			return (error);
		}
	}

	fc_purge(dep, de_clcount(pmp, length));

	/*
	 * If the new length is not a multiple of the cluster size then we
	 * must zero the tail end of the new last cluster in case it
	 * becomes part of the file again because of a seek.
	 */
	if ((boff = length & pmp->pm_crbomask) != 0) {
		if (isadir) {
			bn = cntobn(pmp, eofentry);
			error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster,
			    NOCRED, &bp);
			if (error) {
				brelse(bp);
#ifdef MSDOSFS_DEBUG
				printf("detrunc(): bread fails %d\n",
				    error);
#endif
				return (error);
			}
			memset(bp->b_data + boff, 0,
			    pmp->pm_bpcluster - boff);
			if (flags & IO_SYNC)
				bwrite(bp);
			else
				bdwrite(bp);
		}
	}

	/*
	 * Write out the updated directory entry.  Even if the update fails
	 * we free the trailing clusters.
	 */
	dep->de_FileSize = length;
	if (!isadir)
		dep->de_flag |= DE_UPDATE | DE_MODIFIED;
	allerror = vtruncbuf(DETOV(dep), cred, length, pmp->pm_bpcluster);
#ifdef MSDOSFS_DEBUG
	if (allerror)
		printf("detrunc(): vtruncbuf error %d\n", allerror);
#endif
	error = deupdat(dep, !DOINGASYNC((DETOV(dep))));
	if (error != 0 && allerror == 0)
		allerror = error;
#ifdef MSDOSFS_DEBUG
	printf("detrunc(): allerror %d, eofentry %lu\n",
	    allerror, eofentry);
#endif

	/*
	 * If we need to break the cluster chain for the file then do it
	 * now.  FAT_GET_AND_SET retrieves the old link (the head of the
	 * chain being freed) into chaintofree while terminating the file
	 * at eofentry.
	 */
	if (eofentry != ~0) {
		error = fatentry(FAT_GET_AND_SET, pmp, eofentry,
		    &chaintofree, CLUST_EOFE);
		if (error) {
#ifdef MSDOSFS_DEBUG
			printf("detrunc(): fatentry errors %d\n", error);
#endif
			return (error);
		}
		fc_setcache(dep, FC_LASTFC, de_cluster(pmp, length - 1),
		    eofentry);
	}

	/*
	 * Now free the clusters removed from the file because of the
	 * truncation.
	 */
	if (chaintofree != 0 && !MSDOSFSEOF(pmp, chaintofree))
		freeclusterchain(pmp, chaintofree);

	return (allerror);
}

/*
 * Extend the file described by dep to length specified by length.
 */
int
deextend(struct denode *dep, u_long length, struct ucred *cred)
{
	struct msdosfsmount *pmp = dep->de_pmp;
	u_long count;
	int error;

	/*
	 * The root of a DOS filesystem cannot be extended.
	 */
	if ((DETOV(dep)->v_vflag & VV_ROOT) && !FAT32(pmp))
		return (EINVAL);

	/*
	 * Directories cannot be extended.
	 */
	if (dep->de_Attributes & ATTR_DIRECTORY)
		return (EISDIR);

	if (length <= dep->de_FileSize)
		panic("deextend: file too large");

	/*
	 * Compute the number of clusters to allocate.
	 */
	count = de_clcount(pmp, length) - de_clcount(pmp, dep->de_FileSize);
	if (count > 0) {
		if (count > pmp->pm_freeclustercount)
			return (ENOSPC);
		error = extendfile(dep, count, NULL, NULL, DE_CLEAR);
		if (error) {
			/* truncate the added clusters away again */
			(void) detrunc(dep, dep->de_FileSize, 0, cred);
			return (error);
		}
	}
	dep->de_FileSize = length;
	dep->de_flag |= DE_UPDATE | DE_MODIFIED;
	return (deupdat(dep, !DOINGASYNC(DETOV(dep))));
}

/*
 * Move a denode to its correct hash queue after the file it represents has
 * been moved to a new directory.
 */
void
reinsert(struct denode *dep)
{
	struct vnode *vp;

	/*
	 * Fix up the denode cache.  If the denode is for a directory,
	 * there is nothing to do since the hash is based on the starting
	 * cluster of the directory file and that hasn't changed.  If for a
	 * file the hash is based on the location of the directory entry,
	 * so we must remove it from the cache and re-enter it with the
	 * hash based on the new location of the directory entry.
	 */
#if 0
	if (dep->de_Attributes & ATTR_DIRECTORY)
		return;
#endif
	vp = DETOV(dep);
	dep->de_inode = (uint64_t)dep->de_pmp->pm_bpcluster *
	    dep->de_dirclust + dep->de_diroffset;
	vfs_hash_rehash(vp, dep->de_inode);
}

int
msdosfs_reclaim(struct vop_reclaim_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct denode *dep = VTODE(vp);

#ifdef MSDOSFS_DEBUG
	printf("msdosfs_reclaim(): dep %p, file %s, refcnt %ld\n",
	    dep, dep->de_Name, dep->de_refcnt);
#endif

	/*
	 * Destroy the vm object and flush associated pages.
	 */
	vnode_destroy_vobject(vp);
	/*
	 * Remove the denode from its hash chain.
	 */
	vfs_hash_remove(vp);
	/*
	 * Purge old data structures associated with the denode.
*/ #if 0 /* XXX */ dep->de_flag = 0; #endif free(dep, M_MSDOSFSNODE); vp->v_data = NULL; return (0); } int msdosfs_inactive(struct vop_inactive_args *ap) { struct vnode *vp = ap->a_vp; struct denode *dep = VTODE(vp); int error = 0; #ifdef MSDOSFS_DEBUG printf("msdosfs_inactive(): dep %p, de_Name[0] %x\n", dep, dep->de_Name[0]); #endif /* * Ignore denodes related to stale file handles. */ if (dep->de_Name[0] == SLOT_DELETED || dep->de_Name[0] == SLOT_EMPTY) goto out; /* * If the file has been deleted and it is on a read/write * filesystem, then truncate the file, and mark the directory slot * as empty. (This may not be necessary for the dos filesystem.) */ #ifdef MSDOSFS_DEBUG printf("msdosfs_inactive(): dep %p, refcnt %ld, mntflag %llx, MNT_RDONLY %llx\n", dep, dep->de_refcnt, (unsigned long long)vp->v_mount->mnt_flag, (unsigned long long)MNT_RDONLY); #endif if (dep->de_refcnt <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { error = detrunc(dep, (u_long) 0, 0, NOCRED); dep->de_flag |= DE_UPDATE; dep->de_Name[0] = SLOT_DELETED; } deupdat(dep, 0); out: /* * If we are done with the denode, reclaim it * so that it can be reused immediately. */ #ifdef MSDOSFS_DEBUG printf("msdosfs_inactive(): v_usecount %d, de_Name[0] %x\n", vrefcnt(vp), dep->de_Name[0]); #endif if (dep->de_Name[0] == SLOT_DELETED || dep->de_Name[0] == SLOT_EMPTY) vrecycle(vp); return (error); } Index: head/sys/fs/msdosfs/msdosfs_fat.c =================================================================== --- head/sys/fs/msdosfs/msdosfs_fat.c (revision 326267) +++ head/sys/fs/msdosfs/msdosfs_fat.c (revision 326268) @@ -1,1171 +1,1173 @@ /* $FreeBSD$ */ /* $NetBSD: msdosfs_fat.c,v 1.28 1997/11/17 15:36:49 ws Exp $ */ /*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". 
 *
 * The author supplies this software to be publicly redistributed on the
 * understanding that the author is not responsible for the correct
 * functioning of this software in any circumstances and is not liable for
 * any damages caused by this software.
 *
 * October 1992
 */

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

#define	FULL_RUN	((u_int)0xffffffff)

static int	chainalloc(struct msdosfsmount *pmp, u_long start,
		    u_long count, u_long fillwith, u_long *retcluster,
		    u_long *got);
static int	chainlength(struct msdosfsmount *pmp, u_long start,
		    u_long count);
static void	fatblock(struct msdosfsmount *pmp, u_long ofs, u_long *bnp,
		    u_long *sizep, u_long *bop);
static int	fatchain(struct msdosfsmount *pmp, u_long start, u_long count,
		    u_long fillwith);
static void	fc_lookup(struct denode *dep, u_long findcn, u_long *frcnp,
		    u_long *fsrcnp);
static void	updatefats(struct msdosfsmount *pmp, struct buf *bp,
		    u_long fatbn);
static __inline void	usemap_alloc(struct msdosfsmount *pmp, u_long cn);
static __inline void	usemap_free(struct msdosfsmount *pmp, u_long cn);
static int	clusteralloc1(struct msdosfsmount *pmp, u_long start,
		    u_long count, u_long fillwith, u_long *retcluster,
		    u_long *got);

/*
 * Convert a byte offset into the FAT (ofs) into the device block number
 * of the FAT block holding it (*bnp), that block's size in bytes
 * (*sizep, capped at the end of the FAT), and the byte offset within
 * the block (*bop).  Any output pointer may be NULL.  The block number
 * accounts for the currently active FAT copy (pm_curfat).
 */
static void
fatblock(struct msdosfsmount *pmp, u_long ofs, u_long *bnp, u_long *sizep,
    u_long *bop)
{
	u_long bn, size;

	bn = ofs / pmp->pm_fatblocksize * pmp->pm_fatblocksec;
	size = min(pmp->pm_fatblocksec, pmp->pm_FATsecs - bn) * DEV_BSIZE;
	bn += pmp->pm_fatblk + pmp->pm_curfat * pmp->pm_FATsecs;

	if (bnp)
		*bnp = bn;
	if (sizep)
		*sizep = size;
	if (bop)
		*bop = ofs % pmp->pm_fatblocksize;
}

/*
 * Map the logical cluster number of a file into a physical disk sector
 * that is filesystem relative.
 *
 * dep	  - address of denode representing the file of interest
 * findcn - file relative cluster whose filesystem relative cluster number
 *	    and/or block number are/is to be found
 * bnp	  - address of where to place the filesystem relative block number.
 *	    If this pointer is null then don't return this quantity.
 * cnp	  - address of where to place the filesystem relative cluster number.
 *	    If this pointer is null then don't return this quantity.
 * sp	  - pointer to returned block size
 *
 * NOTE: Either bnp or cnp must be non-null.
 * This function has one side effect.  If the requested file relative cluster
 * is beyond the end of file, then the actual number of clusters in the file
 * is returned in *cnp.  This is useful for determining how long a directory
 * is.  If cnp is null, nothing is returned.
 */
int
pcbmap(struct denode *dep, u_long findcn, daddr_t *bnp, u_long *cnp, int *sp)
{
	int error;
	u_long i;
	u_long cn;
	u_long prevcn = 0; /* XXX: prevcn could be used unititialized */
	u_long byteoffset;
	u_long bn;
	u_long bo;
	struct buf *bp = NULL;
	u_long bp_bn = -1;
	struct msdosfsmount *pmp = dep->de_pmp;
	u_long bsize;

	KASSERT(bnp != NULL || cnp != NULL || sp != NULL,
	    ("pcbmap: extra call"));
	ASSERT_VOP_ELOCKED(DETOV(dep), "pcbmap");

	cn = dep->de_StartCluster;
	/*
	 * The "file" that makes up the root directory is contiguous,
	 * permanently allocated, of fixed size, and is not made up of
	 * clusters.  If the cluster number is beyond the end of the root
	 * directory, then return the number of clusters in the file.
	 */
	if (cn == MSDOSFSROOT) {
		if (dep->de_Attributes & ATTR_DIRECTORY) {
			if (de_cn2off(pmp, findcn) >= dep->de_FileSize) {
				if (cnp)
					*cnp = de_bn2cn(pmp,
					    pmp->pm_rootdirsize);
				return (E2BIG);
			}
			if (bnp)
				*bnp = pmp->pm_rootdirblk +
				    de_cn2bn(pmp, findcn);
			if (cnp)
				*cnp = MSDOSFSROOT;
			if (sp)
				*sp = min(pmp->pm_bpcluster,
				    dep->de_FileSize - de_cn2off(pmp, findcn));
			return (0);
		} else {		/* just an empty file */
			if (cnp)
				*cnp = 0;
			return (E2BIG);
		}
	}

	/*
	 * All other files do I/O in cluster sized blocks
	 */
	if (sp)
		*sp = pmp->pm_bpcluster;

	/*
	 * Rummage around in the FAT cache, maybe we can avoid tromping
	 * through every FAT entry for the file.  And, keep track of how far
	 * off the cache was from where we wanted to be.
	 */
	i = 0;
	fc_lookup(dep, findcn, &i, &cn);

	/*
	 * Handle all other files or directories the normal way.
	 * Walk the FAT chain from cluster i toward findcn, reusing the
	 * currently-buffered FAT block (bp_bn) whenever consecutive
	 * entries land in the same block.
	 */
	for (; i < findcn; i++) {
		/*
		 * Stop with all reserved clusters, not just with EOF.
		 */
		if ((cn | ~pmp->pm_fatmask) >= CLUST_RSRVD)
			goto hiteof;
		byteoffset = FATOFS(pmp, cn);
		fatblock(pmp, byteoffset, &bn, &bsize, &bo);
		if (bn != bp_bn) {
			if (bp)
				brelse(bp);
			error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return (error);
			}
			bp_bn = bn;
		}
		prevcn = cn;
		if (bo >= bsize) {
			if (bp)
				brelse(bp);
			return (EIO);
		}
		if (FAT32(pmp))
			cn = getulong(&bp->b_data[bo]);
		else
			cn = getushort(&bp->b_data[bo]);
		/* FAT12 packs two 12-bit entries into 3 bytes; odd entries
		 * occupy the high nibbles. */
		if (FAT12(pmp) && (prevcn & 1))
			cn >>= 4;
		cn &= pmp->pm_fatmask;

		/*
		 * Force the special cluster numbers
		 * to be the same for all cluster sizes
		 * to let the rest of msdosfs handle
		 * all cases the same.
		 */
		if ((cn | ~pmp->pm_fatmask) >= CLUST_RSRVD)
			cn |= ~pmp->pm_fatmask;
	}

	if (!MSDOSFSEOF(pmp, cn)) {
		if (bp)
			brelse(bp);
		if (bnp)
			*bnp = cntobn(pmp, cn);
		if (cnp)
			*cnp = cn;
		fc_setcache(dep, FC_LASTMAP, i, cn);
		return (0);
	}

hiteof:;
	if (cnp)
		*cnp = i;
	if (bp)
		brelse(bp);
	/* update last file cluster entry in the FAT cache */
	fc_setcache(dep, FC_LASTFC, i - 1, prevcn);
	return (E2BIG);
}

/*
 * Find the closest entry in the FAT cache to the cluster we are looking
 * for.  Only cache entries at or before findcn are candidates; the best
 * one (if any) is copied to *frcnp/*fsrcnp, otherwise the outputs are
 * left untouched.
 */
static void
fc_lookup(struct denode *dep, u_long findcn, u_long *frcnp, u_long *fsrcnp)
{
	int i;
	u_long cn;
	struct fatcache *closest = NULL;

	ASSERT_VOP_LOCKED(DETOV(dep), "fc_lookup");

	for (i = 0; i < FC_SIZE; i++) {
		cn = dep->de_fc[i].fc_frcn;
		if (cn != FCE_EMPTY && cn <= findcn) {
			if (closest == NULL || cn > closest->fc_frcn)
				closest = &dep->de_fc[i];
		}
	}
	if (closest) {
		*frcnp = closest->fc_frcn;
		*fsrcnp = closest->fc_fsrcn;
	}
}

/*
 * Purge the FAT cache in denode dep of all entries relating to file
 * relative cluster frcn and beyond.
*/ void fc_purge(struct denode *dep, u_int frcn) { int i; struct fatcache *fcp; ASSERT_VOP_ELOCKED(DETOV(dep), "fc_purge"); fcp = dep->de_fc; for (i = 0; i < FC_SIZE; i++, fcp++) { if (fcp->fc_frcn >= frcn) fcp->fc_frcn = FCE_EMPTY; } } /* * Update the FAT. * If mirroring the FAT, update all copies, with the first copy as last. * Else update only the current FAT (ignoring the others). * * pmp - msdosfsmount structure for filesystem to update * bp - addr of modified FAT block * fatbn - block number relative to begin of filesystem of the modified FAT block. */ static void updatefats(struct msdosfsmount *pmp, struct buf *bp, u_long fatbn) { struct buf *bpn; int cleanfat, i; #ifdef MSDOSFS_DEBUG printf("updatefats(pmp %p, bp %p, fatbn %lu)\n", pmp, bp, fatbn); #endif if (pmp->pm_flags & MSDOSFS_FATMIRROR) { /* * Now copy the block(s) of the modified FAT to the other copies of * the FAT and write them out. This is faster than reading in the * other FATs and then writing them back out. This could tie up * the FAT for quite a while. Preventing others from accessing it. * To prevent us from going after the FAT quite so much we use * delayed writes, unless they specified "synchronous" when the * filesystem was mounted. If synch is asked for then use * bwrite()'s and really slow things down. */ if (fatbn != pmp->pm_fatblk || FAT12(pmp)) cleanfat = 0; else if (FAT16(pmp)) cleanfat = 16; else cleanfat = 32; for (i = 1; i < pmp->pm_FATs; i++) { fatbn += pmp->pm_FATsecs; /* getblk() never fails */ bpn = getblk(pmp->pm_devvp, fatbn, bp->b_bcount, 0, 0, 0); memcpy(bpn->b_data, bp->b_data, bp->b_bcount); /* Force the clean bit on in the other copies. */ if (cleanfat == 16) ((uint8_t *)bpn->b_data)[3] |= 0x80; else if (cleanfat == 32) ((uint8_t *)bpn->b_data)[7] |= 0x08; if (pmp->pm_mountp->mnt_flag & MNT_SYNCHRONOUS) bwrite(bpn); else bdwrite(bpn); } } /* * Write out the first (or current) FAT last. 
*/ if (pmp->pm_mountp->mnt_flag & MNT_SYNCHRONOUS) bwrite(bp); else bdwrite(bp); } /* * Updating entries in 12 bit FATs is a pain in the butt. * * The following picture shows where nibbles go when moving from a 12 bit * cluster number into the appropriate bytes in the FAT. * * byte m byte m+1 byte m+2 * +----+----+ +----+----+ +----+----+ * | 0 1 | | 2 3 | | 4 5 | FAT bytes * +----+----+ +----+----+ +----+----+ * * +----+----+----+ +----+----+----+ * | 3 0 1 | | 4 5 2 | * +----+----+----+ +----+----+----+ * cluster n cluster n+1 * * Where n is even. m = n + (n >> 2) * */ static __inline void usemap_alloc(struct msdosfsmount *pmp, u_long cn) { MSDOSFS_ASSERT_MP_LOCKED(pmp); KASSERT(cn <= pmp->pm_maxcluster, ("cn too large %lu %lu", cn, pmp->pm_maxcluster)); KASSERT((pmp->pm_flags & MSDOSFSMNT_RONLY) == 0, ("usemap_alloc on ro msdosfs mount")); KASSERT((pmp->pm_inusemap[cn / N_INUSEBITS] & (1 << (cn % N_INUSEBITS))) == 0, ("Allocating used sector %ld %ld %x", cn, cn % N_INUSEBITS, (unsigned)pmp->pm_inusemap[cn / N_INUSEBITS])); pmp->pm_inusemap[cn / N_INUSEBITS] |= 1 << (cn % N_INUSEBITS); KASSERT(pmp->pm_freeclustercount > 0, ("usemap_alloc: too little")); pmp->pm_freeclustercount--; pmp->pm_flags |= MSDOSFS_FSIMOD; } static __inline void usemap_free(struct msdosfsmount *pmp, u_long cn) { MSDOSFS_ASSERT_MP_LOCKED(pmp); KASSERT(cn <= pmp->pm_maxcluster, ("cn too large %lu %lu", cn, pmp->pm_maxcluster)); KASSERT((pmp->pm_flags & MSDOSFSMNT_RONLY) == 0, ("usemap_free on ro msdosfs mount")); pmp->pm_freeclustercount++; pmp->pm_flags |= MSDOSFS_FSIMOD; KASSERT((pmp->pm_inusemap[cn / N_INUSEBITS] & (1 << (cn % N_INUSEBITS))) != 0, ("Freeing unused sector %ld %ld %x", cn, cn % N_INUSEBITS, (unsigned)pmp->pm_inusemap[cn / N_INUSEBITS])); pmp->pm_inusemap[cn / N_INUSEBITS] &= ~(1 << (cn % N_INUSEBITS)); } int clusterfree(struct msdosfsmount *pmp, u_long cluster, u_long *oldcnp) { int error; u_long oldcn; error = fatentry(FAT_GET_AND_SET, pmp, cluster, &oldcn, MSDOSFSFREE); 
if (error) return (error); /* * If the cluster was successfully marked free, then update * the count of free clusters, and turn off the "allocated" * bit in the "in use" cluster bit map. */ MSDOSFS_LOCK_MP(pmp); usemap_free(pmp, cluster); MSDOSFS_UNLOCK_MP(pmp); if (oldcnp) *oldcnp = oldcn; return (0); } /* * Get or Set or 'Get and Set' the cluster'th entry in the FAT. * * function - whether to get or set a FAT entry * pmp - address of the msdosfsmount structure for the filesystem * whose FAT is to be manipulated. * cn - which cluster is of interest * oldcontents - address of a word that is to receive the contents of the * cluster'th entry if this is a get function * newcontents - the new value to be written into the cluster'th element of * the FAT if this is a set function. * * This function can also be used to free a cluster by setting the FAT entry * for a cluster to 0. * * All copies of the FAT are updated if this is a set function. NOTE: If * fatentry() marks a cluster as free it does not update the inusemap in * the msdosfsmount structure. This is left to the caller. */ int fatentry(int function, struct msdosfsmount *pmp, u_long cn, u_long *oldcontents, u_long newcontents) { int error; u_long readcn; u_long bn, bo, bsize, byteoffset; struct buf *bp; #ifdef MSDOSFS_DEBUG printf("fatentry(func %d, pmp %p, clust %lu, oldcon %p, newcon %lx)\n", function, pmp, cn, oldcontents, newcontents); #endif #ifdef DIAGNOSTIC /* * Be sure they asked us to do something. */ if ((function & (FAT_SET | FAT_GET)) == 0) { #ifdef MSDOSFS_DEBUG printf("fatentry(): function code doesn't specify get or set\n"); #endif return (EINVAL); } /* * If they asked us to return a cluster number but didn't tell us * where to put it, give them an error. */ if ((function & FAT_GET) && oldcontents == NULL) { #ifdef MSDOSFS_DEBUG printf("fatentry(): get function with no place to put result\n"); #endif return (EINVAL); } #endif /* * Be sure the requested cluster is in the filesystem. 
*/ if (cn < CLUST_FIRST || cn > pmp->pm_maxcluster) return (EINVAL); byteoffset = FATOFS(pmp, cn); fatblock(pmp, byteoffset, &bn, &bsize, &bo); error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } if (function & FAT_GET) { if (FAT32(pmp)) readcn = getulong(&bp->b_data[bo]); else readcn = getushort(&bp->b_data[bo]); if (FAT12(pmp) & (cn & 1)) readcn >>= 4; readcn &= pmp->pm_fatmask; /* map reserved FAT entries to same values for all FATs */ if ((readcn | ~pmp->pm_fatmask) >= CLUST_RSRVD) readcn |= ~pmp->pm_fatmask; *oldcontents = readcn; } if (function & FAT_SET) { switch (pmp->pm_fatmask) { case FAT12_MASK: readcn = getushort(&bp->b_data[bo]); if (cn & 1) { readcn &= 0x000f; readcn |= newcontents << 4; } else { readcn &= 0xf000; readcn |= newcontents & 0xfff; } putushort(&bp->b_data[bo], readcn); break; case FAT16_MASK: putushort(&bp->b_data[bo], newcontents); break; case FAT32_MASK: /* * According to spec we have to retain the * high order bits of the FAT entry. */ readcn = getulong(&bp->b_data[bo]); readcn &= ~FAT32_MASK; readcn |= newcontents & FAT32_MASK; putulong(&bp->b_data[bo], readcn); break; } updatefats(pmp, bp, bn); bp = NULL; pmp->pm_fmod = 1; } if (bp) brelse(bp); return (0); } /* * Update a contiguous cluster chain * * pmp - mount point * start - first cluster of chain * count - number of clusters in chain * fillwith - what to write into FAT entry of last cluster */ static int fatchain(struct msdosfsmount *pmp, u_long start, u_long count, u_long fillwith) { int error; u_long bn, bo, bsize, byteoffset, readcn, newc; struct buf *bp; #ifdef MSDOSFS_DEBUG printf("fatchain(pmp %p, start %lu, count %lu, fillwith %lx)\n", pmp, start, count, fillwith); #endif /* * Be sure the clusters are in the filesystem. 
*/ if (start < CLUST_FIRST || start + count - 1 > pmp->pm_maxcluster) return (EINVAL); while (count > 0) { byteoffset = FATOFS(pmp, start); fatblock(pmp, byteoffset, &bn, &bsize, &bo); error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp); if (error) { brelse(bp); return (error); } while (count > 0) { start++; newc = --count > 0 ? start : fillwith; switch (pmp->pm_fatmask) { case FAT12_MASK: readcn = getushort(&bp->b_data[bo]); if (start & 1) { readcn &= 0xf000; readcn |= newc & 0xfff; } else { readcn &= 0x000f; readcn |= newc << 4; } putushort(&bp->b_data[bo], readcn); bo++; if (!(start & 1)) bo++; break; case FAT16_MASK: putushort(&bp->b_data[bo], newc); bo += 2; break; case FAT32_MASK: readcn = getulong(&bp->b_data[bo]); readcn &= ~pmp->pm_fatmask; readcn |= newc & pmp->pm_fatmask; putulong(&bp->b_data[bo], readcn); bo += 4; break; } if (bo >= bsize) break; } updatefats(pmp, bp, bn); } pmp->pm_fmod = 1; return (0); } /* * Check the length of a free cluster chain starting at start. * * pmp - mount point * start - start of chain * count - maximum interesting length */ static int chainlength(struct msdosfsmount *pmp, u_long start, u_long count) { u_long idx, max_idx; u_int map; u_long len; MSDOSFS_ASSERT_MP_LOCKED(pmp); if (start > pmp->pm_maxcluster) return (0); max_idx = pmp->pm_maxcluster / N_INUSEBITS; idx = start / N_INUSEBITS; start %= N_INUSEBITS; map = pmp->pm_inusemap[idx]; map &= ~((1 << start) - 1); if (map) { len = ffs(map) - 1 - start; len = MIN(len, count); if (start + len > pmp->pm_maxcluster) len = pmp->pm_maxcluster - start + 1; return (len); } len = N_INUSEBITS - start; if (len >= count) { len = count; if (start + len > pmp->pm_maxcluster) len = pmp->pm_maxcluster - start + 1; return (len); } while (++idx <= max_idx) { if (len >= count) break; map = pmp->pm_inusemap[idx]; if (map) { len += ffs(map) - 1; break; } len += N_INUSEBITS; } len = MIN(len, count); if (start + len > pmp->pm_maxcluster) len = pmp->pm_maxcluster - start + 1; return (len); } 
/* * Allocate contigous free clusters. * * pmp - mount point. * start - start of cluster chain. * count - number of clusters to allocate. * fillwith - put this value into the FAT entry for the * last allocated cluster. * retcluster - put the first allocated cluster's number here. * got - how many clusters were actually allocated. */ static int chainalloc(struct msdosfsmount *pmp, u_long start, u_long count, u_long fillwith, u_long *retcluster, u_long *got) { int error; u_long cl, n; MSDOSFS_ASSERT_MP_LOCKED(pmp); KASSERT((pmp->pm_flags & MSDOSFSMNT_RONLY) == 0, ("chainalloc on ro msdosfs mount")); for (cl = start, n = count; n-- > 0;) usemap_alloc(pmp, cl++); pmp->pm_nxtfree = start + count; if (pmp->pm_nxtfree > pmp->pm_maxcluster) pmp->pm_nxtfree = CLUST_FIRST; pmp->pm_flags |= MSDOSFS_FSIMOD; error = fatchain(pmp, start, count, fillwith); if (error != 0) { for (cl = start, n = count; n-- > 0;) usemap_free(pmp, cl++); return (error); } #ifdef MSDOSFS_DEBUG printf("clusteralloc(): allocated cluster chain at %lu (%lu clusters)\n", start, count); #endif if (retcluster) *retcluster = start; if (got) *got = count; return (0); } /* * Allocate contiguous free clusters. * * pmp - mount point. * start - preferred start of cluster chain. * count - number of clusters requested. * fillwith - put this value into the FAT entry for the * last allocated cluster. * retcluster - put the first allocated cluster's number here. * got - how many clusters were actually allocated. 
*/ int clusteralloc(struct msdosfsmount *pmp, u_long start, u_long count, u_long fillwith, u_long *retcluster, u_long *got) { int error; MSDOSFS_LOCK_MP(pmp); error = clusteralloc1(pmp, start, count, fillwith, retcluster, got); MSDOSFS_UNLOCK_MP(pmp); return (error); } static int clusteralloc1(struct msdosfsmount *pmp, u_long start, u_long count, u_long fillwith, u_long *retcluster, u_long *got) { u_long idx; u_long len, newst, foundl, cn, l; u_long foundcn = 0; /* XXX: foundcn could be used unititialized */ u_int map; MSDOSFS_ASSERT_MP_LOCKED(pmp); #ifdef MSDOSFS_DEBUG printf("clusteralloc(): find %lu clusters\n", count); #endif if (start) { if ((len = chainlength(pmp, start, count)) >= count) return (chainalloc(pmp, start, count, fillwith, retcluster, got)); } else len = 0; newst = pmp->pm_nxtfree; foundl = 0; for (cn = newst; cn <= pmp->pm_maxcluster;) { idx = cn / N_INUSEBITS; map = pmp->pm_inusemap[idx]; map |= (1 << (cn % N_INUSEBITS)) - 1; if (map != FULL_RUN) { cn = idx * N_INUSEBITS + ffs(map ^ FULL_RUN) - 1; if ((l = chainlength(pmp, cn, count)) >= count) return (chainalloc(pmp, cn, count, fillwith, retcluster, got)); if (l > foundl) { foundcn = cn; foundl = l; } cn += l + 1; continue; } cn += N_INUSEBITS - cn % N_INUSEBITS; } for (cn = 0; cn < newst;) { idx = cn / N_INUSEBITS; map = pmp->pm_inusemap[idx]; map |= (1 << (cn % N_INUSEBITS)) - 1; if (map != FULL_RUN) { cn = idx * N_INUSEBITS + ffs(map ^ FULL_RUN) - 1; if ((l = chainlength(pmp, cn, count)) >= count) return (chainalloc(pmp, cn, count, fillwith, retcluster, got)); if (l > foundl) { foundcn = cn; foundl = l; } cn += l + 1; continue; } cn += N_INUSEBITS - cn % N_INUSEBITS; } if (!foundl) return (ENOSPC); if (len) return (chainalloc(pmp, start, len, fillwith, retcluster, got)); else return (chainalloc(pmp, foundcn, foundl, fillwith, retcluster, got)); } /* * Free a chain of clusters. * * pmp - address of the msdosfs mount structure for the filesystem * containing the cluster chain to be freed. 
* startcluster - number of the 1st cluster in the chain of clusters to be * freed. */ int freeclusterchain(struct msdosfsmount *pmp, u_long cluster) { int error; struct buf *bp = NULL; u_long bn, bo, bsize, byteoffset; u_long readcn, lbn = -1; MSDOSFS_LOCK_MP(pmp); while (cluster >= CLUST_FIRST && cluster <= pmp->pm_maxcluster) { byteoffset = FATOFS(pmp, cluster); fatblock(pmp, byteoffset, &bn, &bsize, &bo); if (lbn != bn) { if (bp) updatefats(pmp, bp, lbn); error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp); if (error) { brelse(bp); MSDOSFS_UNLOCK_MP(pmp); return (error); } lbn = bn; } usemap_free(pmp, cluster); switch (pmp->pm_fatmask) { case FAT12_MASK: readcn = getushort(&bp->b_data[bo]); if (cluster & 1) { cluster = readcn >> 4; readcn &= 0x000f; readcn |= MSDOSFSFREE << 4; } else { cluster = readcn; readcn &= 0xf000; readcn |= MSDOSFSFREE & 0xfff; } putushort(&bp->b_data[bo], readcn); break; case FAT16_MASK: cluster = getushort(&bp->b_data[bo]); putushort(&bp->b_data[bo], MSDOSFSFREE); break; case FAT32_MASK: cluster = getulong(&bp->b_data[bo]); putulong(&bp->b_data[bo], (MSDOSFSFREE & FAT32_MASK) | (cluster & ~FAT32_MASK)); break; } cluster &= pmp->pm_fatmask; if ((cluster | ~pmp->pm_fatmask) >= CLUST_RSRVD) cluster |= pmp->pm_fatmask; } if (bp) updatefats(pmp, bp, bn); MSDOSFS_UNLOCK_MP(pmp); return (0); } /* * Read in FAT blocks looking for free clusters. For every free cluster * found turn off its corresponding bit in the pm_inusemap. */ int fillinusemap(struct msdosfsmount *pmp) { struct buf *bp; u_long bn, bo, bsize, byteoffset, cn, readcn; int error; MSDOSFS_ASSERT_MP_LOCKED(pmp); bp = NULL; /* * Mark all clusters in use, we mark the free ones in the FAT scan * loop further down. */ for (cn = 0; cn < (pmp->pm_maxcluster + N_INUSEBITS) / N_INUSEBITS; cn++) pmp->pm_inusemap[cn] = FULL_RUN; /* * Figure how many free clusters are in the filesystem by ripping * through the FAT counting the number of entries whose content is * zero. 
These represent free clusters. */ pmp->pm_freeclustercount = 0; for (cn = 0; cn <= pmp->pm_maxcluster; cn++) { byteoffset = FATOFS(pmp, cn); bo = byteoffset % pmp->pm_fatblocksize; if (bo == 0) { /* Read new FAT block */ if (bp != NULL) brelse(bp); fatblock(pmp, byteoffset, &bn, &bsize, NULL); error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp); if (error != 0) return (error); } if (FAT32(pmp)) readcn = getulong(&bp->b_data[bo]); else readcn = getushort(&bp->b_data[bo]); if (FAT12(pmp) && (cn & 1)) readcn >>= 4; readcn &= pmp->pm_fatmask; /* * Check if the FAT ID matches the BPB's media descriptor and * all other bits are set to 1. */ if (cn == 0 && readcn != ((pmp->pm_fatmask & 0xffffff00) | pmp->pm_bpb.bpbMedia)) { #ifdef MSDOSFS_DEBUG printf("mountmsdosfs(): Media descriptor in BPB" "does not match FAT ID\n"); #endif brelse(bp); return (EINVAL); } else if (readcn == CLUST_FREE) usemap_free(pmp, cn); } if (bp != NULL) brelse(bp); for (cn = pmp->pm_maxcluster + 1; cn < (pmp->pm_maxcluster + N_INUSEBITS) / N_INUSEBITS; cn++) pmp->pm_inusemap[cn / N_INUSEBITS] |= 1 << (cn % N_INUSEBITS); return (0); } /* * Allocate a new cluster and chain it onto the end of the file. * * dep - the file to extend * count - number of clusters to allocate * bpp - where to return the address of the buf header for the first new * file block * ncp - where to put cluster number of the first newly allocated cluster * If this pointer is 0, do not return the cluster number. * flags - see fat.h * * NOTE: This function is not responsible for turning on the DE_UPDATE bit of * the de_flag field of the denode and it does not change the de_FileSize * field. This is left for the caller to do. 
 */
int
extendfile(struct denode *dep, u_long count, struct buf **bpp, u_long *ncp,
    int flags)
{
	int error;
	u_long frcn;
	u_long cn, got;
	struct msdosfsmount *pmp = dep->de_pmp;
	struct buf *bp;
	daddr_t blkno;

	/*
	 * Don't try to extend the root directory
	 */
	if (dep->de_StartCluster == MSDOSFSROOT &&
	    (dep->de_Attributes & ATTR_DIRECTORY)) {
#ifdef MSDOSFS_DEBUG
		printf("extendfile(): attempt to extend root directory\n");
#endif
		return (ENOSPC);
	}

	/*
	 * If the "file's last cluster" cache entry is empty, and the file
	 * is not empty, then fill the cache entry by calling pcbmap().
	 */
	if (dep->de_fc[FC_LASTFC].fc_frcn == FCE_EMPTY &&
	    dep->de_StartCluster != 0) {
		error = pcbmap(dep, 0xffff, 0, &cn, 0);
		/* we expect it to return E2BIG */
		if (error != E2BIG)
			return (error);
	}

	/*
	 * Preserve value for the last cluster before extending the file
	 * in case the extension fails and the cache must be rolled back.
	 */
	dep->de_fc[FC_NEXTTOLASTFC].fc_frcn = dep->de_fc[FC_LASTFC].fc_frcn;
	dep->de_fc[FC_NEXTTOLASTFC].fc_fsrcn = dep->de_fc[FC_LASTFC].fc_fsrcn;
	while (count > 0) {
		/*
		 * Allocate a new cluster chain and cat onto the end of the
		 * file.  If the file is empty we make de_StartCluster point
		 * to the new block.  Note that de_StartCluster being 0 is
		 * sufficient to be sure the file is empty since we exclude
		 * attempts to extend the root directory above, and the root
		 * dir is the only file with a startcluster of 0 that has
		 * blocks allocated (sort of).
		 */
		if (dep->de_StartCluster == 0)
			cn = 0;
		else
			cn = dep->de_fc[FC_LASTFC].fc_fsrcn + 1;
		error = clusteralloc(pmp, cn, count, CLUST_EOFE, &cn, &got);
		if (error)
			return (error);

		count -= got;

		/*
		 * Give them the filesystem relative cluster number if they
		 * want it.
		 */
		if (ncp) {
			*ncp = cn;
			ncp = NULL;
		}

		if (dep->de_StartCluster == 0) {
			dep->de_StartCluster = cn;
			frcn = 0;
		} else {
			/* Link the old tail cluster to the new chain. */
			error = fatentry(FAT_SET, pmp,
			    dep->de_fc[FC_LASTFC].fc_fsrcn, 0, cn);
			if (error) {
				clusterfree(pmp, cn, NULL);
				return (error);
			}
			frcn = dep->de_fc[FC_LASTFC].fc_frcn + 1;
		}

		/*
		 * Update the "last cluster of the file" entry in the
		 * denode's FAT cache.
		 */
		fc_setcache(dep, FC_LASTFC, frcn + got - 1, cn + got - 1);

		if (flags & DE_CLEAR) {
			while (got-- > 0) {
				/*
				 * Get the buf header for the new block of
				 * the file.
				 */
				if (dep->de_Attributes & ATTR_DIRECTORY)
					bp = getblk(pmp->pm_devvp,
					    cntobn(pmp, cn++),
					    pmp->pm_bpcluster, 0, 0, 0);
				else {
					bp = getblk(DETOV(dep), frcn++,
					    pmp->pm_bpcluster, 0, 0, 0);
					/*
					 * Do the bmap now, as in
					 * msdosfs_write
					 */
					if (pcbmap(dep, bp->b_lblkno, &blkno,
					    0, 0))
						bp->b_blkno = -1;
					if (bp->b_blkno == -1)
						panic("extendfile: pcbmap");
					else
						bp->b_blkno = blkno;
				}
				vfs_bio_clrbuf(bp);
				if (bpp) {
					*bpp = bp;
					bpp = NULL;
				} else
					bdwrite(bp);
			}
		}
	}

	return (0);
}

/*-
 * Routine to mark a FAT16 or FAT32 volume as "clean" or "dirty" by
 * manipulating the upper bit of the FAT entry for cluster 1.  Note that
 * this bit is not defined for FAT12 volumes, which are always assumed to
 * be clean.
 *
 * The fatentry() routine only works on cluster numbers that a file could
 * occupy, so it won't manipulate the entry for cluster 1.  So we have to do
 * it here.  The code was stolen from fatentry() and tailored for cluster 1.
 *
 * Inputs:
 *	pmp	The MS-DOS volume to mark
 *	dirty	Non-zero if the volume should be marked dirty; zero if it
 *		should be marked clean
 *
 * Result:
 *	0	Success
 *	EROFS	Volume is read-only
 *	?	(other errors from called routines)
 */
int
markvoldirty(struct msdosfsmount *pmp, int dirty)
{
	struct buf *bp;
	u_long bn, bo, bsize, byteoffset, fatval;
	int error;

	/*
	 * FAT12 does not support a "clean" bit, so don't do anything for
	 * FAT12.
	 */
	if (FAT12(pmp))
		return (0);

	/* Can't change the bit on a read-only filesystem. */
	if (pmp->pm_flags & MSDOSFSMNT_RONLY)
		return (EROFS);

	/*
	 * Fetch the block containing the FAT entry.  It is given by the
	 * pseudo-cluster 1.
	 */
	byteoffset = FATOFS(pmp, 1);
	fatblock(pmp, byteoffset, &bn, &bsize, &bo);
	error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	/*
	 * Get the current value of the FAT entry and set/clear the relevant
	 * bit.  Dirty means clear the "clean" bit; clean means set the
	 * "clean" bit.
	 */
	if (FAT32(pmp)) {
		/* FAT32 uses bit 27. */
		fatval = getulong(&bp->b_data[bo]);
		if (dirty)
			fatval &= 0xF7FFFFFF;
		else
			fatval |= 0x08000000;
		putulong(&bp->b_data[bo], fatval);
	} else {
		/* Must be FAT16; use bit 15. */
		fatval = getushort(&bp->b_data[bo]);
		if (dirty)
			fatval &= 0x7FFF;
		else
			fatval |= 0x8000;
		putushort(&bp->b_data[bo], fatval);
	}

	/* Write out the modified FAT block synchronously. */
	return (bwrite(bp));
}
Index: head/sys/fs/msdosfs/msdosfs_iconv.c
===================================================================
--- head/sys/fs/msdosfs/msdosfs_iconv.c	(revision 326267)
+++ head/sys/fs/msdosfs/msdosfs_iconv.c	(revision 326268)
@@ -1,36 +1,38 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2003 Ryuichiro Imura
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include VFS_DECLARE_ICONV(msdosfs); Index: head/sys/fs/msdosfs/msdosfs_lookup.c =================================================================== --- head/sys/fs/msdosfs/msdosfs_lookup.c (revision 326267) +++ head/sys/fs/msdosfs/msdosfs_lookup.c (revision 326268) @@ -1,1064 +1,1066 @@ /* $FreeBSD$ */ /* $NetBSD: msdosfs_lookup.c,v 1.37 1997/11/17 15:36:54 ws Exp $ */ /*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. * Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. 
The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. 
 *
 * October 1992
 */

/*
 * NOTE(review): the header names of these #include directives were lost in
 * extraction (angle-bracketed arguments stripped); presumably the usual
 * msdosfs set (sys/param.h, sys/systm.h, sys/buf.h, sys/mount.h,
 * sys/namei.h, sys/vnode.h, fs/msdosfs/*.h) -- confirm against upstream.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/* Core lookup worker shared by the VOP entry point and the ".." recheck. */
static int msdosfs_lookup_(struct vnode *vdp, struct vnode **vpp,
    struct componentname *cnp, uint64_t *inum);

/*
 * VOP_CACHEDLOOKUP entry point: thin wrapper that forwards to the worker
 * with no ".." inode-number output requested.
 */
int
msdosfs_lookup(struct vop_cachedlookup_args *ap)
{

	return (msdosfs_lookup_(ap->a_dvp, ap->a_vpp, ap->a_cnp, NULL));
}

/*
 * Argument bundle passed through vn_vget_ino_gen() to locate the ".."
 * directory: starting cluster plus offset within that cluster.
 */
struct deget_dotdot {
	u_long cluster;		/* starting cluster of the parent dir */
	int blkoff;		/* offset of its entry within the cluster */
};

/*
 * vn_vget_ino_gen() callback: instantiate the denode for the ".." target
 * described by arg (a struct deget_dotdot) and return its vnode in *rvp.
 * Returns 0 on success or the error from deget().
 */
static int
msdosfs_deget_dotdot(struct mount *mp, void *arg, int lkflags,
    struct vnode **rvp)
{
	struct deget_dotdot *dd_arg;
	struct denode *rdp;
	struct msdosfsmount *pmp;
	int error;

	pmp = VFSTOMSDOSFS(mp);
	dd_arg = arg;
	error = deget(pmp, dd_arg->cluster, dd_arg->blkoff, &rdp);
	if (error == 0)
		*rvp = DETOV(rdp);
	return (error);
}

/*
 * When we search a directory the blocks containing directory entries are
 * read and examined.  The directory entries contain information that would
 * normally be in the inode of a unix filesystem.  This means that some of
 * a directory's contents may also be in memory resident denodes (sort of
 * an inode).  This can cause problems if we are searching while some other
 * process is modifying a directory.  To prevent one process from accessing
 * incompletely modified directory information we depend upon being the
 * sole owner of a directory block.  bread/brelse provide this service.
 * This being the case, when a process modifies a directory it must first
 * acquire the disk block that contains the directory entry to be modified.
 * Then update the disk block and the denode, and then write the disk block
 * out to disk.  This way disk blocks containing directory entries and in
 * memory denode's will be in synch.
 */
/*
 * Look up cnp->cn_nameptr in directory vdp.
 *
 * If vpp != NULL, return the vnode of the found entry in *vpp (or NULL and
 * EJUSTRETURN/ENOENT per the usual VOP_LOOKUP contract).  If dd_inum !=
 * NULL, only compute the pseudo-inode number of the found entry (used by
 * the ".." recheck below) and do not instantiate a vnode.
 *
 * The matching honors both the 8.3 (DOS) form and Win95 long-name entries;
 * while scanning, it also records a slot (de_fndoffset/de_fndcnt) where a
 * new entry could be created for CREATE/RENAME.
 */
static int
msdosfs_lookup_(struct vnode *vdp, struct vnode **vpp,
    struct componentname *cnp, uint64_t *dd_inum)
{
	struct mbnambuf nb;
	daddr_t bn;
	int error;
	int slotcount;
	int slotoffset = 0;
	int frcn;		/* file-relative cluster number being scanned */
	u_long cluster;
	int blkoff;
	int diroff;		/* running byte offset within the directory */
	int blsize;
	int isadir;		/* ~0 if found direntry is a directory */
	u_long scn;		/* starting cluster number */
	struct vnode *pdp;
	struct denode *dp;
	struct denode *tdp;
	struct msdosfsmount *pmp;
	struct buf *bp = NULL;
	struct direntry *dep = NULL;
	struct deget_dotdot dd_arg;
	u_char dosfilename[12];
	int flags = cnp->cn_flags;
	int nameiop = cnp->cn_nameiop;
	int unlen;
	uint64_t inode1;
	int wincnt = 1;		/* direntry slots the name needs (LFN + 8.3) */
	int chksum = -1, chksum_ok;
	int olddos = 1;		/* nonzero if name is representable in 8.3 */

#ifdef MSDOSFS_DEBUG
	printf("msdosfs_lookup(): looking for %s\n", cnp->cn_nameptr);
#endif
	dp = VTODE(vdp);
	pmp = dp->de_pmp;
#ifdef MSDOSFS_DEBUG
	printf("msdosfs_lookup(): vdp %p, dp %p, Attr %02x\n",
	    vdp, dp, dp->de_Attributes);
#endif
restart:
	if (vpp != NULL)
		*vpp = NULL;
	/*
	 * If they are going after the . or .. entry in the root directory,
	 * they won't find it.  DOS filesystems don't have them in the root
	 * directory.  So, we fake it.  deget() is in on this scam too.
	 */
	if ((vdp->v_vflag & VV_ROOT) && cnp->cn_nameptr[0] == '.' &&
	    (cnp->cn_namelen == 1 ||
	    (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.'))) {
		isadir = ATTR_DIRECTORY;
		scn = MSDOSFSROOT;
#ifdef MSDOSFS_DEBUG
		printf("msdosfs_lookup(): looking for . or .. in root directory\n");
#endif
		cluster = MSDOSFSROOT;
		blkoff = MSDOSFSROOT_OFS;
		goto foundroot;
	}

	/* Classify the name: invalid, plain 8.3, or needing LFN slots. */
	switch (unix2dosfn((const u_char *)cnp->cn_nameptr, dosfilename,
	    cnp->cn_namelen, 0, pmp)) {
	case 0:
		return (EINVAL);
	case 1:
		break;
	case 2:
		wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr,
		    cnp->cn_namelen, pmp) + 1;
		break;
	case 3:
		olddos = 0;
		wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr,
		    cnp->cn_namelen, pmp) + 1;
		break;
	}
	if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) {
		wincnt = 1;
		olddos = 1;
	}
	unlen = winLenFixup(cnp->cn_nameptr, cnp->cn_namelen);

	/*
	 * Suppress search for slots unless creating
	 * file and at end of pathname, in which case
	 * we watch for a place to put the new file in
	 * case it doesn't already exist.
	 */
	slotcount = wincnt;
	if ((nameiop == CREATE || nameiop == RENAME) &&
	    (flags & ISLASTCN))
		slotcount = 0;

#ifdef MSDOSFS_DEBUG
	printf("msdosfs_lookup(): dos version of filename %s, length %ld\n",
	    dosfilename, cnp->cn_namelen);
#endif
	/*
	 * Search the directory pointed at by vdp for the name pointed at
	 * by cnp->cn_nameptr.
	 */
	tdp = NULL;
	mbnambuf_init(&nb);
	/*
	 * The outer loop ranges over the clusters that make up the
	 * directory.  Note that the root directory is different from all
	 * other directories.  It has a fixed number of blocks that are not
	 * part of the pool of allocatable clusters.  So, we treat it a
	 * little differently.  The root directory starts at "cluster" 0.
	 */
	diroff = 0;
	for (frcn = 0;; frcn++) {
		error = pcbmap(dp, frcn, &bn, &cluster, &blsize);
		if (error) {
			if (error == E2BIG)	/* ran off the end: not found */
				break;
			return (error);
		}
		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			return (error);
		}
		for (blkoff = 0; blkoff < blsize;
		    blkoff += sizeof(struct direntry),
		    diroff += sizeof(struct direntry)) {
			dep = (struct direntry *)(bp->b_data + blkoff);
			/*
			 * If the slot is empty and we are still looking
			 * for an empty then remember this one.  If the
			 * slot is not empty then check to see if it
			 * matches what we are looking for.  If the slot
			 * has never been filled with anything, then the
			 * remainder of the directory has never been used,
			 * so there is no point in searching it.
			 */
			if (dep->deName[0] == SLOT_EMPTY ||
			    dep->deName[0] == SLOT_DELETED) {
				/*
				 * Drop memory of previous long matches
				 */
				chksum = -1;
				mbnambuf_init(&nb);

				if (slotcount < wincnt) {
					slotcount++;
					slotoffset = diroff;
				}
				if (dep->deName[0] == SLOT_EMPTY) {
					brelse(bp);
					goto notfound;
				}
			} else {
				/*
				 * If there wasn't enough space for our winentries,
				 * forget about the empty space
				 */
				if (slotcount < wincnt)
					slotcount = 0;

				/*
				 * Check for Win95 long filename entry
				 */
				if (dep->deAttributes == ATTR_WIN95) {
					if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
						continue;
					chksum = win2unixfn(&nb,
					    (struct winentry *)dep, chksum,
					    pmp);
					continue;
				}

				chksum = winChkName(&nb,
				    (const u_char *)cnp->cn_nameptr, unlen,
				    chksum, pmp);
				if (chksum == -2) {
					chksum = -1;
					continue;
				}

				/*
				 * Ignore volume labels (anywhere, not just
				 * the root directory).
				 */
				if (dep->deAttributes & ATTR_VOLUME) {
					chksum = -1;
					continue;
				}

				/*
				 * Check for a checksum or name match
				 */
				chksum_ok = (chksum == winChksum(dep->deName));
				if (!chksum_ok &&
				    (!olddos || bcmp(dosfilename, dep->deName, 11))) {
					chksum = -1;
					continue;
				}
#ifdef MSDOSFS_DEBUG
				printf("msdosfs_lookup(): match blkoff %d, diroff %d\n",
				    blkoff, diroff);
#endif
				/*
				 * Remember where this directory
				 * entry came from for whoever did
				 * this lookup.
				 */
				dp->de_fndoffset = diroff;
				if (chksum_ok && nameiop == RENAME) {
					/*
					 * Target had correct long name
					 * directory entries, reuse them
					 * as needed.
					 */
					dp->de_fndcnt = wincnt - 1;
				} else {
					/*
					 * Long name directory entries
					 * not present or corrupt, can only
					 * reuse dos directory entry.
					 */
					dp->de_fndcnt = 0;
				}

				goto found;
			}
		}	/* for (blkoff = 0; .... */
		/*
		 * Release the buffer holding the directory cluster just
		 * searched.
		 */
		brelse(bp);
	}	/* for (frcn = 0; ; frcn++) */

notfound:
	/*
	 * We hold no disk buffers at this point.
	 */

	/*
	 * Fixup the slot description to point to the place where
	 * we might put the new DOS direntry (putting the Win95
	 * long name entries before that)
	 */
	if (!slotcount) {
		slotcount = 1;
		slotoffset = diroff;
	}
	if (wincnt > slotcount)
		slotoffset += sizeof(struct direntry) * (wincnt - slotcount);

	/*
	 * If we get here we didn't find the entry we were looking for.  But
	 * that's ok if we are creating or renaming and are at the end of
	 * the pathname and the directory hasn't been removed.
	 */
#ifdef MSDOSFS_DEBUG
	printf("msdosfs_lookup(): op %d, refcnt %ld\n",
	    nameiop, dp->de_refcnt);
	printf(" slotcount %d, slotoffset %d\n",
	    slotcount, slotoffset);
#endif
	if ((nameiop == CREATE || nameiop == RENAME) &&
	    (flags & ISLASTCN) && dp->de_refcnt != 0) {
		/*
		 * Access for write is interpreted as allowing
		 * creation of files in the directory.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, cnp->cn_thread);
		if (error)
			return (error);
		/*
		 * Return an indication of where the new directory
		 * entry should be put.
		 */
		dp->de_fndoffset = slotoffset;
		dp->de_fndcnt = wincnt - 1;

		/*
		 * We return with the directory locked, so that
		 * the parameters we set up above will still be
		 * valid if we actually decide to do a direnter().
		 * We return ni_vp == NULL to indicate that the entry
		 * does not currently exist; we leave a pointer to
		 * the (locked) directory inode in ndp->ni_dvp.
		 * The pathname buffer is saved so that the name
		 * can be obtained later.
		 *
		 * NB - if the directory is unlocked, then this
		 * information cannot be used.
		 */
		cnp->cn_flags |= SAVENAME;
		return (EJUSTRETURN);
	}
#if 0
	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 *
	 * XXX Negative caching is broken for msdosfs because the name
	 * cache doesn't understand peculiarities such as case insensitivity
	 * and 8.3 filenames.  Hence, it may not invalidate all negative
	 * entries if a file with this name is later created.
	 */
	if ((cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(vdp, *vpp, cnp);
#endif
	return (ENOENT);

found:
	/*
	 * NOTE:  We still have the buffer with matched directory entry at
	 * this point.
	 */
	isadir = dep->deAttributes & ATTR_DIRECTORY;
	scn = getushort(dep->deStartCluster);
	if (FAT32(pmp)) {
		scn |= getushort(dep->deHighClust) << 16;
		if (scn == pmp->pm_rootdirblk) {
			/*
			 * There should actually be 0 here.
			 * Just ignore the error.
			 */
			scn = MSDOSFSROOT;
		}
	}

	if (isadir) {
		cluster = scn;
		if (cluster == MSDOSFSROOT)
			blkoff = MSDOSFSROOT_OFS;
		else
			blkoff = 0;
	} else if (cluster == MSDOSFSROOT)
		blkoff = diroff;

	/*
	 * Now release buf to allow deget to read the entry again.
	 * Reserving it here and giving it to deget could result
	 * in a deadlock.
	 */
	brelse(bp);
	bp = NULL;

foundroot:
	/*
	 * If we entered at foundroot, then we are looking for the . or ..
	 * entry of the filesystems root directory.  isadir and scn were
	 * setup before jumping here.  And, bp is already null.
	 */
	if (FAT32(pmp) && scn == MSDOSFSROOT)
		scn = pmp->pm_rootdirblk;

	if (dd_inum != NULL) {
		/* Caller only wants the pseudo-inode number (".." recheck). */
		*dd_inum = (uint64_t)pmp->pm_bpcluster * scn + blkoff;
		return (0);
	}

	/*
	 * If deleting, and at end of pathname, return
	 * parameters which can be used to remove file.
	 */
	if (nameiop == DELETE && (flags & ISLASTCN)) {
		/*
		 * Don't allow deleting the root.
		 */
		if (blkoff == MSDOSFSROOT_OFS)
			return (EBUSY);

		/*
		 * Write access to directory required to delete files.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, cnp->cn_thread);
		if (error)
			return (error);

		/*
		 * Return pointer to current entry in dp->i_offset.
		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
		 */
		if (dp->de_StartCluster == scn && isadir) {	/* "." */
			VREF(vdp);
			*vpp = vdp;
			return (0);
		}
		error = deget(pmp, cluster, blkoff, &tdp);
		if (error)
			return (error);
		*vpp = DETOV(tdp);
		return (0);
	}

	/*
	 * If rewriting (RENAME), return the inode and the
	 * information required to rewrite the present directory
	 * Must get inode of directory entry to verify it's a
	 * regular file, or empty directory.
	 */
	if (nameiop == RENAME && (flags & ISLASTCN)) {
		if (blkoff == MSDOSFSROOT_OFS)
			return (EBUSY);

		error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, cnp->cn_thread);
		if (error)
			return (error);

		/*
		 * Careful about locking second inode.
		 * This can only occur if the target is ".".
		 */
		if (dp->de_StartCluster == scn && isadir)
			return (EISDIR);

		if ((error = deget(pmp, cluster, blkoff, &tdp)) != 0)
			return (error);
		*vpp = DETOV(tdp);
		cnp->cn_flags |= SAVENAME;
		return (0);
	}

	/*
	 * Step through the translation in the name.  We do not `vput' the
	 * directory because we may need it again if a symbolic link
	 * is relative to the current directory.  Instead we save it
	 * unlocked as "pdp".  We must get the target inode before unlocking
	 * the directory to insure that the inode will not be removed
	 * before we get it.  We prevent deadlock by always fetching
	 * inodes from the root, moving down the directory tree.  Thus
	 * when following backward pointers ".." we must unlock the
	 * parent directory before getting the requested directory.
	 */
	pdp = vdp;
	if (flags & ISDOTDOT) {
		dd_arg.cluster = cluster;
		dd_arg.blkoff = blkoff;
		error = vn_vget_ino_gen(vdp, msdosfs_deget_dotdot,
		    &dd_arg, cnp->cn_lkflags, vpp);
		if (error != 0) {
			*vpp = NULL;
			return (error);
		}
		/*
		 * Recheck that ".." still points to the inode we
		 * looked up before pdp lock was dropped.
		 */
		error = msdosfs_lookup_(pdp, NULL, cnp, &inode1);
		if (error) {
			vput(*vpp);
			*vpp = NULL;
			return (error);
		}
		if (VTODE(*vpp)->de_inode != inode1) {
			/* ".." changed underneath us; redo the lookup. */
			vput(*vpp);
			goto restart;
		}
	} else if (dp->de_StartCluster == scn && isadir) {
		VREF(vdp);	/* we want ourself, ie "." */
		*vpp = vdp;
	} else {
		if ((error = deget(pmp, cluster, blkoff, &tdp)) != 0)
			return (error);
		*vpp = DETOV(tdp);
	}

	/*
	 * Insert name into cache if appropriate.
	 */
	if (cnp->cn_flags & MAKEENTRY)
		cache_enter(vdp, *vpp, cnp);
	return (0);
}

/*
 * Write a new directory entry (plus any Win95 long-name entries) for dep
 * into directory ddep at the slot previously recorded by the lookup
 * (ddep->de_fndoffset / de_fndcnt), extending the directory if needed.
 *
 * dep  - directory entry to copy into the directory
 * ddep - directory to add to
 * depp - return the address of the denode for the created directory entry
 *	  if depp != 0
 * cnp  - componentname needed for Win95 long filenames
 */
int
createde(struct denode *dep, struct denode *ddep, struct denode **depp,
    struct componentname *cnp)
{
	int error;
	u_long dirclust, diroffset;
	struct direntry *ndep;
	struct msdosfsmount *pmp = ddep->de_pmp;
	struct buf *bp;
	daddr_t bn;
	int blsize;

#ifdef MSDOSFS_DEBUG
	printf("createde(dep %p, ddep %p, depp %p, cnp %p)\n",
	    dep, ddep, depp, cnp);
#endif

	/*
	 * If no space left in the directory then allocate another cluster
	 * and chain it onto the end of the file.  There is one exception
	 * to this.  That is, if the root directory has no more space it
	 * can NOT be expanded.  extendfile() checks for and fails attempts
	 * to extend the root directory.  We just return an error in that
	 * case.
	 */
	if (ddep->de_fndoffset >= ddep->de_FileSize) {
		diroffset = ddep->de_fndoffset + sizeof(struct direntry)
		    - ddep->de_FileSize;
		dirclust = de_clcount(pmp, diroffset);
		error = extendfile(ddep, dirclust, 0, 0, DE_CLEAR);
		if (error) {
			/* Roll the directory back to its previous size. */
			(void)detrunc(ddep, ddep->de_FileSize, 0, NOCRED);
			return error;
		}

		/*
		 * Update the size of the directory
		 */
		ddep->de_FileSize += de_cn2off(pmp, dirclust);
	}

	/*
	 * We just read in the cluster with space.  Copy the new directory
	 * entry in.  Then write it to disk.  NOTE:  DOS directories
	 * do not get smaller as clusters are emptied.
	 */
	error = pcbmap(ddep, de_cluster(pmp, ddep->de_fndoffset),
	    &bn, &dirclust, &blsize);
	if (error)
		return error;
	diroffset = ddep->de_fndoffset;
	if (dirclust != MSDOSFSROOT)
		diroffset &= pmp->pm_crbomask;
	if ((error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp)) != 0) {
		brelse(bp);
		return error;
	}
	ndep = bptoep(pmp, bp, ddep->de_fndoffset);

	DE_EXTERNALIZE(ndep, dep);

	/*
	 * Now write the Win95 long name
	 */
	if (ddep->de_fndcnt > 0) {
		uint8_t chksum = winChksum(ndep->deName);
		const u_char *un = (const u_char *)cnp->cn_nameptr;
		int unlen = cnp->cn_namelen;
		int cnt = 1;

		/* LFN entries are written backwards from the 8.3 slot. */
		while (--ddep->de_fndcnt >= 0) {
			if (!(ddep->de_fndoffset & pmp->pm_crbomask)) {
				/* Crossed a cluster boundary: flush and
				 * fetch the previous directory block. */
				if (DOINGASYNC(DETOV(ddep)))
					bdwrite(bp);
				else if ((error = bwrite(bp)) != 0)
					return error;

				ddep->de_fndoffset -= sizeof(struct direntry);
				error = pcbmap(ddep,
				    de_cluster(pmp, ddep->de_fndoffset),
				    &bn, 0, &blsize);
				if (error)
					return error;

				error = bread(pmp->pm_devvp, bn, blsize,
				    NOCRED, &bp);
				if (error) {
					brelse(bp);
					return error;
				}
				ndep = bptoep(pmp, bp, ddep->de_fndoffset);
			} else {
				ndep--;
				ddep->de_fndoffset -= sizeof(struct direntry);
			}
			if (!unix2winfn(un, unlen, (struct winentry *)ndep,
			    cnt++, chksum, pmp))
				break;
		}
	}

	if (DOINGASYNC(DETOV(ddep)))
		bdwrite(bp);
	else if ((error = bwrite(bp)) != 0)
		return error;

	/*
	 * If they want us to return with the denode gotten.
	 */
	if (depp) {
		if (dep->de_Attributes & ATTR_DIRECTORY) {
			dirclust = dep->de_StartCluster;
			if (FAT32(pmp) && dirclust == pmp->pm_rootdirblk)
				dirclust = MSDOSFSROOT;
			if (dirclust == MSDOSFSROOT)
				diroffset = MSDOSFSROOT_OFS;
			else
				diroffset = 0;
		}
		return deget(pmp, dirclust, diroffset, depp);
	}

	return 0;
}

/*
 * Be sure a directory is empty except for "." and "..". Return 1 if empty,
 * return 0 if not empty or error.
*/ int dosdirempty(struct denode *dep) { int blsize; int error; u_long cn; daddr_t bn; struct buf *bp; struct msdosfsmount *pmp = dep->de_pmp; struct direntry *dentp; /* * Since the filesize field in directory entries for a directory is * zero, we just have to feel our way through the directory until * we hit end of file. */ for (cn = 0;; cn++) { if ((error = pcbmap(dep, cn, &bn, 0, &blsize)) != 0) { if (error == E2BIG) return (1); /* it's empty */ return (0); } error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); if (error) { brelse(bp); return (0); } for (dentp = (struct direntry *)bp->b_data; (char *)dentp < bp->b_data + blsize; dentp++) { if (dentp->deName[0] != SLOT_DELETED && (dentp->deAttributes & ATTR_VOLUME) == 0) { /* * In dos directories an entry whose name * starts with SLOT_EMPTY (0) starts the * beginning of the unused part of the * directory, so we can just return that it * is empty. */ if (dentp->deName[0] == SLOT_EMPTY) { brelse(bp); return (1); } /* * Any names other than "." and ".." in a * directory mean it is not empty. */ if (bcmp(dentp->deName, ". ", 11) && bcmp(dentp->deName, ".. ", 11)) { brelse(bp); #ifdef MSDOSFS_DEBUG printf("dosdirempty(): entry found %02x, %02x\n", dentp->deName[0], dentp->deName[1]); #endif return (0); /* not empty */ } } } brelse(bp); } /* NOTREACHED */ } /* * Check to see if the directory described by target is in some * subdirectory of source. This prevents something like the following from * succeeding and leaving a bunch or files and directories orphaned. mv * /a/b/c /a/b/c/d/e/f Where c and f are directories. * * source - the inode for /a/b/c * target - the inode for /a/b/c/d/e/f * * Returns 0 if target is NOT a subdirectory of source. * Otherwise returns a non-zero error number. * The target inode is always unlocked on return. 
*/ int doscheckpath(struct denode *source, struct denode *target) { daddr_t scn; struct msdosfsmount *pmp; struct direntry *ep; struct denode *dep; struct buf *bp = NULL; int error = 0; dep = target; if ((target->de_Attributes & ATTR_DIRECTORY) == 0 || (source->de_Attributes & ATTR_DIRECTORY) == 0) { error = ENOTDIR; goto out; } if (dep->de_StartCluster == source->de_StartCluster) { error = EEXIST; goto out; } if (dep->de_StartCluster == MSDOSFSROOT) goto out; pmp = dep->de_pmp; #ifdef DIAGNOSTIC if (pmp != source->de_pmp) panic("doscheckpath: source and target on different filesystems"); #endif if (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk) goto out; for (;;) { if ((dep->de_Attributes & ATTR_DIRECTORY) == 0) { error = ENOTDIR; break; } scn = dep->de_StartCluster; error = bread(pmp->pm_devvp, cntobn(pmp, scn), pmp->pm_bpcluster, NOCRED, &bp); if (error) break; ep = (struct direntry *) bp->b_data + 1; if ((ep->deAttributes & ATTR_DIRECTORY) == 0 || bcmp(ep->deName, ".. ", 11) != 0) { error = ENOTDIR; break; } scn = getushort(ep->deStartCluster); if (FAT32(pmp)) scn |= getushort(ep->deHighClust) << 16; if (scn == source->de_StartCluster) { error = EINVAL; break; } if (scn == MSDOSFSROOT) break; if (FAT32(pmp) && scn == pmp->pm_rootdirblk) { /* * scn should be 0 in this case, * but we silently ignore the error. */ break; } vput(DETOV(dep)); brelse(bp); bp = NULL; /* NOTE: deget() clears dep on error */ if ((error = deget(pmp, scn, 0, &dep)) != 0) break; } out:; if (bp) brelse(bp); #ifdef MSDOSFS_DEBUG if (error == ENOTDIR) printf("doscheckpath(): .. not a directory?\n"); #endif if (dep != NULL) vput(DETOV(dep)); return (error); } /* * Read in the disk block containing the directory entry (dirclu, dirofs) * and return the address of the buf header, and the address of the * directory entry within the block. 
 */
/*
 * pmp       - mount point
 * dirclust  - cluster the entry lives in (MSDOSFSROOT for the root dir)
 * diroffset - byte offset of the entry within that cluster
 * bpp       - out: locked buffer holding the block (NULL on error)
 * epp       - out (optional): pointer to the entry inside *bpp
 */
int
readep(struct msdosfsmount *pmp, u_long dirclust, u_long diroffset,
    struct buf **bpp, struct direntry **epp)
{
	int error;
	daddr_t bn;
	int blsize;

	blsize = pmp->pm_bpcluster;
	/*
	 * The root directory's last block may be shorter than a full
	 * cluster; clamp the read size so we do not run past it.
	 */
	if (dirclust == MSDOSFSROOT
	    && de_blk(pmp, diroffset + blsize) > pmp->pm_rootdirsize)
		blsize = de_bn2off(pmp, pmp->pm_rootdirsize) & pmp->pm_crbomask;
	bn = detobn(pmp, dirclust, diroffset);
	if ((error = bread(pmp->pm_devvp, bn, blsize, NOCRED, bpp)) != 0) {
		brelse(*bpp);
		*bpp = NULL;
		return (error);
	}
	if (epp)
		*epp = bptoep(pmp, *bpp, diroffset);
	return (0);
}

/*
 * Read in the disk block containing the directory entry dep came from and
 * return the address of the buf header, and the address of the directory
 * entry within the block.
 */
int
readde(struct denode *dep, struct buf **bpp, struct direntry **epp)
{

	/* Convenience wrapper: location is taken from the denode itself. */
	return (readep(dep->de_pmp, dep->de_dirclust, dep->de_diroffset,
	    bpp, epp));
}

/*
 * Remove a directory entry. At this point the file represented by the
 * directory entry to be removed is still full length until no one has it
 * open.  When the file no longer being used msdosfs_inactive() is called
 * and will truncate the file to 0 length.  When the vnode containing the
 * denode is needed for some other purpose by VFS it will call
 * msdosfs_reclaim() which will remove the denode from the denode cache.
 *
 * pdep	directory where the entry is removed
 * dep	file to be removed
 */
int
removede(struct denode *pdep, struct denode *dep)
{
	int error;
	struct direntry *ep;
	struct buf *bp;
	daddr_t bn;
	int blsize;
	struct msdosfsmount *pmp = pdep->de_pmp;
	u_long offset = pdep->de_fndoffset;	/* 8.3 entry found by lookup */

#ifdef MSDOSFS_DEBUG
	printf("removede(): filename %s, dep %p, offset %08lx\n",
	    dep->de_Name, dep, offset);
#endif

	dep->de_refcnt--;
	offset += sizeof(struct direntry);
	/*
	 * Each pass of this loop handles one directory block; the loop
	 * continues into the previous block while it keeps deleting
	 * Win95 long-name entries that precede the 8.3 entry.
	 */
	do {
		offset -= sizeof(struct direntry);
		error = pcbmap(pdep, de_cluster(pmp, offset), &bn, 0, &blsize);
		if (error)
			return error;
		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			return error;
		}
		ep = bptoep(pmp, bp, offset);
		/*
		 * Check whether, if we came here the second time, i.e.
		 * when underflowing into the previous block, the last
		 * entry in this block is a longfilename entry, too.
		 */
		if (ep->deAttributes != ATTR_WIN95 &&
		    offset != pdep->de_fndoffset) {
			brelse(bp);
			break;
		}
		offset += sizeof(struct direntry);
		while (1) {
			/*
			 * We are a bit aggressive here in that we delete any Win95
			 * entries preceding this entry, not just the ones we "own".
			 * Since these presumably aren't valid anyway,
			 * there should be no harm.
			 */
			offset -= sizeof(struct direntry);
			/* Mark the slot deleted, then step ep back one. */
			ep--->deName[0] = SLOT_DELETED;
			if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95)
			    || !(offset & pmp->pm_crbomask)
			    || ep->deAttributes != ATTR_WIN95)
				break;
		}
		if (DOINGASYNC(DETOV(pdep)))
			bdwrite(bp);
		else if ((error = bwrite(bp)) != 0)
			return error;
	} while (!(pmp->pm_flags & MSDOSFSMNT_NOWIN95)
	    && !(offset & pmp->pm_crbomask)
	    && offset);
	return 0;
}

/*
 * Create a unique DOS name in dvp
 *
 * Generates an 8.3 name for cnp (appending a ~N style generation number
 * when needed) and scans directory dep to ensure it does not collide with
 * an existing entry.  The resulting 11-byte name is written to cp.
 */
int
uniqdosname(struct denode *dep, struct componentname *cnp, u_char *cp)
{
	struct msdosfsmount *pmp = dep->de_pmp;
	struct direntry *dentp;
	int gen;
	int blsize;
	u_long cn;
	daddr_t bn;
	struct buf *bp;
	int error;

	if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
		/* Shortname-only mount: name must convert as-is. */
		return (unix2dosfn((const u_char *)cnp->cn_nameptr, cp,
		    cnp->cn_namelen, 0, pmp) ? 0 : EINVAL);

	for (gen = 1;; gen++) {
		/*
		 * Generate DOS name with generation number
		 */
		if (!unix2dosfn((const u_char *)cnp->cn_nameptr, cp,
		    cnp->cn_namelen, gen, pmp))
			return gen == 1 ? EINVAL : EEXIST;

		/*
		 * Now look for a dir entry with this exact name
		 */
		for (cn = error = 0; !error; cn++) {
			if ((error = pcbmap(dep, cn, &bn, 0, &blsize)) != 0) {
				if (error == E2BIG)	/* EOF reached and not found */
					return 0;
				return error;
			}
			error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
			if (error) {
				brelse(bp);
				return error;
			}
			for (dentp = (struct direntry *)bp->b_data;
			    (char *)dentp < bp->b_data + blsize;
			    dentp++) {
				if (dentp->deName[0] == SLOT_EMPTY) {
					/*
					 * Last used entry and not found
					 */
					brelse(bp);
					return 0;
				}
				/*
				 * Ignore volume labels and Win95 entries
				 */
				if (dentp->deAttributes & ATTR_VOLUME)
					continue;
				if (!bcmp(dentp->deName, cp, 11)) {
					/* Collision: try next generation. */
					error = EEXIST;
					break;
				}
			}
			brelse(bp);
		}
	}
}
Index: head/sys/fs/msdosfs/msdosfs_vfsops.c
===================================================================
--- head/sys/fs/msdosfs/msdosfs_vfsops.c	(revision 326267)
+++ head/sys/fs/msdosfs/msdosfs_vfsops.c	(revision 326268)
@@ -1,974 +1,976 @@
/* $FreeBSD$ */
/*	$NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $	*/

/*-
+ * SPDX-License-Identifier: BSD-4-Clause
+ *
 * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
 * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
 * All rights reserved.
 * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3.
All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". * * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef MSDOSFS_DEBUG #include #endif static const char msdosfs_lock_msg[] = "fatlk"; /* Mount options that we support. 
 */
/* Mount options accepted by this filesystem (vfs_filteropt whitelist). */
static const char *msdosfs_opts[] = {
	"async", "noatime", "noclusterr", "noclusterw",
	"export", "force", "from", "sync",
	"cs_dos", "cs_local", "cs_win",
	"dirmask", "gid", "kiconv", "longname",
	"longnames", "mask", "shortname", "shortnames",
	"uid", "win95", "nowin95",
	NULL
};

#if 1 /*def PC98*/
/*
 * XXX - The boot signature formatted by NEC PC-98 DOS looks like a
 *	 garbage or a random value :-{
 *	 If you want to use that broken-signatured media, define the
 *	 following symbol even though PC/AT.
 *	 (ex. mount PC-98 DOS formatted FD on PC/AT)
 */
#define	MSDOSFS_NOCHECKSIG
#endif

MALLOC_DEFINE(M_MSDOSFSMNT, "msdosfs_mount", "MSDOSFS mount structure");
static MALLOC_DEFINE(M_MSDOSFSFAT, "msdosfs_fat",
    "MSDOSFS file allocation table");

/* Hook filled in by the msdosfs_iconv module when it is loaded. */
struct iconv_functions *msdosfs_iconv;

static int	update_mp(struct mount *mp, struct thread *td);
static int	mountmsdosfs(struct vnode *devvp, struct mount *mp);
static vfs_fhtovp_t	msdosfs_fhtovp;
static vfs_mount_t	msdosfs_mount;
static vfs_root_t	msdosfs_root;
static vfs_statfs_t	msdosfs_statfs;
static vfs_sync_t	msdosfs_sync;
static vfs_unmount_t	msdosfs_unmount;

/* Maximum length of a character set name (arbitrary). */
#define	MAXCSLEN	64

/*
 * Apply (or re-apply) mount options from mp->mnt_optnew to the in-core
 * mount structure: kiconv conversion tables, uid/gid/masks, and the
 * short/long-name and Win95 behavior flags.  Returns 0 or an errno.
 */
static int
update_mp(struct mount *mp, struct thread *td)
{
	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
	void *dos, *win, *local;
	int error, v;

	if (!vfs_getopt(mp->mnt_optnew, "kiconv", NULL, NULL)) {
		if (msdosfs_iconv != NULL) {
			/* All three charset names are required for kiconv. */
			error = vfs_getopt(mp->mnt_optnew,
			    "cs_win", &win, NULL);
			if (!error)
				error = vfs_getopt(mp->mnt_optnew,
				    "cs_local", &local, NULL);
			if (!error)
				error = vfs_getopt(mp->mnt_optnew,
				    "cs_dos", &dos, NULL);
			if (!error) {
				msdosfs_iconv->open(win, local, &pmp->pm_u2w);
				msdosfs_iconv->open(local, win, &pmp->pm_w2u);
				msdosfs_iconv->open(dos, local, &pmp->pm_u2d);
				msdosfs_iconv->open(local, dos, &pmp->pm_d2u);
			}
			if (error != 0)
				return (error);
		} else {
			pmp->pm_w2u = NULL;
			pmp->pm_u2w = NULL;
			pmp->pm_d2u = NULL;
			pmp->pm_u2d = NULL;
		}
	}

	if (vfs_scanopt(mp->mnt_optnew, "gid", "%d", &v) == 1)
		pmp->pm_gid = v;
	if (vfs_scanopt(mp->mnt_optnew, "uid", "%d", &v) == 1)
		pmp->pm_uid = v;
	if (vfs_scanopt(mp->mnt_optnew, "mask", "%d", &v) == 1)
		pmp->pm_mask = v & ALLPERMS;
	if (vfs_scanopt(mp->mnt_optnew, "dirmask", "%d", &v) == 1)
		pmp->pm_dirmask = v & ALLPERMS;
	vfs_flagopt(mp->mnt_optnew, "shortname",
	    &pmp->pm_flags, MSDOSFSMNT_SHORTNAME);
	vfs_flagopt(mp->mnt_optnew, "shortnames",
	    &pmp->pm_flags, MSDOSFSMNT_SHORTNAME);
	vfs_flagopt(mp->mnt_optnew, "longname",
	    &pmp->pm_flags, MSDOSFSMNT_LONGNAME);
	vfs_flagopt(mp->mnt_optnew, "longnames",
	    &pmp->pm_flags, MSDOSFSMNT_LONGNAME);
	vfs_flagopt(mp->mnt_optnew, "kiconv",
	    &pmp->pm_flags, MSDOSFSMNT_KICONV);

	if (vfs_getopt(mp->mnt_optnew, "nowin95", NULL, NULL) == 0)
		pmp->pm_flags |= MSDOSFSMNT_NOWIN95;
	else
		pmp->pm_flags &= ~MSDOSFSMNT_NOWIN95;

	/* nowin95 implies shortname-only; otherwise default to longname. */
	if (pmp->pm_flags & MSDOSFSMNT_NOWIN95)
		pmp->pm_flags |= MSDOSFSMNT_SHORTNAME;
	else
		pmp->pm_flags |= MSDOSFSMNT_LONGNAME;
	return 0;
}

/*
 * Compatibility mount entry point: translate the old binary
 * struct msdosfs_args from userland into the modern nmount(2)
 * name/value option list and hand off to kernel_mount().
 */
static int
msdosfs_cmount(struct mntarg *ma, void *data, uint64_t flags)
{
	struct msdosfs_args args;
	struct export_args exp;
	int error;

	if (data == NULL)
		return (EINVAL);
	error = copyin(data, &args, sizeof args);
	if (error)
		return (error);
	vfs_oexport_conv(&args.export, &exp);

	ma = mount_argsu(ma, "from", args.fspec, MAXPATHLEN);
	ma = mount_arg(ma, "export", &exp, sizeof(exp));
	ma = mount_argf(ma, "uid", "%d", args.uid);
	ma = mount_argf(ma, "gid", "%d", args.gid);
	ma = mount_argf(ma, "mask", "%d", args.mask);
	ma = mount_argf(ma, "dirmask", "%d", args.dirmask);

	ma = mount_argb(ma, args.flags & MSDOSFSMNT_SHORTNAME, "noshortname");
	ma = mount_argb(ma, args.flags & MSDOSFSMNT_LONGNAME, "nolongname");
	ma = mount_argb(ma, !(args.flags & MSDOSFSMNT_NOWIN95), "nowin95");
	ma = mount_argb(ma, args.flags & MSDOSFSMNT_KICONV, "nokiconv");

	ma = mount_argsu(ma, "cs_win", args.cs_win, MAXCSLEN);
	ma = mount_argsu(ma, "cs_dos", args.cs_dos, MAXCSLEN);
	ma = mount_argsu(ma, "cs_local", args.cs_local, MAXCSLEN);

	error = kernel_mount(ma, flags);

	return (error);
}

/*
 * mp - path - addr in user space of mount point (ie /usr or whatever)
 * data - addr in user space of mount params including the name of the block
 * special file to treat as a filesystem.
 */
static int
msdosfs_mount(struct mount *mp)
{
	struct vnode *devvp;	  /* vnode for blk device to mount */
	struct thread *td;
	/* msdosfs specific mount control block */
	struct msdosfsmount *pmp = NULL;
	struct nameidata ndp;
	int error, flags;
	accmode_t accmode;
	char *from;

	td = curthread;
	if (vfs_filteropt(mp->mnt_optnew, msdosfs_opts))
		return (EINVAL);

	/*
	 * If updating, check whether changing from read-only to
	 * read/write; if there is no device name, that's all we do.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		pmp = VFSTOMSDOSFS(mp);
		if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) &&
		    vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/* rw -> ro downgrade. */
			error = VFS_SYNC(mp, MNT_WAIT);
			if (error)
				return (error);
			flags = WRITECLOSE;
			if (mp->mnt_flag & MNT_FORCE)
				flags |= FORCECLOSE;
			error = vflush(mp, 0, flags, td);
			if (error)
				return (error);

			/*
			 * Now the volume is clean.  Mark it so while the
			 * device is still rw.
			 */
			error = markvoldirty(pmp, 0);
			if (error) {
				(void)markvoldirty(pmp, 1);
				return (error);
			}

			/* Downgrade the device from rw to ro. */
			g_topology_lock();
			error = g_access(pmp->pm_cp, 0, -1, 0);
			g_topology_unlock();
			if (error) {
				(void)markvoldirty(pmp, 1);
				return (error);
			}

			/*
			 * Backing out after an error was painful in the
			 * above.  Now we are committed to succeeding.
			 */
			pmp->pm_fmod = 0;
			pmp->pm_flags |= MSDOSFSMNT_RONLY;
			MNT_ILOCK(mp);
			mp->mnt_flag |= MNT_RDONLY;
			MNT_IUNLOCK(mp);
		} else if ((pmp->pm_flags & MSDOSFSMNT_RONLY) &&
		    !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) {
			/*
			 * If upgrade to read-write by non-root, then verify
			 * that user has necessary permissions on the device.
			 */
			devvp = pmp->pm_devvp;
			vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
			error = VOP_ACCESS(devvp, VREAD | VWRITE,
			    td->td_ucred, td);
			if (error)
				error = priv_check(td, PRIV_VFS_MOUNT_PERM);
			if (error) {
				VOP_UNLOCK(devvp, 0);
				return (error);
			}
			VOP_UNLOCK(devvp, 0);
			g_topology_lock();
			error = g_access(pmp->pm_cp, 0, 1, 0);
			g_topology_unlock();
			if (error)
				return (error);

			pmp->pm_fmod = 1;
			pmp->pm_flags &= ~MSDOSFSMNT_RONLY;
			MNT_ILOCK(mp);
			mp->mnt_flag &= ~MNT_RDONLY;
			MNT_IUNLOCK(mp);

			/* Now that the volume is modifiable, mark it dirty. */
			error = markvoldirty(pmp, 1);
			if (error)
				return (error);
		}
	}
	/*
	 * Not an update, or updating the name: look up the name
	 * and verify that it refers to a sensible disk device.
	 */
	if (vfs_getopt(mp->mnt_optnew, "from", (void **)&from, NULL))
		return (EINVAL);
	NDINIT(&ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, from, td);
	error = namei(&ndp);
	if (error)
		return (error);
	devvp = ndp.ni_vp;
	NDFREE(&ndp, NDF_ONLY_PNBUF);

	if (!vn_isdisk(devvp, &error)) {
		vput(devvp);
		return (error);
	}
	/*
	 * If mount by non-root, then verify that user has necessary
	 * permissions on the device.
*/ accmode = VREAD; if ((mp->mnt_flag & MNT_RDONLY) == 0) accmode |= VWRITE; error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); if (error) error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { vput(devvp); return (error); } if ((mp->mnt_flag & MNT_UPDATE) == 0) { error = mountmsdosfs(devvp, mp); #ifdef MSDOSFS_DEBUG /* only needed for the printf below */ pmp = VFSTOMSDOSFS(mp); #endif } else { vput(devvp); if (devvp != pmp->pm_devvp) return (EINVAL); /* XXX needs translation */ } if (error) { vrele(devvp); return (error); } error = update_mp(mp, td); if (error) { if ((mp->mnt_flag & MNT_UPDATE) == 0) msdosfs_unmount(mp, MNT_FORCE); return error; } vfs_mountedfrom(mp, from); #ifdef MSDOSFS_DEBUG printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap); #endif return (0); } static int mountmsdosfs(struct vnode *devvp, struct mount *mp) { struct msdosfsmount *pmp; struct buf *bp; struct cdev *dev; union bootsector *bsp; struct byte_bpb33 *b33; struct byte_bpb50 *b50; struct byte_bpb710 *b710; uint8_t SecPerClust; u_long clusters; int ronly, error; struct g_consumer *cp; struct bufobj *bo; bp = NULL; /* This and pmp both used in error_exit. */ pmp = NULL; ronly = (mp->mnt_flag & MNT_RDONLY) != 0; dev = devvp->v_rdev; if (atomic_cmpset_acq_ptr((uintptr_t *)&dev->si_mountpt, 0, (uintptr_t)mp) == 0) { VOP_UNLOCK(devvp, 0); return (EBUSY); } g_topology_lock(); error = g_vfs_open(devvp, &cp, "msdosfs", ronly ? 0 : 1); g_topology_unlock(); if (error != 0) { atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0); VOP_UNLOCK(devvp, 0); return (error); } dev_ref(dev); VOP_UNLOCK(devvp, 0); bo = &devvp->v_bufobj; /* * Read the boot sector of the filesystem, and then check the * boot signature. If not a dos boot sector then error out. * * NOTE: 8192 is a magic size that works for ffs. 
*/ error = bread(devvp, 0, 8192, NOCRED, &bp); if (error) goto error_exit; bp->b_flags |= B_AGE; bsp = (union bootsector *)bp->b_data; b33 = (struct byte_bpb33 *)bsp->bs33.bsBPB; b50 = (struct byte_bpb50 *)bsp->bs50.bsBPB; b710 = (struct byte_bpb710 *)bsp->bs710.bsBPB; #ifndef MSDOSFS_NOCHECKSIG if (bsp->bs50.bsBootSectSig0 != BOOTSIG0 || bsp->bs50.bsBootSectSig1 != BOOTSIG1) { error = EINVAL; goto error_exit; } #endif pmp = malloc(sizeof *pmp, M_MSDOSFSMNT, M_WAITOK | M_ZERO); pmp->pm_mountp = mp; pmp->pm_cp = cp; pmp->pm_bo = bo; lockinit(&pmp->pm_fatlock, 0, msdosfs_lock_msg, 0, 0); /* * Initialize ownerships and permissions, since nothing else will * initialize them iff we are mounting root. */ pmp->pm_uid = UID_ROOT; pmp->pm_gid = GID_WHEEL; pmp->pm_mask = pmp->pm_dirmask = S_IXUSR | S_IXGRP | S_IXOTH | S_IRUSR | S_IRGRP | S_IROTH | S_IWUSR; /* * Compute several useful quantities from the bpb in the * bootsector. Copy in the dos 5 variant of the bpb then fix up * the fields that are different between dos 5 and dos 3.3. */ SecPerClust = b50->bpbSecPerClust; pmp->pm_BytesPerSec = getushort(b50->bpbBytesPerSec); if (pmp->pm_BytesPerSec < DEV_BSIZE) { error = EINVAL; goto error_exit; } pmp->pm_ResSectors = getushort(b50->bpbResSectors); pmp->pm_FATs = b50->bpbFATs; pmp->pm_RootDirEnts = getushort(b50->bpbRootDirEnts); pmp->pm_Sectors = getushort(b50->bpbSectors); pmp->pm_FATsecs = getushort(b50->bpbFATsecs); pmp->pm_SecPerTrack = getushort(b50->bpbSecPerTrack); pmp->pm_Heads = getushort(b50->bpbHeads); pmp->pm_Media = b50->bpbMedia; /* calculate the ratio of sector size to DEV_BSIZE */ pmp->pm_BlkPerSec = pmp->pm_BytesPerSec / DEV_BSIZE; /* * We don't check pm_Heads nor pm_SecPerTrack, because * these may not be set for EFI file systems. We don't * use these anyway, so we're unaffected if they are * invalid. 
*/ if (!pmp->pm_BytesPerSec || !SecPerClust) { error = EINVAL; goto error_exit; } if (pmp->pm_Sectors == 0) { pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs); pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors); } else { pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs); pmp->pm_HugeSectors = pmp->pm_Sectors; } if (pmp->pm_RootDirEnts == 0) { if (pmp->pm_FATsecs || getushort(b710->bpbFSVers)) { error = EINVAL; #ifdef MSDOSFS_DEBUG printf("mountmsdosfs(): bad FAT32 filesystem\n"); #endif goto error_exit; } pmp->pm_fatmask = FAT32_MASK; pmp->pm_fatmult = 4; pmp->pm_fatdiv = 1; pmp->pm_FATsecs = getulong(b710->bpbBigFATsecs); if (getushort(b710->bpbExtFlags) & FATMIRROR) pmp->pm_curfat = getushort(b710->bpbExtFlags) & FATNUM; else pmp->pm_flags |= MSDOSFS_FATMIRROR; } else pmp->pm_flags |= MSDOSFS_FATMIRROR; /* * Check a few values (could do some more): * - logical sector size: power of 2, >= block size * - sectors per cluster: power of 2, >= 1 * - number of sectors: >= 1, <= size of partition * - number of FAT sectors: >= 1 */ if ( (SecPerClust == 0) || (SecPerClust & (SecPerClust - 1)) || (pmp->pm_BytesPerSec < DEV_BSIZE) || (pmp->pm_BytesPerSec & (pmp->pm_BytesPerSec - 1)) || (pmp->pm_HugeSectors == 0) || (pmp->pm_FATsecs == 0) || (SecPerClust * pmp->pm_BlkPerSec > MAXBSIZE / DEV_BSIZE) ) { error = EINVAL; goto error_exit; } pmp->pm_HugeSectors *= pmp->pm_BlkPerSec; pmp->pm_HiddenSects *= pmp->pm_BlkPerSec; /* XXX not used? 
*/ pmp->pm_FATsecs *= pmp->pm_BlkPerSec; SecPerClust *= pmp->pm_BlkPerSec; pmp->pm_fatblk = pmp->pm_ResSectors * pmp->pm_BlkPerSec; if (FAT32(pmp)) { pmp->pm_rootdirblk = getulong(b710->bpbRootClust); pmp->pm_firstcluster = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_fsinfo = getushort(b710->bpbFSInfo) * pmp->pm_BlkPerSec; } else { pmp->pm_rootdirblk = pmp->pm_fatblk + (pmp->pm_FATs * pmp->pm_FATsecs); pmp->pm_rootdirsize = howmany(pmp->pm_RootDirEnts * sizeof(struct direntry), DEV_BSIZE); /* in blocks */ pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize; } pmp->pm_maxcluster = (pmp->pm_HugeSectors - pmp->pm_firstcluster) / SecPerClust + 1; pmp->pm_fatsize = pmp->pm_FATsecs * DEV_BSIZE; /* XXX not used? */ if (pmp->pm_fatmask == 0) { if (pmp->pm_maxcluster <= ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK)) { /* * This will usually be a floppy disk. This size makes * sure that one FAT entry will not be split across * multiple blocks. */ pmp->pm_fatmask = FAT12_MASK; pmp->pm_fatmult = 3; pmp->pm_fatdiv = 2; } else { pmp->pm_fatmask = FAT16_MASK; pmp->pm_fatmult = 2; pmp->pm_fatdiv = 1; } } clusters = (pmp->pm_fatsize / pmp->pm_fatmult) * pmp->pm_fatdiv; if (pmp->pm_maxcluster >= clusters) { #ifdef MSDOSFS_DEBUG printf("Warning: number of clusters (%ld) exceeds FAT " "capacity (%ld)\n", pmp->pm_maxcluster + 1, clusters); #endif pmp->pm_maxcluster = clusters - 1; } if (FAT12(pmp)) pmp->pm_fatblocksize = 3 * 512; else pmp->pm_fatblocksize = PAGE_SIZE; pmp->pm_fatblocksize = roundup(pmp->pm_fatblocksize, pmp->pm_BytesPerSec); pmp->pm_fatblocksec = pmp->pm_fatblocksize / DEV_BSIZE; pmp->pm_bnshift = ffs(DEV_BSIZE) - 1; /* * Compute mask and shift value for isolating cluster relative byte * offsets and cluster numbers from a file offset. 
*/ pmp->pm_bpcluster = SecPerClust * DEV_BSIZE; pmp->pm_crbomask = pmp->pm_bpcluster - 1; pmp->pm_cnshift = ffs(pmp->pm_bpcluster) - 1; /* * Check for valid cluster size * must be a power of 2 */ if (pmp->pm_bpcluster ^ (1 << pmp->pm_cnshift)) { error = EINVAL; goto error_exit; } /* * Release the bootsector buffer. */ brelse(bp); bp = NULL; /* * Check the fsinfo sector if we have one. Silently fix up our * in-core copy of fp->fsinxtfree if it is unknown (0xffffffff) * or too large. Ignore fp->fsinfree for now, since we need to * read the entire FAT anyway to fill the inuse map. */ if (pmp->pm_fsinfo) { struct fsinfo *fp; if ((error = bread(devvp, pmp->pm_fsinfo, pmp->pm_BytesPerSec, NOCRED, &bp)) != 0) goto error_exit; fp = (struct fsinfo *)bp->b_data; if (!bcmp(fp->fsisig1, "RRaA", 4) && !bcmp(fp->fsisig2, "rrAa", 4) && !bcmp(fp->fsisig3, "\0\0\125\252", 4)) { pmp->pm_nxtfree = getulong(fp->fsinxtfree); if (pmp->pm_nxtfree > pmp->pm_maxcluster) pmp->pm_nxtfree = CLUST_FIRST; } else pmp->pm_fsinfo = 0; brelse(bp); bp = NULL; } /* * Finish initializing pmp->pm_nxtfree (just in case the first few * sectors aren't properly reserved in the FAT). This completes * the fixup for fp->fsinxtfree, and fixes up the zero-initialized * value if there is no fsinfo. We will use pmp->pm_nxtfree * internally even if there is no fsinfo. */ if (pmp->pm_nxtfree < CLUST_FIRST) pmp->pm_nxtfree = CLUST_FIRST; /* * Allocate memory for the bitmap of allocated clusters, and then * fill it in. */ pmp->pm_inusemap = malloc(howmany(pmp->pm_maxcluster + 1, N_INUSEBITS) * sizeof(*pmp->pm_inusemap), M_MSDOSFSFAT, M_WAITOK); /* * fillinusemap() needs pm_devvp. */ pmp->pm_devvp = devvp; pmp->pm_dev = dev; /* * Have the inuse map filled in. 
*/ MSDOSFS_LOCK_MP(pmp); error = fillinusemap(pmp); MSDOSFS_UNLOCK_MP(pmp); if (error != 0) goto error_exit; /* * If they want FAT updates to be synchronous then let them suffer * the performance degradation in exchange for the on disk copy of * the FAT being correct just about all the time. I suppose this * would be a good thing to turn on if the kernel is still flakey. */ if (mp->mnt_flag & MNT_SYNCHRONOUS) pmp->pm_flags |= MSDOSFSMNT_WAITONFAT; /* * Finish up. */ if (ronly) pmp->pm_flags |= MSDOSFSMNT_RONLY; else { if ((error = markvoldirty(pmp, 1)) != 0) { (void)markvoldirty(pmp, 0); goto error_exit; } pmp->pm_fmod = 1; } mp->mnt_data = pmp; mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; mp->mnt_kern_flag |= MNTK_USES_BCACHE | MNTK_NO_IOPF; MNT_IUNLOCK(mp); return (0); error_exit: if (bp) brelse(bp); if (cp != NULL) { g_topology_lock(); g_vfs_close(cp); g_topology_unlock(); } if (pmp) { lockdestroy(&pmp->pm_fatlock); free(pmp->pm_inusemap, M_MSDOSFSFAT); free(pmp, M_MSDOSFSMNT); mp->mnt_data = NULL; } atomic_store_rel_ptr((uintptr_t *)&dev->si_mountpt, 0); dev_rel(dev); return (error); } /* * Unmount the filesystem described by mp. 
 */
static int
msdosfs_unmount(struct mount *mp, int mntflags)
{
	struct msdosfsmount *pmp;
	int error, flags;

	error = flags = 0;
	pmp = VFSTOMSDOSFS(mp);
	/* Push dirty data first unless mounted read-only. */
	if ((pmp->pm_flags & MSDOSFSMNT_RONLY) == 0)
		error = msdosfs_sync(mp, MNT_WAIT);
	if ((mntflags & MNT_FORCE) != 0)
		flags |= FORCECLOSE;
	else if (error != 0)
		return (error);
	error = vflush(mp, 0, flags, curthread);
	if (error != 0 && error != ENXIO)
		return (error);
	if ((pmp->pm_flags & MSDOSFSMNT_RONLY) == 0) {
		/* Clear the on-disk dirty flag; ENXIO means device gone. */
		error = markvoldirty(pmp, 0);
		if (error && error != ENXIO) {
			(void)markvoldirty(pmp, 1);
			return (error);
		}
	}
	if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
		/* Close whichever iconv handles update_mp() opened. */
		if (pmp->pm_w2u)
			msdosfs_iconv->close(pmp->pm_w2u);
		if (pmp->pm_u2w)
			msdosfs_iconv->close(pmp->pm_u2w);
		if (pmp->pm_d2u)
			msdosfs_iconv->close(pmp->pm_d2u);
		if (pmp->pm_u2d)
			msdosfs_iconv->close(pmp->pm_u2d);
	}

#ifdef MSDOSFS_DEBUG
	{
		struct vnode *vp = pmp->pm_devvp;
		struct bufobj *bo;

		bo = &vp->v_bufobj;
		BO_LOCK(bo);
		VI_LOCK(vp);
		vn_printf(vp,
		    "msdosfs_umount(): just before calling VOP_CLOSE()\n");
		printf("freef %p, freeb %p, mount %p\n",
		    TAILQ_NEXT(vp, v_actfreelist), vp->v_actfreelist.tqe_prev,
		    vp->v_mount);
		printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n",
		    TAILQ_FIRST(&vp->v_bufobj.bo_clean.bv_hd),
		    TAILQ_FIRST(&vp->v_bufobj.bo_dirty.bv_hd),
		    vp->v_bufobj.bo_numoutput, vp->v_type);
		VI_UNLOCK(vp);
		BO_UNLOCK(bo);
	}
#endif
	/* Release the GEOM consumer, device, and per-mount state. */
	g_topology_lock();
	g_vfs_close(pmp->pm_cp);
	g_topology_unlock();
	atomic_store_rel_ptr((uintptr_t *)&pmp->pm_dev->si_mountpt, 0);
	vrele(pmp->pm_devvp);
	dev_rel(pmp->pm_dev);
	free(pmp->pm_inusemap, M_MSDOSFSFAT);
	lockdestroy(&pmp->pm_fatlock);
	free(pmp, M_MSDOSFSMNT);
	mp->mnt_data = NULL;
	MNT_ILOCK(mp);
	mp->mnt_flag &= ~MNT_LOCAL;
	MNT_IUNLOCK(mp);
	return (error);
}

/*
 * Return a locked vnode for the root (cluster MSDOSFSROOT) of the mount.
 */
static int
msdosfs_root(struct mount *mp, int flags, struct vnode **vpp)
{
	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
	struct denode *ndep;
	int error;

#ifdef MSDOSFS_DEBUG
	printf("msdosfs_root(); mp %p, pmp %p\n", mp, pmp);
#endif
	error = deget(pmp, MSDOSFSROOT, MSDOSFSROOT_OFS, &ndep);
	if (error)
		return (error);
	*vpp = DETOV(ndep);
	return (0);
}

/*
 * Fill in filesystem statistics from the in-core mount state.
 */
static int
msdosfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct msdosfsmount *pmp;

	pmp = VFSTOMSDOSFS(mp);
	sbp->f_bsize = pmp->pm_bpcluster;
	sbp->f_iosize = pmp->pm_bpcluster;
	sbp->f_blocks = pmp->pm_maxcluster + 1;
	sbp->f_bfree = pmp->pm_freeclustercount;
	sbp->f_bavail = pmp->pm_freeclustercount;
	sbp->f_files = pmp->pm_RootDirEnts;	/* XXX */
	sbp->f_ffree = 0;	/* what to put in here? */
	return (0);
}

/*
 * If we have an FSInfo block, update it.
 */
static int
msdosfs_fsiflush(struct msdosfsmount *pmp, int waitfor)
{
	struct fsinfo *fp;
	struct buf *bp;
	int error;

	MSDOSFS_LOCK_MP(pmp);
	/* Nothing to do without an fsinfo sector or pending changes. */
	if (pmp->pm_fsinfo == 0 || (pmp->pm_flags & MSDOSFS_FSIMOD) == 0) {
		error = 0;
		goto unlock;
	}
	error = bread(pmp->pm_devvp, pmp->pm_fsinfo, pmp->pm_BytesPerSec,
	    NOCRED, &bp);
	if (error != 0) {
		brelse(bp);
		goto unlock;
	}
	fp = (struct fsinfo *)bp->b_data;
	putulong(fp->fsinfree, pmp->pm_freeclustercount);
	putulong(fp->fsinxtfree, pmp->pm_nxtfree);
	pmp->pm_flags &= ~MSDOSFS_FSIMOD;
	if (waitfor == MNT_WAIT)
		error = bwrite(bp);
	else
		bawrite(bp);
unlock:
	MSDOSFS_UNLOCK_MP(pmp);
	return (error);
}

/*
 * Sync every modified denode on the mount, then the device vnode and
 * the FSInfo block.  Returns the last error seen (best effort).
 */
static int
msdosfs_sync(struct mount *mp, int waitfor)
{
	struct vnode *vp, *nvp;
	struct thread *td;
	struct denode *dep;
	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
	int error, allerror = 0;

	td = curthread;

	/*
	 * If we ever switch to not updating all of the FATs all the time,
	 * this would be the place to update them from the first one.
	 */
	if (pmp->pm_fmod != 0) {
		if (pmp->pm_flags & MSDOSFSMNT_RONLY)
			panic("msdosfs_sync: rofs mod");
		else {
			/* update FATs here */
		}
	}
	/*
	 * Write back each (modified) denode.
	 */
loop:
	MNT_VNODE_FOREACH_ALL(vp, mp, nvp) {
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		dep = VTODE(vp);
		/* Skip clean vnodes (no timestamp flags, no dirty bufs). */
		if ((dep->de_flag &
		    (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0 &&
		    (vp->v_bufobj.bo_dirty.bv_cnt == 0 ||
		    waitfor == MNT_LAZY)) {
			VI_UNLOCK(vp);
			continue;
		}
		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td);
		if (error) {
			/* Vnode recycled under us: restart the scan. */
			if (error == ENOENT)
				goto loop;
			continue;
		}
		error = VOP_FSYNC(vp, waitfor, td);
		if (error)
			allerror = error;
		VOP_UNLOCK(vp, 0);
		vrele(vp);
	}

	/*
	 * Flush filesystem control info.
	 */
	if (waitfor != MNT_LAZY) {
		vn_lock(pmp->pm_devvp, LK_EXCLUSIVE | LK_RETRY);
		error = VOP_FSYNC(pmp->pm_devvp, waitfor, td);
		if (error)
			allerror = error;
		VOP_UNLOCK(pmp->pm_devvp, 0);
	}

	error = msdosfs_fsiflush(pmp, waitfor);
	if (error != 0)
		allerror = error;
	return (allerror);
}

/*
 * NFS file-handle to vnode: rebuild the denode from the directory
 * cluster/offset stored in the handle.
 */
static int
msdosfs_fhtovp(struct mount *mp, struct fid *fhp, int flags,
    struct vnode **vpp)
{
	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
	struct defid *defhp = (struct defid *) fhp;
	struct denode *dep;
	int error;

	error = deget(pmp, defhp->defid_dirclust, defhp->defid_dirofs, &dep);
	if (error) {
		*vpp = NULLVP;
		return (error);
	}
	*vpp = DETOV(dep);
	vnode_create_vobject(*vpp, dep->de_FileSize, curthread);
	return (0);
}

static struct vfsops msdosfs_vfsops = {
	.vfs_fhtovp =		msdosfs_fhtovp,
	.vfs_mount =		msdosfs_mount,
	.vfs_cmount =		msdosfs_cmount,
	.vfs_root =		msdosfs_root,
	.vfs_statfs =		msdosfs_statfs,
	.vfs_sync =		msdosfs_sync,
	.vfs_unmount =		msdosfs_unmount,
};

VFS_SET(msdosfs_vfsops, msdosfs, 0);
MODULE_VERSION(msdosfs, 1);
Index: head/sys/fs/msdosfs/msdosfs_vnops.c
===================================================================
--- head/sys/fs/msdosfs/msdosfs_vnops.c	(revision 326267)
+++ head/sys/fs/msdosfs/msdosfs_vnops.c	(revision 326268)
@@ -1,1939 +1,1941 @@
/* $FreeBSD$ */
/*	$NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $ */
/*-
+ * SPDX-License-Identifier: BSD-4-Clause
+ *
 * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
* Copyright (C) 1994, 1995, 1997 TooLs GmbH. * All rights reserved. * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". 
* * The author supplies this software to be publicly redistributed on the * understanding that the author is not responsible for the correct * functioning of this software in any circumstances and is not liable for * any damages caused by this software. * * October 1992 */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DOS_FILESIZE_MAX 0xffffffff /* * Prototypes for MSDOSFS vnode operations */ static vop_create_t msdosfs_create; static vop_mknod_t msdosfs_mknod; static vop_open_t msdosfs_open; static vop_close_t msdosfs_close; static vop_access_t msdosfs_access; static vop_getattr_t msdosfs_getattr; static vop_setattr_t msdosfs_setattr; static vop_read_t msdosfs_read; static vop_write_t msdosfs_write; static vop_fsync_t msdosfs_fsync; static vop_remove_t msdosfs_remove; static vop_link_t msdosfs_link; static vop_rename_t msdosfs_rename; static vop_mkdir_t msdosfs_mkdir; static vop_rmdir_t msdosfs_rmdir; static vop_symlink_t msdosfs_symlink; static vop_readdir_t msdosfs_readdir; static vop_bmap_t msdosfs_bmap; static vop_getpages_t msdosfs_getpages; static vop_strategy_t msdosfs_strategy; static vop_print_t msdosfs_print; static vop_pathconf_t msdosfs_pathconf; static vop_vptofh_t msdosfs_vptofh; /* * Some general notes: * * In the ufs filesystem the inodes, superblocks, and indirect blocks are * read/written using the vnode for the filesystem. Blocks that represent * the contents of a file are read/written using the vnode for the file * (including directories when they are read/written as files). This * presents problems for the dos filesystem because data that should be in * an inode (if dos had them) resides in the directory itself. 
Since we
 * must update directory entries without the benefit of having the vnode
 * for the directory we must use the vnode for the filesystem.  This means
 * that when a directory is actually read/written (via read, write, or
 * readdir, or seek) we must use the vnode for the filesystem instead of
 * the vnode for the directory as would happen in ufs.  This is to insure we
 * retrieve the correct block from the buffer cache since the hash value is
 * based upon the vnode address and the desired block number.
 */

/*
 * Create a regular file. On entry the directory to contain the file being
 * created is locked.  We must release before we return. We must also free
 * the pathname buffer pointed at by cnp->cn_pnbuf, always on error, or
 * only if the SAVESTART bit in cn_flags is clear on success.
 */
static int
msdosfs_create(struct vop_create_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct denode ndirent;
	struct denode *dep;
	struct denode *pdep = VTODE(ap->a_dvp);
	struct timespec ts;
	int error;

#ifdef MSDOSFS_DEBUG
	printf("msdosfs_create(cnp %p, vap %p\n", cnp, ap->a_vap);
#endif

	/*
	 * If this is the root directory and there is no space left we
	 * can't do anything.  This is because the root directory can not
	 * change size.
	 */
	if (pdep->de_StartCluster == MSDOSFSROOT
	    && pdep->de_fndoffset >= pdep->de_FileSize) {
		error = ENOSPC;
		goto bad;
	}

	/*
	 * Create a directory entry for the file, then call createde() to
	 * have it installed. NOTE: DOS files are always executable.  We
	 * use the absence of the owner write bit to make the file
	 * readonly.
	 */
#ifdef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("msdosfs_create: no name");
#endif
	memset(&ndirent, 0, sizeof(ndirent));
	error = uniqdosname(pdep, cnp, ndirent.de_Name);
	if (error)
		goto bad;

	ndirent.de_Attributes = ATTR_ARCHIVE;
	ndirent.de_LowerCase = 0;
	ndirent.de_StartCluster = 0;
	ndirent.de_FileSize = 0;
	ndirent.de_pmp = pdep->de_pmp;
	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
	getnanotime(&ts);
	DETIMES(&ndirent, &ts, &ts, &ts);
	error = createde(&ndirent, pdep, &dep, cnp);
	if (error)
		goto bad;
	*ap->a_vpp = DETOV(dep);
	if ((cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(ap->a_dvp, *ap->a_vpp, cnp);
	return (0);

bad:
	return (error);
}

/* Device nodes are not supported on msdosfs. */
static int
msdosfs_mknod(struct vop_mknod_args *ap)
{

	return (EINVAL);
}

/* Open: just make sure a VM object backs the file's pages. */
static int
msdosfs_open(struct vop_open_args *ap)
{
	struct denode *dep = VTODE(ap->a_vp);
	vnode_create_vobject(ap->a_vp, dep->de_FileSize, ap->a_td);
	return 0;
}

/*
 * Close: refresh the cached timestamps if others still hold the vnode.
 */
static int
msdosfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct denode *dep = VTODE(vp);
	struct timespec ts;

	VI_LOCK(vp);
	if (vp->v_usecount > 1) {
		getnanotime(&ts);
		DETIMES(dep, &ts, &ts, &ts);
	}
	VI_UNLOCK(vp);
	return 0;
}

/*
 * Access check: permissions are synthesized from the mount's
 * uid/gid/mask options, since FAT stores no ownership on disk.
 */
static int
msdosfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct denode *dep = VTODE(ap->a_vp);
	struct msdosfsmount *pmp = dep->de_pmp;
	mode_t file_mode;
	accmode_t accmode = ap->a_accmode;

	file_mode = S_IRWXU|S_IRWXG|S_IRWXO;
	file_mode &= (vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);

	/*
	 * Disallow writing to directories and regular files if the
	 * filesystem is read-only.
	 */
	if (accmode & VWRITE) {
		switch (vp->v_type) {
		case VREG:
		case VDIR:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			break;
		default:
			break;
		}
	}

	return (vaccess(vp->v_type, file_mode, pmp->pm_uid, pmp->pm_gid,
	    ap->a_accmode, ap->a_cred, NULL));
}

/*
 * Get attributes: synthesize a struct vattr from the denode's directory
 * entry fields and the mount-wide ownership/permission options.
 */
static int
msdosfs_getattr(struct vop_getattr_args *ap)
{
	struct denode *dep = VTODE(ap->a_vp);
	struct msdosfsmount *pmp = dep->de_pmp;
	struct vattr *vap = ap->a_vap;
	mode_t mode;
	struct timespec ts;
	u_long dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);
	uint64_t fileid;

	getnanotime(&ts);
	DETIMES(dep, &ts, &ts, &ts);
	vap->va_fsid = dev2udev(pmp->pm_dev);
	/*
	 * The following computation of the fileid must be the same as that
	 * used in msdosfs_readdir() to compute d_fileno. If not, pwd
	 * doesn't work.
	 */
	if (dep->de_Attributes & ATTR_DIRECTORY) {
		fileid = (uint64_t)cntobn(pmp, dep->de_StartCluster) *
		    dirsperblk;
		if (dep->de_StartCluster == MSDOSFSROOT)
			fileid = 1;
	} else {
		fileid = (uint64_t)cntobn(pmp, dep->de_dirclust) *
		    dirsperblk;
		if (dep->de_dirclust == MSDOSFSROOT)
			fileid = (uint64_t)roottobn(pmp, 0) * dirsperblk;
		fileid += (uoff_t)dep->de_diroffset / sizeof(struct direntry);
	}
	vap->va_fileid = fileid;

	/* Absence of the write bit models the FAT read-only attribute. */
	mode = S_IRWXU|S_IRWXG|S_IRWXO;
	if (dep->de_Attributes & ATTR_READONLY)
		mode &= ~(S_IWUSR|S_IWGRP|S_IWOTH);
	vap->va_mode = mode &
	    (ap->a_vp->v_type == VDIR ? pmp->pm_dirmask : pmp->pm_mask);
	vap->va_uid = pmp->pm_uid;
	vap->va_gid = pmp->pm_gid;
	vap->va_nlink = 1;
	vap->va_rdev = NODEV;
	vap->va_size = dep->de_FileSize;
	fattime2timespec(dep->de_MDate, dep->de_MTime, 0, 0, &vap->va_mtime);
	vap->va_ctime = vap->va_mtime;
	if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) {
		/* Win95 long-name entries carry access/creation stamps. */
		fattime2timespec(dep->de_ADate, 0, 0, 0, &vap->va_atime);
		fattime2timespec(dep->de_CDate, dep->de_CTime, dep->de_CHun,
		    0, &vap->va_birthtime);
	} else {
		vap->va_atime = vap->va_mtime;
		vap->va_birthtime.tv_sec = -1;
		vap->va_birthtime.tv_nsec = 0;
	}
	vap->va_flags = 0;
	/* Map FAT attribute bits to BSD file flags. */
	if (dep->de_Attributes & ATTR_ARCHIVE)
		vap->va_flags |= UF_ARCHIVE;
	if (dep->de_Attributes & ATTR_HIDDEN)
		vap->va_flags |= UF_HIDDEN;
	if (dep->de_Attributes & ATTR_READONLY)
		vap->va_flags |= UF_READONLY;
	if (dep->de_Attributes & ATTR_SYSTEM)
		vap->va_flags |= UF_SYSTEM;
	vap->va_gen = 0;
	vap->va_blocksize = pmp->pm_bpcluster;
	vap->va_bytes =
	    (dep->de_FileSize + pmp->pm_crbomask) & ~pmp->pm_crbomask;
	vap->va_type = ap->a_vp->v_type;
	vap->va_filerev = dep->de_modrev;
	return (0);
}

/*
 * Set attributes: apply whatever subset of the requested vattr changes
 * FAT can represent (flags, size, times, the read-only bit via mode);
 * reject changes to attributes the filesystem cannot store.
 */
static int
msdosfs_setattr(struct vop_setattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct denode *dep = VTODE(ap->a_vp);
	struct msdosfsmount *pmp = dep->de_pmp;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	struct thread *td = curthread;
	int error = 0;

#ifdef MSDOSFS_DEBUG
	printf("msdosfs_setattr(): vp %p, vap %p, cred %p\n",
	    ap->a_vp, vap, cred);
#endif

	/*
	 * Check for unsettable attributes.
	 */
	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
#ifdef MSDOSFS_DEBUG
		printf("msdosfs_setattr(): returning EINVAL\n");
		printf("    va_type %d, va_nlink %llx, va_fsid %llx, va_fileid %llx\n",
		    vap->va_type, (unsigned long long)vap->va_nlink,
		    (unsigned long long)vap->va_fsid,
		    (unsigned long long)vap->va_fileid);
		printf("    va_blocksize %lx, va_rdev %llx, va_bytes %llx, va_gen %lx\n",
		    vap->va_blocksize, (unsigned long long)vap->va_rdev,
		    (unsigned long long)vap->va_bytes, vap->va_gen);
		printf("    va_uid %x, va_gid %x\n",
		    vap->va_uid, vap->va_gid);
#endif
		return (EINVAL);
	}

	/*
	 * We don't allow setting attributes on the root directory.
	 * The special case for the root directory is because before
	 * FAT32, the root directory didn't have an entry for itself
	 * (and was otherwise special).  With FAT32, the root
	 * directory is not so special, but still doesn't have an
	 * entry for itself.
	 */
	if (vp->v_vflag & VV_ROOT)
		return (EINVAL);

	if (vap->va_flags != VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		if (cred->cr_uid != pmp->pm_uid) {
			error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
			if (error)
				return (error);
		}
		/*
		 * We are very inconsistent about handling unsupported
		 * attributes.  We ignored the access time and the
		 * read and execute bits.  We were strict for the other
		 * attributes.
		 */
		if (vap->va_flags & ~(UF_ARCHIVE | UF_HIDDEN | UF_READONLY |
		    UF_SYSTEM))
			return EOPNOTSUPP;
		if (vap->va_flags & UF_ARCHIVE)
			dep->de_Attributes |= ATTR_ARCHIVE;
		else
			dep->de_Attributes &= ~ATTR_ARCHIVE;
		if (vap->va_flags & UF_HIDDEN)
			dep->de_Attributes |= ATTR_HIDDEN;
		else
			dep->de_Attributes &= ~ATTR_HIDDEN;
		/* We don't allow changing the readonly bit on directories. */
		if (vp->v_type != VDIR) {
			if (vap->va_flags & UF_READONLY)
				dep->de_Attributes |= ATTR_READONLY;
			else
				dep->de_Attributes &= ~ATTR_READONLY;
		}
		if (vap->va_flags & UF_SYSTEM)
			dep->de_Attributes |= ATTR_SYSTEM;
		else
			dep->de_Attributes &= ~ATTR_SYSTEM;
		dep->de_flag |= DE_MODIFIED;
	}

	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		uid_t uid;
		gid_t gid;

		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		uid = vap->va_uid;
		if (uid == (uid_t)VNOVAL)
			uid = pmp->pm_uid;
		gid = vap->va_gid;
		if (gid == (gid_t)VNOVAL)
			gid = pmp->pm_gid;
		if (cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid ||
		    (gid != pmp->pm_gid && !groupmember(gid, cred))) {
			error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0);
			if (error)
				return (error);
		}
		/* Ownership is per-mount on FAT; only a no-op chown works. */
		if (uid != pmp->pm_uid || gid != pmp->pm_gid)
			return EINVAL;
	}

	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VREG:
			/*
			 * Truncation is only supported for regular files,
			 * Disallow it if the filesystem is read-only.
			 */
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			break;
		default:
			/*
			 * According to POSIX, the result is unspecified
			 * for file types other than regular files,
			 * directories and shared memory objects.  We
			 * don't support any file types except regular
			 * files and directories in this file system, so
			 * this (default) case is unreachable and can do
			 * anything.  Keep falling through to detrunc()
			 * for now.
			 */
			break;
		}
		error = detrunc(dep, vap->va_size, 0, cred);
		if (error)
			return error;
	}
	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		error = vn_utimes_perm(vp, vap, cred, td);
		if (error != 0)
			return (error);
		if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 &&
		    vap->va_atime.tv_sec != VNOVAL) {
			dep->de_flag &= ~DE_ACCESS;
			timespec2fattime(&vap->va_atime, 0,
			    &dep->de_ADate, NULL, NULL);
		}
		if (vap->va_mtime.tv_sec != VNOVAL) {
			dep->de_flag &= ~DE_UPDATE;
			timespec2fattime(&vap->va_mtime, 0,
			    &dep->de_MDate, &dep->de_MTime, NULL);
		}
		/*
		 * We don't set the archive bit when modifying the time of
		 * a directory to emulate the Windows/DOS behavior.
		 */
		if (vp->v_type != VDIR)
			dep->de_Attributes |= ATTR_ARCHIVE;
		dep->de_flag |= DE_MODIFIED;
	}
	/*
	 * DOS files only have the ability to have their writability
	 * attribute set, so we use the owner write bit to set the readonly
	 * attribute.
	 */
	if (vap->va_mode != (mode_t)VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		if (cred->cr_uid != pmp->pm_uid) {
			error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
			if (error)
				return (error);
		}
		if (vp->v_type != VDIR) {
			/* We ignore the read and execute bits. */
			if (vap->va_mode & S_IWUSR)
				dep->de_Attributes &= ~ATTR_READONLY;
			else
				dep->de_Attributes |= ATTR_READONLY;
			dep->de_Attributes |= ATTR_ARCHIVE;
			dep->de_flag |= DE_MODIFIED;
		}
	}
	return (deupdat(dep, 0));
}

/*
 * Read from a file or directory.  Directory data is read through the
 * device vnode (see the "general notes" comment above); regular-file
 * data goes through the file vnode, with cluster read-ahead when the
 * mount allows it.
 */
static int
msdosfs_read(struct vop_read_args *ap)
{
	int error = 0;
	int blsize;
	int isadir;
	ssize_t orig_resid;
	u_int n;
	u_long diff;
	u_long on;
	daddr_t lbn;
	daddr_t rablock;
	int rasize;
	int seqcount;
	struct buf *bp;
	struct vnode *vp = ap->a_vp;
	struct denode *dep = VTODE(vp);
	struct msdosfsmount *pmp = dep->de_pmp;
	struct uio *uio = ap->a_uio;

	/*
	 * If they didn't ask for any data, then we are done.
	 */
	orig_resid = uio->uio_resid;
	if (orig_resid == 0)
		return (0);

	/*
	 * The caller is supposed to ensure that
	 * uio->uio_offset >= 0 and uio->uio_resid >= 0.
	 * We don't need to check for large offsets as in ffs because
	 * dep->de_FileSize <= DOS_FILESIZE_MAX < OFF_MAX, so large
	 * offsets cannot cause overflow even in theory.
	 */

	seqcount = ap->a_ioflag >> IO_SEQSHIFT;

	isadir = dep->de_Attributes & ATTR_DIRECTORY;
	do {
		if (uio->uio_offset >= dep->de_FileSize)
			break;
		lbn = de_cluster(pmp, uio->uio_offset);
		rablock = lbn + 1;
		blsize = pmp->pm_bpcluster;
		on = uio->uio_offset & pmp->pm_crbomask;
		/*
		 * If we are operating on a directory file then be sure to
		 * do i/o with the vnode for the filesystem instead of the
		 * vnode for the directory.
		 */
		if (isadir) {
			/* convert cluster # to block # */
			error = pcbmap(dep, lbn, &lbn, 0, &blsize);
			if (error == E2BIG) {
				error = EINVAL;
				break;
			} else if (error)
				break;
			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED,
			    &bp);
		} else if (de_cn2off(pmp, rablock) >= dep->de_FileSize) {
			/* Last cluster of the file: plain read, no RA. */
			error = bread(vp, lbn, blsize, NOCRED, &bp);
		} else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
			error = cluster_read(vp, dep->de_FileSize, lbn,
			    blsize, NOCRED, on + uio->uio_resid, seqcount,
			    0, &bp);
		} else if (seqcount > 1) {
			rasize = blsize;
			error = breadn(vp, lbn, blsize, &rablock, &rasize, 1,
			    NOCRED, &bp);
		} else {
			error = bread(vp, lbn, blsize, NOCRED, &bp);
		}
		if (error) {
			brelse(bp);
			break;
		}
		/* n = bytes to copy: bounded by cluster, EOF and buffer. */
		diff = pmp->pm_bpcluster - on;
		n = diff > uio->uio_resid ? uio->uio_resid : diff;
		diff = dep->de_FileSize - uio->uio_offset;
		if (diff < n)
			n = diff;
		diff = blsize - bp->b_resid;
		if (diff < n)
			n = diff;
		error = vn_io_fault_uiomove(bp->b_data + on, (int) n, uio);
		brelse(bp);
	} while (error == 0 && uio->uio_resid > 0 && n != 0);
	if (!isadir && (error == 0 || uio->uio_resid != orig_resid) &&
	    (vp->v_mount->mnt_flag & (MNT_NOATIME | MNT_RDONLY)) == 0)
		dep->de_flag |= DE_ACCESS;
	return (error);
}

/*
 * Write data to a file or directory.
 */
static int
msdosfs_write(struct vop_write_args *ap)
{
	int n;
	int croffset;
	ssize_t resid;
	u_long osize;
	int error = 0;
	u_long count;
	int seqcount;
	daddr_t bn, lastcn;
	struct buf *bp;
	int ioflag = ap->a_ioflag;
	struct uio *uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct vnode *thisvp;
	struct denode *dep = VTODE(vp);
	struct msdosfsmount *pmp = dep->de_pmp;
	struct ucred *cred = ap->a_cred;

#ifdef MSDOSFS_DEBUG
	printf("msdosfs_write(vp %p, uio %p, ioflag %x, cred %p\n",
	    vp, uio, ioflag, cred);
	printf("msdosfs_write(): diroff %lu, dirclust %lu, startcluster %lu\n",
	    dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster);
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = dep->de_FileSize;
		thisvp = vp;
		break;
	case VDIR:
		return EISDIR;
	default:
		panic("msdosfs_write(): bad file type");
	}

	/*
	 * This is needed (unlike in ffs_write()) because we extend the
	 * file outside of the loop but we don't want to extend the file
	 * for writes of 0 bytes.
	 */
	if (uio->uio_resid == 0)
		return (0);

	/*
	 * The caller is supposed to ensure that
	 * uio->uio_offset >= 0 and uio->uio_resid >= 0.
	 */
	if ((uoff_t)uio->uio_offset + uio->uio_resid > DOS_FILESIZE_MAX)
		return (EFBIG);

	/*
	 * If they've exceeded their filesize limit, tell them about it.
	 */
	if (vn_rlimit_fsize(vp, uio, uio->uio_td))
		return (EFBIG);

	/*
	 * If the offset we are starting the write at is beyond the end of
	 * the file, then they've done a seek.  Unix filesystems allow
	 * files with holes in them, DOS doesn't so we must fill the hole
	 * with zeroed blocks.
	 */
	if (uio->uio_offset > dep->de_FileSize) {
		error = deextend(dep, uio->uio_offset, cred);
		if (error)
			return (error);
	}

	/*
	 * Remember some values in case the write fails.
	 */
	resid = uio->uio_resid;
	osize = dep->de_FileSize;

	/*
	 * If we write beyond the end of the file, extend it to its ultimate
	 * size ahead of the time to hopefully get a contiguous area.
	 */
	if (uio->uio_offset + resid > osize) {
		count = de_clcount(pmp, uio->uio_offset + resid) -
		    de_clcount(pmp, osize);
		error = extendfile(dep, count, NULL, NULL, 0);
		if (error && (error != ENOSPC || (ioflag & IO_UNIT)))
			goto errexit;
		lastcn = dep->de_fc[FC_LASTFC].fc_frcn;
	} else
		lastcn = de_clcount(pmp, osize) - 1;

	seqcount = ioflag >> IO_SEQSHIFT;
	do {
		if (de_cluster(pmp, uio->uio_offset) > lastcn) {
			error = ENOSPC;
			break;
		}

		croffset = uio->uio_offset & pmp->pm_crbomask;
		n = min(uio->uio_resid, pmp->pm_bpcluster - croffset);
		if (uio->uio_offset + n > dep->de_FileSize) {
			dep->de_FileSize = uio->uio_offset + n;
			/* The object size needs to be set before buffer is allocated */
			vnode_pager_setsize(vp, dep->de_FileSize);
		}

		bn = de_cluster(pmp, uio->uio_offset);
		if ((uio->uio_offset & pmp->pm_crbomask) == 0 &&
		    (de_cluster(pmp, uio->uio_offset + uio->uio_resid) >
		    de_cluster(pmp, uio->uio_offset) ||
		    uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) {
			/*
			 * If either the whole cluster gets written,
			 * or we write the cluster from its start beyond EOF,
			 * then no need to read data from disk.
			 */
			bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0, 0);
			/*
			 * This call to vfs_bio_clrbuf() ensures that
			 * even if vn_io_fault_uiomove() below faults,
			 * garbage from the newly instantiated buffer
			 * is not exposed to the userspace via mmap().
			 */
			vfs_bio_clrbuf(bp);
			/*
			 * Do the bmap now, since pcbmap needs buffers
			 * for the FAT table. (see msdosfs_strategy)
			 */
			if (bp->b_blkno == bp->b_lblkno) {
				error = pcbmap(dep, bp->b_lblkno, &bn, 0, 0);
				if (error)
					bp->b_blkno = -1;
				else
					bp->b_blkno = bn;
			}
			if (bp->b_blkno == -1) {
				brelse(bp);
				if (!error)
					error = EIO;		/* XXX */
				break;
			}
		} else {
			/*
			 * The block we need to write into exists, so read it in.
			 */
			error = bread(thisvp, bn, pmp->pm_bpcluster, cred, &bp);
			if (error) {
				brelse(bp);
				break;
			}
		}

		/*
		 * Should these vnode_pager_* functions be done on dir
		 * files?
		 */

		/*
		 * Copy the data from user space into the buf header.
		 */
		error = vn_io_fault_uiomove(bp->b_data + croffset, n, uio);
		if (error) {
			brelse(bp);
			break;
		}

		/* Prepare for clustered writes in some else clauses. */
		if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
			bp->b_flags |= B_CLUSTEROK;

		/*
		 * If IO_SYNC, then each buffer is written synchronously.
		 * Otherwise, if we have a severe page deficiency then
		 * write the buffer asynchronously.  Otherwise, if on a
		 * cluster boundary then write the buffer asynchronously,
		 * combining it with contiguous clusters if permitted and
		 * possible, since we don't expect more writes into this
		 * buffer soon.  Otherwise, do a delayed write because we
		 * expect more writes into this buffer soon.
		 */
		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (vm_page_count_severe() || buf_dirty_count_severe())
			bawrite(bp);
		else if (n + croffset == pmp->pm_bpcluster) {
			if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0)
				cluster_write(vp, bp, dep->de_FileSize,
				    seqcount, 0);
			else
				bawrite(bp);
		} else
			bdwrite(bp);
		dep->de_flag |= DE_UPDATE;
	} while (error == 0 && uio->uio_resid > 0);

	/*
	 * If the write failed and they want us to, truncate the file back
	 * to the size it was before the write was attempted.
	 */
errexit:
	if (error) {
		if (ioflag & IO_UNIT) {
			detrunc(dep, osize, ioflag & IO_SYNC, NOCRED);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
		} else {
			detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC,
			    NOCRED);
			if (uio->uio_resid != resid)
				error = 0;
		}
	} else if (ioflag & IO_SYNC)
		error = deupdat(dep, 1);
	return (error);
}

/*
 * Flush the blocks of a file to disk.
 */
static int
msdosfs_fsync(struct vop_fsync_args *ap)
{
	struct vnode *devvp;
	int allerror, error;

	vop_stdfsync(ap);

	/*
	 * If the syncing request comes from fsync(2), sync the entire
	 * FAT and any other metadata that happens to be on devvp.  We
	 * need this mainly for the FAT.  We write the FAT sloppily, and
	 * syncing it all now is the best we can easily do to get all
	 * directory entries associated with the file (not just the file)
	 * fully synced.  The other metadata includes critical metadata
	 * for all directory entries, but only in the MNT_ASYNC case.  We
	 * will soon sync all metadata in the file's directory entry.
	 * Non-critical metadata for associated directory entries only
	 * gets synced accidentally, as in most file systems.
	 */
	if (ap->a_waitfor == MNT_WAIT) {
		devvp = VTODE(ap->a_vp)->de_pmp->pm_devvp;
		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
		allerror = VOP_FSYNC(devvp, MNT_WAIT, ap->a_td);
		VOP_UNLOCK(devvp, 0);
	} else
		allerror = 0;

	error = deupdat(VTODE(ap->a_vp), ap->a_waitfor == MNT_WAIT);
	if (allerror == 0)
		allerror = error;
	return (allerror);
}

/*
 * Remove (unlink) a file (VOP_REMOVE).  Directories are rejected with
 * EPERM; they are removed via msdosfs_rmdir() instead.
 */
static int
msdosfs_remove(struct vop_remove_args *ap)
{
	struct denode *dep = VTODE(ap->a_vp);
	struct denode *ddep = VTODE(ap->a_dvp);
	int error;

	if (ap->a_vp->v_type == VDIR)
		error = EPERM;
	else
		error = removede(ddep, dep);
#ifdef MSDOSFS_DEBUG
	printf("msdosfs_remove(), dep %p, v_usecount %d\n",
	    dep, ap->a_vp->v_usecount);
#endif
	return (error);
}

/*
 * DOS filesystems don't know what links are.
 */
static int
msdosfs_link(struct vop_link_args *ap)
{
	return (EOPNOTSUPP);
}

/*
 * Renames on files require moving the denode to a new hash queue since the
 * denode's location is used to compute which hash queue to put the file
 * in.  Unless it is a rename in place.  For example "mv a b".
 *
 * What follows is the basic algorithm:
 *
 * if (file move) {
 *	if (dest file exists) {
 *		remove dest file
 *	}
 *	if (dest and src in same directory) {
 *		rewrite name in existing directory slot
 *	} else {
 *		write new entry in dest directory
 *		update offset and dirclust in denode
 *		move denode to new hash chain
 *		clear old directory entry
 *	}
 * } else {
 *	directory move
 *	if (dest directory exists) {
 *		if (dest is not empty) {
 *			return ENOTEMPTY
 *		}
 *		remove dest directory
 *	}
 *	if (dest and src in same directory) {
 *		rewrite name in existing entry
 *	} else {
 *		be sure dest is not a child of src directory
 *		write entry in dest directory
 *		update "." and ".." in moved directory
 *		clear old directory entry for moved directory
 *	}
 * }
 *
 * On entry:
 *	source's parent directory is unlocked
 *	source file or directory is unlocked
 *	destination's parent directory is locked
 *	destination file or directory is locked if it exists
 *
 * On exit:
 *	all denodes should be released
 */
static int
msdosfs_rename(struct vop_rename_args *ap)
{
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct vnode *tvp = ap->a_tvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	struct denode *ip, *xp, *dp, *zp;
	u_char toname[12], oldname[11];
	u_long from_diroffset, to_diroffset;
	u_char to_count;
	int doingdirectory = 0, newparent = 0;
	int error;
	u_long cn, pcl;
	daddr_t bn;
	struct msdosfsmount *pmp;
	struct direntry *dotdotp;
	struct buf *bp;

	pmp = VFSTOMSDOSFS(fdvp->v_mount);

#ifdef DIAGNOSTIC
	if ((tcnp->cn_flags & HASBUF) == 0 ||
	    (fcnp->cn_flags & HASBUF) == 0)
		panic("msdosfs_rename: no name");
#endif
	/*
	 * Check for cross-device rename.
	 */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp && fvp->v_mount != tvp->v_mount)) {
		error = EXDEV;
abortit:
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fdvp);
		vrele(fvp);
		return (error);
	}

	/*
	 * If source and dest are the same, do nothing.
	 */
	if (tvp == fvp) {
		error = 0;
		goto abortit;
	}

	error = vn_lock(fvp, LK_EXCLUSIVE);
	if (error)
		goto abortit;
	dp = VTODE(fdvp);
	ip = VTODE(fvp);

	/*
	 * Be sure we are not renaming ".", "..", or an alias of ".". This
	 * leads to a crippled directory tree.  It's pretty tough to do a
	 * "ls" or "pwd" with the "." directory entry missing, and "cd .."
	 * doesn't work if the ".." entry is missing.
	 */
	if (ip->de_Attributes & ATTR_DIRECTORY) {
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 */
		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
		    dp == ip ||
		    (fcnp->cn_flags & ISDOTDOT) ||
		    (tcnp->cn_flags & ISDOTDOT) ||
		    (ip->de_flag & DE_RENAME)) {
			VOP_UNLOCK(fvp, 0);
			error = EINVAL;
			goto abortit;
		}
		ip->de_flag |= DE_RENAME;
		doingdirectory++;
	}

	/*
	 * When the target exists, both the directory
	 * and target vnodes are returned locked.
	 */
	dp = VTODE(tdvp);
	xp = tvp ? VTODE(tvp) : NULL;
	/*
	 * Remember direntry place to use for destination
	 */
	to_diroffset = dp->de_fndoffset;
	to_count = dp->de_fndcnt;

	/*
	 * If ".." must be changed (ie the directory gets a new
	 * parent) then the source directory must not be in the
	 * directory hierarchy above the target, as this would
	 * orphan everything below the source directory. Also
	 * the user must have write permission in the source so
	 * as to be able to change "..". We must repeat the call
	 * to namei, as the parent directory is unlocked by the
	 * call to doscheckpath().
	 */
	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
	VOP_UNLOCK(fvp, 0);
	if (VTODE(fdvp)->de_StartCluster != VTODE(tdvp)->de_StartCluster)
		newparent = 1;
	if (doingdirectory && newparent) {
		if (error)	/* write access check above */
			goto bad;
		if (xp != NULL)
			vput(tvp);
		/*
		 * doscheckpath() vput()'s dp,
		 * so we have to do a relookup afterwards
		 */
		error = doscheckpath(ip, dp);
		if (error)
			goto out;
		if ((tcnp->cn_flags & SAVESTART) == 0)
			panic("msdosfs_rename: lost to startdir");
		error = relookup(tdvp, &tvp, tcnp);
		if (error)
			goto out;
		dp = VTODE(tdvp);
		xp = tvp ? VTODE(tvp) : NULL;
	}

	if (xp != NULL) {
		/*
		 * Target must be empty if a directory and have no links
		 * to it. Also, ensure source and target are compatible
		 * (both directories, or both not directories).
		 */
		if (xp->de_Attributes & ATTR_DIRECTORY) {
			if (!dosdirempty(xp)) {
				error = ENOTEMPTY;
				goto bad;
			}
			if (!doingdirectory) {
				error = ENOTDIR;
				goto bad;
			}
			cache_purge(tdvp);
		} else if (doingdirectory) {
			error = EISDIR;
			goto bad;
		}
		error = removede(dp, xp);
		if (error)
			goto bad;
		vput(tvp);
		xp = NULL;
	}

	/*
	 * Convert the filename in tcnp into a dos filename. We copy this
	 * into the denode and directory entry for the destination
	 * file/directory.
	 */
	error = uniqdosname(VTODE(tdvp), tcnp, toname);
	if (error)
		goto abortit;

	/*
	 * Since from wasn't locked at various places above,
	 * have to do a relookup here.
	 */
	fcnp->cn_flags &= ~MODMASK;
	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
	if ((fcnp->cn_flags & SAVESTART) == 0)
		panic("msdosfs_rename: lost from startdir");
	if (!newparent)
		VOP_UNLOCK(tdvp, 0);
	if (relookup(fdvp, &fvp, fcnp) == 0)
		vrele(fdvp);
	if (fvp == NULL) {
		/*
		 * From name has disappeared.
		 */
		if (doingdirectory)
			panic("rename: lost dir entry");
		if (newparent)
			VOP_UNLOCK(tdvp, 0);
		vrele(tdvp);
		vrele(ap->a_fvp);
		return 0;
	}
	xp = VTODE(fvp);
	zp = VTODE(fdvp);
	from_diroffset = zp->de_fndoffset;

	/*
	 * Ensure that the directory entry still exists and has not
	 * changed till now. If the source is a file the entry may
	 * have been unlinked or renamed. In either case there is
	 * no further work to be done. If the source is a directory
	 * then it cannot have been rmdir'ed or renamed; this is
	 * prohibited by the DE_RENAME flag.
	 */
	if (xp != ip) {
		if (doingdirectory)
			panic("rename: lost dir entry");
		VOP_UNLOCK(fvp, 0);
		if (newparent)
			VOP_UNLOCK(fdvp, 0);
		vrele(ap->a_fvp);
		xp = NULL;
	} else {
		vrele(fvp);
		xp = NULL;

		/*
		 * First write a new entry in the destination
		 * directory and mark the entry in the source directory
		 * as deleted.  Then move the denode to the correct hash
		 * chain for its new location in the filesystem.  And, if
		 * we moved a directory, then update its .. entry to point
		 * to the new parent directory.
		 */
		memcpy(oldname, ip->de_Name, 11);
		memcpy(ip->de_Name, toname, 11);	/* update denode */
		dp->de_fndoffset = to_diroffset;
		dp->de_fndcnt = to_count;
		error = createde(ip, dp, (struct denode **)0, tcnp);
		if (error) {
			memcpy(ip->de_Name, oldname, 11);
			if (newparent)
				VOP_UNLOCK(fdvp, 0);
			VOP_UNLOCK(fvp, 0);
			goto bad;
		}
		/*
		 * If ip is for a directory, then its name should always
		 * be "." since it is for the directory entry in the
		 * directory itself (msdosfs_lookup() always translates
		 * to the "." entry so as to get a unique denode, except
		 * for the root directory there are different
		 * complications).  However, we just corrupted its name
		 * to pass the correct name to createde().  Undo this.
		 */
		if ((ip->de_Attributes & ATTR_DIRECTORY) != 0)
			memcpy(ip->de_Name, oldname, 11);
		ip->de_refcnt++;
		zp->de_fndoffset = from_diroffset;
		error = removede(zp, ip);
		if (error) {
			/* XXX should downgrade to ro here, fs is corrupt */
			if (newparent)
				VOP_UNLOCK(fdvp, 0);
			VOP_UNLOCK(fvp, 0);
			goto bad;
		}
		if (!doingdirectory) {
			error = pcbmap(dp, de_cluster(pmp, to_diroffset), 0,
			    &ip->de_dirclust, 0);
			if (error) {
				/* XXX should downgrade to ro here, fs is corrupt */
				if (newparent)
					VOP_UNLOCK(fdvp, 0);
				VOP_UNLOCK(fvp, 0);
				goto bad;
			}
			if (ip->de_dirclust == MSDOSFSROOT)
				ip->de_diroffset = to_diroffset;
			else
				ip->de_diroffset = to_diroffset & pmp->pm_crbomask;
		}
		reinsert(ip);
		if (newparent)
			VOP_UNLOCK(fdvp, 0);
	}

	/*
	 * If we moved a directory to a new parent directory, then we must
	 * fixup the ".." entry in the moved directory.
	 */
	if (doingdirectory && newparent) {
		cn = ip->de_StartCluster;
		if (cn == MSDOSFSROOT) {
			/* this should never happen */
			panic("msdosfs_rename(): updating .. in root directory?");
		} else
			bn = cntobn(pmp, cn);
		error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster,
		    NOCRED, &bp);
		if (error) {
			/* XXX should downgrade to ro here, fs is corrupt */
			brelse(bp);
			VOP_UNLOCK(fvp, 0);
			goto bad;
		}
		dotdotp = (struct direntry *)bp->b_data + 1;
		pcl = dp->de_StartCluster;
		if (FAT32(pmp) && pcl == pmp->pm_rootdirblk)
			pcl = MSDOSFSROOT;
		putushort(dotdotp->deStartCluster, pcl);
		if (FAT32(pmp))
			putushort(dotdotp->deHighClust, pcl >> 16);
		if (DOINGASYNC(fvp))
			bdwrite(bp);
		else if ((error = bwrite(bp)) != 0) {
			/* XXX should downgrade to ro here, fs is corrupt */
			VOP_UNLOCK(fvp, 0);
			goto bad;
		}
	}

	/*
	 * The msdosfs lookup is case insensitive. Several aliases may
	 * be inserted for a single directory entry. As a consequence,
	 * name cache purge done by lookup for fvp when DELETE op for
	 * namei is specified, might be not enough to expunge all
	 * namecache entries that were installed for this direntry.
	 */
	cache_purge(fvp);
	VOP_UNLOCK(fvp, 0);
bad:
	if (xp)
		vput(tvp);
	vput(tdvp);
out:
	ip->de_flag &= ~DE_RENAME;
	vrele(fdvp);
	vrele(fvp);
	return (error);
}

/*
 * Template for the "." and ".." entries written into a freshly created
 * directory by msdosfs_mkdir().
 *
 * NOTE(review): the name fields below appear with collapsed whitespace in
 * this mangled view; the canonical on-disk short names are 11 bytes,
 * space-padded ("." / ".." plus padding) — confirm against the repository
 * copy before relying on the literal widths shown here.
 */
static struct {
	struct direntry dot;
	struct direntry dotdot;
} dosdirtemplate = {
	{	". ",				/* the . entry */
		ATTR_DIRECTORY,			/* file attribute */
		0,				/* reserved */
		0, { 0, 0 }, { 0, 0 },		/* create time & date */
		{ 0, 0 },			/* access date */
		{ 0, 0 },			/* high bits of start cluster */
		{ 210, 4 }, { 210, 4 },		/* modify time & date */
		{ 0, 0 },			/* startcluster */
		{ 0, 0, 0, 0 }			/* filesize */
	},
	{	".. ",				/* the .. entry */
		ATTR_DIRECTORY,			/* file attribute */
		0,				/* reserved */
		0, { 0, 0 }, { 0, 0 },		/* create time & date */
		{ 0, 0 },			/* access date */
		{ 0, 0 },			/* high bits of start cluster */
		{ 210, 4 }, { 210, 4 },		/* modify time & date */
		{ 0, 0 },			/* startcluster */
		{ 0, 0, 0, 0 }			/* filesize */
	}
};

/*
 * Create a new directory (VOP_MKDIR): allocate a cluster, populate it
 * with "." and "..", then create the entry in the parent directory.
 */
static int
msdosfs_mkdir(struct vop_mkdir_args *ap)
{
	struct componentname *cnp = ap->a_cnp;
	struct denode *dep;
	struct denode *pdep = VTODE(ap->a_dvp);
	struct direntry *denp;
	struct msdosfsmount *pmp = pdep->de_pmp;
	struct buf *bp;
	u_long newcluster, pcl;
	int bn;
	int error;
	struct denode ndirent;
	struct timespec ts;

	/*
	 * If this is the root directory and there is no space left we
	 * can't do anything.  This is because the root directory can not
	 * change size.
	 */
	if (pdep->de_StartCluster == MSDOSFSROOT
	    && pdep->de_fndoffset >= pdep->de_FileSize) {
		error = ENOSPC;
		goto bad2;
	}

	/*
	 * Allocate a cluster to hold the about to be created directory.
	 */
	error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL);
	if (error)
		goto bad2;

	memset(&ndirent, 0, sizeof(ndirent));
	ndirent.de_pmp = pmp;
	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
	getnanotime(&ts);
	DETIMES(&ndirent, &ts, &ts, &ts);

	/*
	 * Now fill the cluster with the "." and ".." entries. And write
	 * the cluster to disk.  This way it is there for the parent
	 * directory to be pointing at if there were a crash.
	 */
	bn = cntobn(pmp, newcluster);
	/* always succeeds */
	bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0, 0);
	memset(bp->b_data, 0, pmp->pm_bpcluster);
	memcpy(bp->b_data, &dosdirtemplate, sizeof dosdirtemplate);
	denp = (struct direntry *)bp->b_data;
	putushort(denp[0].deStartCluster, newcluster);
	putushort(denp[0].deCDate, ndirent.de_CDate);
	putushort(denp[0].deCTime, ndirent.de_CTime);
	denp[0].deCHundredth = ndirent.de_CHun;
	putushort(denp[0].deADate, ndirent.de_ADate);
	putushort(denp[0].deMDate, ndirent.de_MDate);
	putushort(denp[0].deMTime, ndirent.de_MTime);
	pcl = pdep->de_StartCluster;
	/*
	 * Although the root directory has a non-magic starting cluster
	 * number for FAT32, chkdsk and fsck_msdosfs still require
	 * references to it in dotdot entries to be magic.
	 */
	if (FAT32(pmp) && pcl == pmp->pm_rootdirblk)
		pcl = MSDOSFSROOT;
	putushort(denp[1].deStartCluster, pcl);
	putushort(denp[1].deCDate, ndirent.de_CDate);
	putushort(denp[1].deCTime, ndirent.de_CTime);
	denp[1].deCHundredth = ndirent.de_CHun;
	putushort(denp[1].deADate, ndirent.de_ADate);
	putushort(denp[1].deMDate, ndirent.de_MDate);
	putushort(denp[1].deMTime, ndirent.de_MTime);
	if (FAT32(pmp)) {
		putushort(denp[0].deHighClust, newcluster >> 16);
		putushort(denp[1].deHighClust, pcl >> 16);
	}

	if (DOINGASYNC(ap->a_dvp))
		bdwrite(bp);
	else if ((error = bwrite(bp)) != 0)
		goto bad;

	/*
	 * Now build up a directory entry pointing to the newly allocated
	 * cluster.  This will be written to an empty slot in the parent
	 * directory.
	 */
#ifdef DIAGNOSTIC
	if ((cnp->cn_flags & HASBUF) == 0)
		panic("msdosfs_mkdir: no name");
#endif
	error = uniqdosname(pdep, cnp, ndirent.de_Name);
	if (error)
		goto bad;

	ndirent.de_Attributes = ATTR_DIRECTORY;
	ndirent.de_LowerCase = 0;
	ndirent.de_StartCluster = newcluster;
	ndirent.de_FileSize = 0;
	error = createde(&ndirent, pdep, &dep, cnp);
	if (error)
		goto bad;
	*ap->a_vpp = DETOV(dep);
	return (0);

bad:
	clusterfree(pmp, newcluster, NULL);
bad2:
	return (error);
}

/*
 * Remove an empty directory (VOP_RMDIR).
 */
static int
msdosfs_rmdir(struct vop_rmdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct denode *ip, *dp;
	int error;

	ip = VTODE(vp);
	dp = VTODE(dvp);

	/*
	 * Verify the directory is empty (and valid).
	 * (Rmdir ".." won't be valid since
	 *  ".." will contain a reference to
	 *  the current directory and thus be
	 *  non-empty.)
	 */
	error = 0;
	if (!dosdirempty(ip) || ip->de_flag & DE_RENAME) {
		error = ENOTEMPTY;
		goto out;
	}
	/*
	 * Delete the entry from the directory.  For dos filesystems this
	 * gets rid of the directory entry on disk, the in memory copy
	 * still exists but the de_refcnt is <= 0.  This prevents it from
	 * being found by deget().  When the vput() on dep is done we give
	 * up access and eventually msdosfs_reclaim() will be called which
	 * will remove it from the denode cache.
	 */
	error = removede(dp, ip);
	if (error)
		goto out;
	/*
	 * This is where we decrement the link count in the parent
	 * directory.  Since dos filesystems don't do this we just purge
	 * the name cache.
	 */
	cache_purge(dvp);
	/*
	 * Truncate the directory that is being deleted.
	 */
	error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred);
	cache_purge(vp);

out:
	return (error);
}

/*
 * DOS filesystems don't know what symlinks are.
 */
static int
msdosfs_symlink(struct vop_symlink_args *ap)
{
	return (EOPNOTSUPP);
}

/*
 * Read directory entries (VOP_READDIR), converting on-disk DOS direntries
 * (including Win95 long-name entries) into fs-independent struct dirent
 * records.  "." and ".." are synthesized for the root directory, which
 * does not store them on disk.
 */
static int
msdosfs_readdir(struct vop_readdir_args *ap)
{
	struct mbnambuf nb;
	int error = 0;
	int diff;
	long n;
	int blsize;
	long on;
	u_long cn;
	u_long dirsperblk;
	long bias = 0;
	daddr_t bn, lbn;
	struct buf *bp;
	struct denode *dep = VTODE(ap->a_vp);
	struct msdosfsmount *pmp = dep->de_pmp;
	struct direntry *dentp;
	struct dirent dirbuf;
	struct uio *uio = ap->a_uio;
	u_long *cookies = NULL;
	int ncookies = 0;
	off_t offset, off;
	int chksum = -1;

#ifdef MSDOSFS_DEBUG
	printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n",
	    ap->a_vp, uio, ap->a_cred, ap->a_eofflag);
#endif

	/*
	 * msdosfs_readdir() won't operate properly on regular files since
	 * it does i/o only with the filesystem vnode, and hence can
	 * retrieve the wrong block from the buffer cache for a plain file.
	 * So, fail attempts to readdir() on a plain file.
	 */
	if ((dep->de_Attributes & ATTR_DIRECTORY) == 0)
		return (ENOTDIR);

	/*
	 * To be safe, initialize dirbuf
	 */
	memset(dirbuf.d_name, 0, sizeof(dirbuf.d_name));

	/*
	 * If the user buffer is smaller than the size of one dos directory
	 * entry or the file offset is not a multiple of the size of a
	 * directory entry, then we fail the read.
	 */
	off = offset = uio->uio_offset;
	if (uio->uio_resid < sizeof(struct direntry) ||
	    (offset & (sizeof(struct direntry) - 1)))
		return (EINVAL);

	if (ap->a_ncookies) {
		ncookies = uio->uio_resid / 16;
		cookies = malloc(ncookies * sizeof(u_long), M_TEMP,
		    M_WAITOK);
		*ap->a_cookies = cookies;
		*ap->a_ncookies = ncookies;
	}

	dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);

	/*
	 * If they are reading from the root directory then, we simulate
	 * the . and .. entries since these don't exist in the root
	 * directory.  We also set the offset bias to make up for having to
	 * simulate these entries. By this I mean that at file offset 64 we
	 * read the first entry in the root directory that lives on disk.
	 */
	if (dep->de_StartCluster == MSDOSFSROOT
	    || (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)) {
#if 0
		printf("msdosfs_readdir(): going after . or .. in root dir, offset %d\n",
		    offset);
#endif
		bias = 2 * sizeof(struct direntry);
		if (offset < bias) {
			for (n = (int)offset / sizeof(struct direntry);
			     n < 2; n++) {
				dirbuf.d_fileno = FAT32(pmp) ?
				    (uint64_t)cntobn(pmp, pmp->pm_rootdirblk) *
				    dirsperblk : 1;
				dirbuf.d_type = DT_DIR;
				switch (n) {
				case 0:
					dirbuf.d_namlen = 1;
					strcpy(dirbuf.d_name, ".");
					break;
				case 1:
					dirbuf.d_namlen = 2;
					strcpy(dirbuf.d_name, "..");
					break;
				}
				dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
				if (uio->uio_resid < dirbuf.d_reclen)
					goto out;
				error = uiomove(&dirbuf, dirbuf.d_reclen, uio);
				if (error)
					goto out;
				offset += sizeof(struct direntry);
				off = offset;
				if (cookies) {
					*cookies++ = offset;
					if (--ncookies <= 0)
						goto out;
				}
			}
		}
	}

	mbnambuf_init(&nb);
	off = offset;
	while (uio->uio_resid > 0) {
		lbn = de_cluster(pmp, offset - bias);
		on = (offset - bias) & pmp->pm_crbomask;
		n = min(pmp->pm_bpcluster - on, uio->uio_resid);
		diff = dep->de_FileSize - (offset - bias);
		if (diff <= 0)
			break;
		n = min(n, diff);
		error = pcbmap(dep, lbn, &bn, &cn, &blsize);
		if (error)
			break;
		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
		if (error) {
			brelse(bp);
			return (error);
		}
		n = min(n, blsize - bp->b_resid);
		if (n == 0) {
			brelse(bp);
			return (EIO);
		}

		/*
		 * Convert from dos directory entries to fs-independent
		 * directory entries.
		 */
		for (dentp = (struct direntry *)(bp->b_data + on);
		     (char *)dentp < bp->b_data + on + n;
		     dentp++, offset += sizeof(struct direntry)) {
#if 0
			printf("rd: dentp %08x prev %08x crnt %08x deName %02x attr %02x\n",
			    dentp, prev, crnt, dentp->deName[0], dentp->deAttributes);
#endif
			/*
			 * If this is an unused entry, we can stop.
			 */
			if (dentp->deName[0] == SLOT_EMPTY) {
				brelse(bp);
				goto out;
			}
			/*
			 * Skip deleted entries.
			 */
			if (dentp->deName[0] == SLOT_DELETED) {
				chksum = -1;
				mbnambuf_init(&nb);
				continue;
			}

			/*
			 * Handle Win95 long directory entries
			 */
			if (dentp->deAttributes == ATTR_WIN95) {
				if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
					continue;
				chksum = win2unixfn(&nb,
				    (struct winentry *)dentp, chksum, pmp);
				continue;
			}

			/*
			 * Skip volume labels
			 */
			if (dentp->deAttributes & ATTR_VOLUME) {
				chksum = -1;
				mbnambuf_init(&nb);
				continue;
			}
			/*
			 * This computation of d_fileno must match
			 * the computation of va_fileid in
			 * msdosfs_getattr.
			 */
			if (dentp->deAttributes & ATTR_DIRECTORY) {
				cn = getushort(dentp->deStartCluster);
				if (FAT32(pmp)) {
					cn |= getushort(dentp->deHighClust) <<
					    16;
					if (cn == MSDOSFSROOT)
						cn = pmp->pm_rootdirblk;
				}
				if (cn == MSDOSFSROOT && !FAT32(pmp))
					dirbuf.d_fileno = 1;
				else
					dirbuf.d_fileno = cntobn(pmp, cn) *
					    dirsperblk;
				dirbuf.d_type = DT_DIR;
			} else {
				dirbuf.d_fileno = (uoff_t)offset /
				    sizeof(struct direntry);
				dirbuf.d_type = DT_REG;
			}
			/*
			 * Use the accumulated Win95 long name only when its
			 * checksum matches this short entry; otherwise fall
			 * back to converting the 8.3 name.
			 */
			if (chksum != winChksum(dentp->deName)) {
				dirbuf.d_namlen = dos2unixfn(dentp->deName,
				    (u_char *)dirbuf.d_name,
				    dentp->deLowerCase |
				    ((pmp->pm_flags & MSDOSFSMNT_SHORTNAME) ?
				    (LCASE_BASE | LCASE_EXT) : 0),
				    pmp);
				mbnambuf_init(&nb);
			} else
				mbnambuf_flush(&nb, &dirbuf);
			chksum = -1;
			dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
			if (uio->uio_resid < dirbuf.d_reclen) {
				brelse(bp);
				goto out;
			}
			error = uiomove(&dirbuf, dirbuf.d_reclen, uio);
			if (error) {
				brelse(bp);
				goto out;
			}
			if (cookies) {
				*cookies++ = offset +
				    sizeof(struct direntry);
				if (--ncookies <= 0) {
					brelse(bp);
					goto out;
				}
			}
			off = offset + sizeof(struct direntry);
		}
		brelse(bp);
	}
out:
	/* Subtract unused cookies */
	if (ap->a_ncookies)
		*ap->a_ncookies -= ncookies;

	uio->uio_offset = off;

	/*
	 * Set the eofflag (NFS uses it)
	 */
	if (ap->a_eofflag) {
		if (dep->de_FileSize - (offset - bias) <= 0)
			*ap->a_eofflag = 1;
		else
			*ap->a_eofflag = 0;
	}
	return (error);
}

/*-
 * a_vp - pointer to the file's vnode
 * a_bn - logical block number within the file (cluster number for us)
 * a_bop - where to return the bufobj of the special file containing the fs
 * a_bnp - where to return the "physical" block number corresponding to a_bn
 *  (relative to the special file; units are blocks of size DEV_BSIZE)
 * a_runp - where to return the "run past" a_bn.  This is the count of logical
 *  blocks whose physical blocks (together with a_bn's physical block)
 *  are contiguous.
 * a_runb - where to return the "run before" a_bn.
 */
static int
msdosfs_bmap(struct vop_bmap_args *ap)
{
	struct denode *dep;
	struct mount *mp;
	struct msdosfsmount *pmp;
	struct vnode *vp;
	daddr_t runbn;
	u_long cn;
	int bnpercn, error, maxio, maxrun, run;

	vp = ap->a_vp;
	dep = VTODE(vp);
	pmp = dep->de_pmp;
	if (ap->a_bop != NULL)
		*ap->a_bop = &pmp->pm_devvp->v_bufobj;
	if (ap->a_bnp == NULL)
		return (0);
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;
	cn = ap->a_bn;
	/* Reject cluster numbers that do not fit in u_long. */
	if (cn != ap->a_bn)
		return (EFBIG);
	error = pcbmap(dep, cn, ap->a_bnp, NULL, NULL);
	if (error != 0 || (ap->a_runp == NULL && ap->a_runb == NULL))
		return (error);

	/*
	 * Probe neighboring clusters with pcbmap() to report how many are
	 * physically contiguous before and after a_bn, bounded by the
	 * mount's maximum I/O size.
	 */
	mp = vp->v_mount;
	maxio = mp->mnt_iosize_max / mp->mnt_stat.f_iosize;
	bnpercn = de_cn2bn(pmp, 1);
	if (ap->a_runp != NULL) {
		maxrun = ulmin(maxio - 1, pmp->pm_maxcluster - cn);
		for (run = 1; run <= maxrun; run++) {
			if (pcbmap(dep, cn + run, &runbn, NULL, NULL) != 0 ||
			    runbn != *ap->a_bnp + run * bnpercn)
				break;
		}
		*ap->a_runp = run - 1;
	}
	if (ap->a_runb != NULL) {
		maxrun = ulmin(maxio - 1, cn);
		for (run = 1; run < maxrun; run++) {
			if (pcbmap(dep, cn - run, &runbn, NULL, NULL) != 0 ||
			    runbn != *ap->a_bnp - run * bnpercn)
				break;
		}
		*ap->a_runb = run - 1;
	}
	return (0);
}

SYSCTL_NODE(_vfs, OID_AUTO, msdosfs, CTLFLAG_RW, 0, "msdos filesystem");

static int use_buf_pager = 1;
SYSCTL_INT(_vfs_msdosfs, OID_AUTO, use_buf_pager, CTLFLAG_RWTUN,
    &use_buf_pager, 0,
    "Use buffer pager instead of bmap");

/* Buffer-pager callback: map a file offset to its logical cluster number. */
static daddr_t
msdosfs_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
{

	return (de_cluster(VTODE(vp)->de_pmp, off));
}

/* Buffer-pager callback: logical block (cluster) size for this mount. */
static int
msdosfs_gbp_getblksz(struct vnode *vp, daddr_t lbn)
{

	return (VTODE(vp)->de_pmp->pm_bpcluster);
}

/*
 * VOP_GETPAGES: use the generic buffer pager unless disabled via the
 * vfs.msdosfs.use_buf_pager sysctl.
 */
static int
msdosfs_getpages(struct vop_getpages_args *ap)
{

	if (use_buf_pager)
		return (vfs_bio_getpages(ap->a_vp, ap->a_m, ap->a_count,
		    ap->a_rbehind, ap->a_rahead, msdosfs_gbp_getblkno,
		    msdosfs_gbp_getblksz));
	return (vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
	    ap->a_rbehind, ap->a_rahead, NULL, NULL));
}

static int
msdosfs_strategy(struct vop_strategy_args *ap)
{
	struct buf *bp = ap->a_bp;
	struct denode *dep = VTODE(ap->a_vp);
	struct bufobj *bo;
	int error = 0;
	daddr_t blkno;

	/*
	 * If we don't already know the filesystem relative block number
	 * then get it using pcbmap().  If pcbmap() returns the block
	 * number as -1 then we've got a hole in the file.  DOS filesystems
	 * don't allow files with holes, so we shouldn't ever see this.
	 */
	if (bp->b_blkno == bp->b_lblkno) {
		error = pcbmap(dep, bp->b_lblkno, &blkno, 0, 0);
		bp->b_blkno = blkno;
		if (error) {
			bp->b_error = error;
			bp->b_ioflags |= BIO_ERROR;
			bufdone(bp);
			return (0);
		}
		if ((long)bp->b_blkno == -1)
			vfs_bio_clrbuf(bp);
	}
	if (bp->b_blkno == -1) {
		bufdone(bp);
		return (0);
	}
	/*
	 * Read/write the block from/to the disk that contains the desired
	 * file block.
	 */
	bp->b_iooffset = dbtob(bp->b_blkno);
	bo = dep->de_pmp->pm_bo;
	BO_STRATEGY(bo, bp);
	return (0);
}

/* VOP_PRINT: dump denode identity for debugging. */
static int
msdosfs_print(struct vop_print_args *ap)
{
	struct denode *dep = VTODE(ap->a_vp);

	printf("\tstartcluster %lu, dircluster %lu, diroffset %lu, ",
	    dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset);
	printf("on dev %s\n", devtoname(dep->de_pmp->pm_dev));
	return (0);
}

static int
msdosfs_pathconf(struct vop_pathconf_args *ap)
{
	struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp;

	switch (ap->a_name) {
	case _PC_LINK_MAX:
		*ap->a_retval = 1;
		return (0);
	case _PC_NAME_MAX:
		*ap->a_retval = pmp->pm_flags & MSDOSFSMNT_LONGNAME ? WIN_MAXLEN : 12;
		return (0);
	case _PC_NO_TRUNC:
		*ap->a_retval = 0;
		return (0);
	default:
		return (vop_stdpathconf(ap));
	}
	/* NOTREACHED */
}

/*
 * VOP_VPTOFH: translate a vnode into a file handle (struct defid), keyed
 * by the denode's directory cluster and offset.
 */
static int
msdosfs_vptofh(struct vop_vptofh_args *ap)
{
	struct denode *dep;
	struct defid *defhp;

	dep = VTODE(ap->a_vp);
	defhp = (struct defid *)ap->a_fhp;
	defhp->defid_len = sizeof(struct defid);
	defhp->defid_dirclust = dep->de_dirclust;
	defhp->defid_dirofs = dep->de_diroffset;
	/* defhp->defid_gen = dep->de_gen; */
	return (0);
}

/* Global vfs data structures for msdosfs */
struct vop_vector msdosfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		msdosfs_access,
	.vop_bmap =		msdosfs_bmap,
	.vop_getpages =		msdosfs_getpages,
	.vop_cachedlookup =	msdosfs_lookup,
	.vop_open =		msdosfs_open,
	.vop_close =		msdosfs_close,
	.vop_create =		msdosfs_create,
	.vop_fsync =		msdosfs_fsync,
	.vop_fdatasync =	vop_stdfdatasync_buf,
	.vop_getattr =		msdosfs_getattr,
	.vop_inactive =		msdosfs_inactive,
	.vop_link =		msdosfs_link,
	.vop_lookup =		vfs_cache_lookup,
	.vop_mkdir =		msdosfs_mkdir,
	.vop_mknod =		msdosfs_mknod,
	.vop_pathconf =		msdosfs_pathconf,
	.vop_print =		msdosfs_print,
	.vop_read =		msdosfs_read,
	.vop_readdir =		msdosfs_readdir,
	.vop_reclaim =		msdosfs_reclaim,
	.vop_remove =		msdosfs_remove,
	.vop_rename =		msdosfs_rename,
	.vop_rmdir =		msdosfs_rmdir,
	.vop_setattr =		msdosfs_setattr,
	.vop_strategy =		msdosfs_strategy,
	.vop_symlink =		msdosfs_symlink,
	.vop_write =		msdosfs_write,
	.vop_vptofh =		msdosfs_vptofh,
};
Index: head/sys/fs/msdosfs/msdosfsmount.h
===================================================================
--- head/sys/fs/msdosfs/msdosfsmount.h	(revision 326267)
+++ head/sys/fs/msdosfs/msdosfsmount.h	(revision 326268)
@@ -1,259 +1,261 @@
/* $FreeBSD$ */
/* $NetBSD: msdosfsmount.h,v 1.17 1997/11/17 15:37:07 ws Exp $ */
/*-
+ * SPDX-License-Identifier: BSD-4-Clause
+ *
 * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
 * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
 * All rights reserved.
* Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by TooLs GmbH. * 4. The name of TooLs GmbH may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /*- * Written by Paul Popelka (paulp@uts.amdahl.com) * * You can do anything you want with this software, just don't say you wrote * it, and don't remove this notice. * * This software is provided "as is". 
 *
 * The author supplies this software to be publicly redistributed on the
 * understanding that the author is not responsible for the correct
 * functioning of this software in any circumstances and is not liable for
 * any damages caused by this software.
 *
 * October 1992
 */

#ifndef _MSDOSFS_MSDOSFSMOUNT_H_
#define	_MSDOSFS_MSDOSFSMOUNT_H_

#ifdef _KERNEL

/* NOTE(review): the original #include targets were lost in extraction. */
#include
#include
#include
#include

#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_MSDOSFSMNT);
#endif
struct msdosfs_fileno;

/*
 * Layout of the mount control block for a MSDOSFS filesystem.
 */
struct msdosfsmount {
	struct mount *pm_mountp;/* vfs mount struct for this fs */
	struct g_consumer *pm_cp;
	struct bufobj *pm_bo;
	uid_t pm_uid;		/* uid to set as owner of the files */
	gid_t pm_gid;		/* gid to set as owner of the files */
	mode_t pm_mask;		/* mask to and with file protection bits
				   for files */
	mode_t pm_dirmask;	/* mask to and with file protection bits
				   for directories */
	struct vnode *pm_devvp;	/* vnode for character device mounted */
	struct cdev *pm_dev;	/* character device mounted */
	struct bpb50 pm_bpb;	/* BIOS parameter blk for this fs */
	u_long pm_BlkPerSec;	/* How many DEV_BSIZE blocks fit inside a
				   physical sector */
	u_long pm_FATsecs;	/* actual number of FAT sectors */
	u_long pm_fatblk;	/* block # of first FAT */
	u_long pm_rootdirblk;	/* block # (cluster # for FAT32) of root
				   directory number */
	u_long pm_rootdirsize;	/* size in blocks (not clusters) */
	u_long pm_firstcluster;	/* block number of first cluster */
	u_long pm_maxcluster;	/* maximum cluster number */
	u_long pm_freeclustercount;	/* number of free clusters */
	u_long pm_cnshift;	/* shift file offset right this amount to get
				   a cluster number */
	u_long pm_crbomask;	/* and a file offset with this mask to get
				   cluster rel offset */
	u_long pm_bnshift;	/* shift file offset right this amount to get
				   a block number */
	u_long pm_bpcluster;	/* bytes per cluster */
	u_long pm_fmod;		/* ~0 if fs is modified, this can rollover
				   to 0 */
	u_long pm_fatblocksize;	/* size of FAT blocks in bytes */
	u_long pm_fatblocksec;	/* size of FAT blocks in sectors */
	u_long pm_fatsize;	/* size of FAT in bytes */
	uint32_t pm_fatmask;	/* mask to use for FAT numbers */
	u_long pm_fsinfo;	/* fsinfo block number */
	u_long pm_nxtfree;	/* next place to search for a free cluster */
	u_int pm_fatmult;	/* these 2 values are used in FAT */
	u_int pm_fatdiv;	/* offset computation */
	u_int pm_curfat;	/* current FAT for FAT32 (0 otherwise) */
	u_int *pm_inusemap;	/* ptr to bitmap of in-use clusters */
	uint64_t pm_flags;	/* see below */
	void *pm_u2w;		/* Local->Unicode iconv handle */
	void *pm_w2u;		/* Unicode->Local iconv handle */
	void *pm_u2d;		/* Unicode->DOS iconv handle */
	void *pm_d2u;		/* DOS->Local iconv handle */
	struct lock pm_fatlock;	/* lockmgr protecting allocations */
};

/*
 * A 64-bit file number and the 32-bit file number to which it is mapped,
 * in a red-black tree node.
 */
struct msdosfs_fileno {
	RB_ENTRY(msdosfs_fileno)	mf_tree;
	uint32_t			mf_fileno32;
	uint64_t			mf_fileno64;
};

/* Byte offset in FAT on filesystem pmp, cluster cn */
#define	FATOFS(pmp, cn)	((cn) * (pmp)->pm_fatmult / (pmp)->pm_fatdiv)

#define	VFSTOMSDOSFS(mp)	((struct msdosfsmount *)mp->mnt_data)

/* Number of bits in one pm_inusemap item: */
#define	N_INUSEBITS	(8 * sizeof(u_int))

/*
 * Shorthand for fields in the bpb contained in the msdosfsmount structure.
 */
#define	pm_BytesPerSec	pm_bpb.bpbBytesPerSec
#define	pm_ResSectors	pm_bpb.bpbResSectors
#define	pm_FATs		pm_bpb.bpbFATs
#define	pm_RootDirEnts	pm_bpb.bpbRootDirEnts
#define	pm_Sectors	pm_bpb.bpbSectors
#define	pm_Media	pm_bpb.bpbMedia
#define	pm_SecPerTrack	pm_bpb.bpbSecPerTrack
#define	pm_Heads	pm_bpb.bpbHeads
#define	pm_HiddenSects	pm_bpb.bpbHiddenSecs
#define	pm_HugeSectors	pm_bpb.bpbHugeSectors

/*
 * Convert pointer to buffer -> pointer to direntry
 */
#define	bptoep(pmp, bp, dirofs) \
	((struct direntry *)(((bp)->b_data)	\
	 + ((dirofs) & (pmp)->pm_crbomask)))

/*
 * Convert block number to cluster number
 */
#define	de_bn2cn(pmp, bn) \
	((bn) >> ((pmp)->pm_cnshift - (pmp)->pm_bnshift))

/*
 * Convert cluster number to block number
 */
#define	de_cn2bn(pmp, cn) \
	((cn) << ((pmp)->pm_cnshift - (pmp)->pm_bnshift))

/*
 * Convert file offset to cluster number
 */
#define	de_cluster(pmp, off) \
	((off) >> (pmp)->pm_cnshift)

/*
 * Clusters required to hold size bytes
 */
#define	de_clcount(pmp, size) \
	(((size) + (pmp)->pm_bpcluster - 1) >> (pmp)->pm_cnshift)

/*
 * Convert file offset to block number
 */
#define	de_blk(pmp, off) \
	(de_cn2bn(pmp, de_cluster((pmp), (off))))

/*
 * Convert cluster number to file offset
 */
#define	de_cn2off(pmp, cn) \
	((cn) << (pmp)->pm_cnshift)

/*
 * Convert block number to file offset
 */
#define	de_bn2off(pmp, bn) \
	((bn) << (pmp)->pm_bnshift)

/*
 * Map a cluster number into a filesystem relative block number.
 */
#define	cntobn(pmp, cn) \
	(de_cn2bn((pmp), (cn)-CLUST_FIRST) + (pmp)->pm_firstcluster)

/*
 * Calculate block number for directory entry in root dir, offset dirofs
 */
#define	roottobn(pmp, dirofs) \
	(de_blk((pmp), (dirofs)) + (pmp)->pm_rootdirblk)

/*
 * Calculate block number for directory entry at cluster dirclu, offset
 * dirofs
 */
#define	detobn(pmp, dirclu, dirofs) \
	((dirclu) == MSDOSFSROOT \
	 ? roottobn((pmp), (dirofs)) \
	 : cntobn((pmp), (dirclu)))

/* FAT allocation lock: exclusive lockmgr lock around cluster allocation. */
#define	MSDOSFS_LOCK_MP(pmp) \
	lockmgr(&(pmp)->pm_fatlock, LK_EXCLUSIVE, NULL)
#define	MSDOSFS_UNLOCK_MP(pmp) \
	lockmgr(&(pmp)->pm_fatlock, LK_RELEASE, NULL)
#define	MSDOSFS_ASSERT_MP_LOCKED(pmp) \
	lockmgr_assert(&(pmp)->pm_fatlock, KA_XLOCKED)

#endif /* _KERNEL */

/*
 * Arguments to mount MSDOS filesystems.
 */
struct msdosfs_args {
	char	*fspec;		/* blocks special holding the fs to mount */
	struct	oexport_args export;	/* network export information */
	uid_t	uid;		/* uid that owns msdosfs files */
	gid_t	gid;		/* gid that owns msdosfs files */
	mode_t	mask;		/* file mask to be applied for msdosfs perms */
	int	flags;		/* see below */
	int	unused1;	/* unused, was version number */
	uint16_t unused2[128];	/* no longer used, was Local->Unicode table */
	char	*cs_win;	/* Windows(Unicode) Charset */
	char	*cs_dos;	/* DOS Charset */
	char	*cs_local;	/* Local Charset */
	mode_t	dirmask;	/* dir mask to be applied for msdosfs perms */
};

/*
 * Msdosfs mount options:
 */
#define	MSDOSFSMNT_SHORTNAME	1	/* Force old DOS short names only */
#define	MSDOSFSMNT_LONGNAME	2	/* Force Win'95 long names */
#define	MSDOSFSMNT_NOWIN95	4	/* Completely ignore Win95 entries */
#define	MSDOSFSMNT_KICONV	0x10	/* Use libiconv to convert chars */

/* All flags above: */
#define	MSDOSFSMNT_MNTOPT \
	(MSDOSFSMNT_SHORTNAME|MSDOSFSMNT_LONGNAME|MSDOSFSMNT_NOWIN95 \
	 |MSDOSFSMNT_KICONV)
#define	MSDOSFSMNT_RONLY	0x80000000	/* mounted read-only	*/
#define	MSDOSFSMNT_WAITONFAT	0x40000000	/* mounted synchronous	*/
#define	MSDOSFS_FATMIRROR	0x20000000	/* FAT is mirrored */
#define	MSDOSFS_FSIMOD		0x01000000

#endif /* !_MSDOSFS_MSDOSFSMOUNT_H_ */
Index: head/sys/fs/nandfs/bmap.c
===================================================================
--- head/sys/fs/nandfs/bmap.c	(revision 326267)
+++ head/sys/fs/nandfs/bmap.c	(revision 326268)
@@ -1,623 +1,625 @@
/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2012 Semihalf
 * All rights reserved.
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "nandfs_mount.h" #include "nandfs.h" #include "nandfs_subr.h" #include "bmap.h" static int bmap_getlbns(struct nandfs_node *, nandfs_lbn_t, struct nandfs_indir *, int *); int bmap_lookup(struct nandfs_node *node, nandfs_lbn_t lblk, nandfs_daddr_t *vblk) { struct nandfs_inode *ip; struct nandfs_indir a[NANDFS_NIADDR + 1], *ap; nandfs_daddr_t daddr; struct buf *bp; int error; int num, *nump; DPRINTF(BMAP, ("%s: node %p lblk %jx enter\n", __func__, node, lblk)); ip = &node->nn_inode; ap = a; nump = # error = bmap_getlbns(node, lblk, ap, nump); if (error) return (error); if (num == 0) { *vblk = ip->i_db[lblk]; return (0); } DPRINTF(BMAP, ("%s: node %p lblk=%jx trying ip->i_ib[%x]\n", __func__, node, lblk, ap->in_off)); daddr = ip->i_ib[ap->in_off]; for (bp = NULL, ++ap; --num; ap++) { if (daddr == 0) { DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with " "vblk 0\n", __func__, node, lblk)); *vblk = 0; return (0); } if (ap->in_lbn == lblk) { DPRINTF(BMAP, ("%s: node %p lblk=%jx ap->in_lbn=%jx " "returning address of indirect block (%jx)\n", __func__, node, lblk, ap->in_lbn, daddr)); *vblk = daddr; return (0); } DPRINTF(BMAP, ("%s: node %p lblk=%jx reading block " "ap->in_lbn=%jx\n", __func__, node, lblk, ap->in_lbn)); error = nandfs_bread_meta(node, ap->in_lbn, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } daddr = ((nandfs_daddr_t *)bp->b_data)[ap->in_off]; brelse(bp); } DPRINTF(BMAP, ("%s: node %p lblk=%jx returning with %jx\n", __func__, node, lblk, daddr)); *vblk = daddr; return (0); } int bmap_dirty_meta(struct nandfs_node *node, nandfs_lbn_t lblk, int force) { struct nandfs_indir a[NANDFS_NIADDR+1], *ap; #ifdef DEBUG nandfs_daddr_t daddr; #endif struct buf *bp; int error; int num, *nump; 
DPRINTF(BMAP, ("%s: node %p lblk=%jx\n", __func__, node, lblk)); ap = a; nump = # error = bmap_getlbns(node, lblk, ap, nump); if (error) return (error); /* * Direct block, nothing to do */ if (num == 0) return (0); DPRINTF(BMAP, ("%s: node %p reading blocks\n", __func__, node)); for (bp = NULL, ++ap; --num; ap++) { error = nandfs_bread_meta(node, ap->in_lbn, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } #ifdef DEBUG daddr = ((nandfs_daddr_t *)bp->b_data)[ap->in_off]; MPASS(daddr != 0 || node->nn_ino == 3); #endif error = nandfs_dirty_buf_meta(bp, force); if (error) return (error); } return (0); } int bmap_insert_block(struct nandfs_node *node, nandfs_lbn_t lblk, nandfs_daddr_t vblk) { struct nandfs_inode *ip; struct nandfs_indir a[NANDFS_NIADDR+1], *ap; struct buf *bp; nandfs_daddr_t daddr; int error; int num, *nump, i; DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx\n", __func__, node, lblk, vblk)); ip = &node->nn_inode; ap = a; nump = # error = bmap_getlbns(node, lblk, ap, nump); if (error) return (error); DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx got num=%d\n", __func__, node, lblk, vblk, num)); if (num == 0) { DPRINTF(BMAP, ("%s: node %p lblk=%jx direct block\n", __func__, node, lblk)); ip->i_db[lblk] = vblk; return (0); } DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block level %d\n", __func__, node, lblk, ap->in_off)); if (num == 1) { DPRINTF(BMAP, ("%s: node %p lblk=%jx indirect block: inserting " "%jx as vblk for indirect block %d\n", __func__, node, lblk, vblk, ap->in_off)); ip->i_ib[ap->in_off] = vblk; return (0); } bp = NULL; daddr = ip->i_ib[a[0].in_off]; for (i = 1; i < num; i++) { if (bp) brelse(bp); if (daddr == 0) { DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx create " "block %jx %d\n", __func__, node, lblk, vblk, a[i].in_lbn, a[i].in_off)); error = nandfs_bcreate_meta(node, a[i].in_lbn, NOCRED, 0, &bp); if (error) return (error); } else { DPRINTF(BMAP, ("%s: node %p lblk=%jx vblk=%jx read " "block %jx %d\n", __func__, node, daddr, 
vblk, a[i].in_lbn, a[i].in_off)); error = nandfs_bread_meta(node, a[i].in_lbn, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } } daddr = ((nandfs_daddr_t *)bp->b_data)[a[i].in_off]; } i--; DPRINTF(BMAP, ("%s: bmap node %p lblk=%jx vblk=%jx inserting vblk level %d at " "offset %d at %jx\n", __func__, node, lblk, vblk, i, a[i].in_off, daddr)); if (!bp) { nandfs_error("%s: cannot find indirect block\n", __func__); return (-1); } ((nandfs_daddr_t *)bp->b_data)[a[i].in_off] = vblk; error = nandfs_dirty_buf_meta(bp, 0); if (error) { nandfs_warning("%s: dirty failed buf: %p\n", __func__, bp); return (error); } DPRINTF(BMAP, ("%s: exiting node %p lblk=%jx vblk=%jx\n", __func__, node, lblk, vblk)); return (error); } CTASSERT(NANDFS_NIADDR <= 3); #define SINGLE 0 /* index of single indirect block */ #define DOUBLE 1 /* index of double indirect block */ #define TRIPLE 2 /* index of triple indirect block */ static __inline nandfs_lbn_t lbn_offset(struct nandfs_device *fsdev, int level) { nandfs_lbn_t res; for (res = 1; level > 0; level--) res *= MNINDIR(fsdev); return (res); } static nandfs_lbn_t blocks_inside(struct nandfs_device *fsdev, int level, struct nandfs_indir *nip) { nandfs_lbn_t blocks; for (blocks = 1; level >= SINGLE; level--, nip++) { MPASS(nip->in_off >= 0 && nip->in_off < MNINDIR(fsdev)); blocks += nip->in_off * lbn_offset(fsdev, level); } return (blocks); } static int bmap_truncate_indirect(struct nandfs_node *node, int level, nandfs_lbn_t *left, int *cleaned, struct nandfs_indir *ap, struct nandfs_indir *fp, nandfs_daddr_t *copy) { struct buf *bp; nandfs_lbn_t i, lbn, nlbn, factor, tosub; struct nandfs_device *fsdev; int error, lcleaned, modified; DPRINTF(BMAP, ("%s: node %p level %d left %jx\n", __func__, node, level, *left)); fsdev = node->nn_nandfsdev; MPASS(ap->in_off >= 0 && ap->in_off < MNINDIR(fsdev)); factor = lbn_offset(fsdev, level); lbn = ap->in_lbn; error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp); if (error) { if (bp != NULL) 
brelse(bp); return (error); } bcopy(bp->b_data, copy, fsdev->nd_blocksize); bqrelse(bp); modified = 0; i = ap->in_off; if (ap != fp) ap++; for (nlbn = lbn + 1 - i * factor; i >= 0 && *left > 0; i--, nlbn += factor) { lcleaned = 0; DPRINTF(BMAP, ("%s: node %p i=%jx nlbn=%jx left=%jx ap=%p vblk %jx\n", __func__, node, i, nlbn, *left, ap, copy[i])); if (copy[i] == 0) { tosub = blocks_inside(fsdev, level - 1, ap); if (tosub > *left) tosub = 0; *left -= tosub; } else { if (level > SINGLE) { if (ap == fp) ap->in_lbn = nlbn; error = bmap_truncate_indirect(node, level - 1, left, &lcleaned, ap, fp, copy + MNINDIR(fsdev)); if (error) return (error); } else { error = nandfs_bdestroy(node, copy[i]); if (error) return (error); lcleaned = 1; *left -= 1; } } if (lcleaned) { if (level > SINGLE) { error = nandfs_vblock_end(fsdev, copy[i]); if (error) return (error); } copy[i] = 0; modified++; } ap = fp; } if (i == -1) *cleaned = 1; error = nandfs_bread_meta(node, lbn, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } if (modified) bcopy(copy, bp->b_data, fsdev->nd_blocksize); /* Force success even if we can't dirty the buffer metadata when freeing space */ nandfs_dirty_buf_meta(bp, 1); return (0); } int bmap_truncate_mapping(struct nandfs_node *node, nandfs_lbn_t lastblk, nandfs_lbn_t todo) { struct nandfs_inode *ip; struct nandfs_indir a[NANDFS_NIADDR + 1], f[NANDFS_NIADDR], *ap; nandfs_daddr_t indir_lbn[NANDFS_NIADDR]; nandfs_daddr_t *copy; int error, level; nandfs_lbn_t left, tosub; struct nandfs_device *fsdev; int cleaned, i; int num, *nump; DPRINTF(BMAP, ("%s: node %p lastblk %jx truncating by %jx\n", __func__, node, lastblk, todo)); ip = &node->nn_inode; fsdev = node->nn_nandfsdev; ap = a; nump = # error = bmap_getlbns(node, lastblk, ap, nump); if (error) return (error); indir_lbn[SINGLE] = -NANDFS_NDADDR; indir_lbn[DOUBLE] = indir_lbn[SINGLE] - MNINDIR(fsdev) - 1; indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - MNINDIR(fsdev) * MNINDIR(fsdev) - 1; for (i = 0; i < 
NANDFS_NIADDR; i++) { f[i].in_off = MNINDIR(fsdev) - 1; f[i].in_lbn = 0xdeadbeef; } left = todo; #ifdef DEBUG a[num].in_off = -1; #endif ap++; num -= 2; if (num < 0) goto direct; copy = malloc(MNINDIR(fsdev) * sizeof(nandfs_daddr_t) * (num + 1), M_NANDFSTEMP, M_WAITOK); for (level = num; level >= SINGLE && left > 0; level--) { cleaned = 0; if (ip->i_ib[level] == 0) { tosub = blocks_inside(fsdev, level, ap); if (tosub > left) left = 0; else left -= tosub; } else { if (ap == f) ap->in_lbn = indir_lbn[level]; error = bmap_truncate_indirect(node, level, &left, &cleaned, ap, f, copy); if (error) { free(copy, M_NANDFSTEMP); nandfs_error("%s: error %d when truncate " "at level %d\n", __func__, error, level); return (error); } } if (cleaned) { nandfs_vblock_end(fsdev, ip->i_ib[level]); ip->i_ib[level] = 0; } ap = f; } free(copy, M_NANDFSTEMP); direct: if (num < 0) i = lastblk; else i = NANDFS_NDADDR - 1; for (; i >= 0 && left > 0; i--) { if (ip->i_db[i] != 0) { error = nandfs_bdestroy(node, ip->i_db[i]); if (error) { nandfs_error("%s: cannot destroy " "block %jx, error %d\n", __func__, (uintmax_t)ip->i_db[i], error); return (error); } ip->i_db[i] = 0; } left--; } KASSERT(left == 0, ("truncated wrong number of blocks (%jd should be 0)", left)); return (error); } nandfs_lbn_t get_maxfilesize(struct nandfs_device *fsdev) { struct nandfs_indir f[NANDFS_NIADDR]; nandfs_lbn_t max; int i; max = NANDFS_NDADDR; for (i = 0; i < NANDFS_NIADDR; i++) { f[i].in_off = MNINDIR(fsdev) - 1; max += blocks_inside(fsdev, i, f); } max *= fsdev->nd_blocksize; return (max); } /* * This is ufs_getlbns with minor modifications. */ /* * Create an array of logical block number/offset pairs which represent the * path of indirect blocks required to access a data block. The first "pair" * contains the logical block number of the appropriate single, double or * triple indirect block and the offset into the inode indirect block array. 
* Note, the logical block number of the inode single/double/triple indirect * block appears twice in the array, once with the offset into the i_ib and * once with the offset into the page itself. */ static int bmap_getlbns(struct nandfs_node *node, nandfs_lbn_t bn, struct nandfs_indir *ap, int *nump) { nandfs_daddr_t blockcnt; nandfs_lbn_t metalbn, realbn; struct nandfs_device *fsdev; int i, numlevels, off; fsdev = node->nn_nandfsdev; DPRINTF(BMAP, ("%s: node %p bn=%jx mnindir=%zd enter\n", __func__, node, bn, MNINDIR(fsdev))); if (nump) *nump = 0; numlevels = 0; realbn = bn; if (bn < 0) bn = -bn; /* The first NANDFS_NDADDR blocks are direct blocks. */ if (bn < NANDFS_NDADDR) return (0); /* * Determine the number of levels of indirection. After this loop * is done, blockcnt indicates the number of data blocks possible * at the previous level of indirection, and NANDFS_NIADDR - i is the * number of levels of indirection needed to locate the requested block. */ for (blockcnt = 1, i = NANDFS_NIADDR, bn -= NANDFS_NDADDR;; i--, bn -= blockcnt) { DPRINTF(BMAP, ("%s: blockcnt=%jd i=%d bn=%jd\n", __func__, blockcnt, i, bn)); if (i == 0) return (EFBIG); blockcnt *= MNINDIR(fsdev); if (bn < blockcnt) break; } /* Calculate the address of the first meta-block. */ if (realbn >= 0) metalbn = -(realbn - bn + NANDFS_NIADDR - i); else metalbn = -(-realbn - bn + NANDFS_NIADDR - i); /* * At each iteration, off is the offset into the bap array which is * an array of disk addresses at the current level of indirection. * The logical block number and the offset in that block are stored * into the argument array. */ ap->in_lbn = metalbn; ap->in_off = off = NANDFS_NIADDR - i; DPRINTF(BMAP, ("%s: initial: ap->in_lbn=%jx ap->in_off=%d\n", __func__, metalbn, off)); ap++; for (++numlevels; i <= NANDFS_NIADDR; i++) { /* If searching for a meta-data block, quit when found. 
*/ if (metalbn == realbn) break; blockcnt /= MNINDIR(fsdev); off = (bn / blockcnt) % MNINDIR(fsdev); ++numlevels; ap->in_lbn = metalbn; ap->in_off = off; DPRINTF(BMAP, ("%s: in_lbn=%jx in_off=%d\n", __func__, ap->in_lbn, ap->in_off)); ++ap; metalbn -= -1 + off * blockcnt; } if (nump) *nump = numlevels; DPRINTF(BMAP, ("%s: numlevels=%d\n", __func__, numlevels)); return (0); } Index: head/sys/fs/nandfs/bmap.h =================================================================== --- head/sys/fs/nandfs/bmap.h (revision 326267) +++ head/sys/fs/nandfs/bmap.h (revision 326268) @@ -1,40 +1,42 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2012 Semihalf * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _BMAP_H #define _BMAP_H #include "nandfs_fs.h" int bmap_lookup(struct nandfs_node *, nandfs_lbn_t, nandfs_daddr_t *); int bmap_insert_block(struct nandfs_node *, nandfs_lbn_t, nandfs_daddr_t); int bmap_truncate_mapping(struct nandfs_node *, nandfs_lbn_t, nandfs_lbn_t); int bmap_dirty_meta(struct nandfs_node *, nandfs_lbn_t, int); nandfs_lbn_t get_maxfilesize(struct nandfs_device *); #endif /* _BMAP_H */ Index: head/sys/fs/nandfs/nandfs.h =================================================================== --- head/sys/fs/nandfs/nandfs.h (revision 326267) +++ head/sys/fs/nandfs/nandfs.h (revision 326268) @@ -1,310 +1,312 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf * Copyright (c) 2008, 2009 Reinoud Zandijk * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * From: NetBSD: nilfs.h,v 1.1 2009/07/18 16:31:42 reinoud * * $FreeBSD$ */ #ifndef _FS_NANDFS_NANDFS_H_ #define _FS_NANDFS_NANDFS_H_ #include #include #include #include #include #include #include #include #include #include #include "nandfs_fs.h" MALLOC_DECLARE(M_NANDFSTEMP); /* Debug categories */ #define NANDFS_DEBUG_VOLUMES 0x000001 #define NANDFS_DEBUG_BLOCK 0x000004 #define NANDFS_DEBUG_LOCKING 0x000008 #define NANDFS_DEBUG_NODE 0x000010 #define NANDFS_DEBUG_LOOKUP 0x000020 #define NANDFS_DEBUG_READDIR 0x000040 #define NANDFS_DEBUG_TRANSLATE 0x000080 #define NANDFS_DEBUG_STRATEGY 0x000100 #define NANDFS_DEBUG_READ 0x000200 #define NANDFS_DEBUG_WRITE 0x000400 #define NANDFS_DEBUG_IFILE 0x000800 #define NANDFS_DEBUG_ATTR 0x001000 #define NANDFS_DEBUG_EXTATTR 0x002000 #define NANDFS_DEBUG_ALLOC 0x004000 #define NANDFS_DEBUG_CPFILE 0x008000 #define NANDFS_DEBUG_DIRHASH 0x010000 #define NANDFS_DEBUG_NOTIMPL 0x020000 #define NANDFS_DEBUG_SHEDULE 0x040000 #define NANDFS_DEBUG_SEG 0x080000 #define NANDFS_DEBUG_SYNC 0x100000 #define NANDFS_DEBUG_PARANOIA 0x200000 #define NANDFS_DEBUG_VNCALL 0x400000 #define NANDFS_DEBUG_BUF 0x1000000 #define NANDFS_DEBUG_BMAP 0x2000000 #define NANDFS_DEBUG_DAT 0x4000000 #define NANDFS_DEBUG_GENERIC 0x8000000 #define NANDFS_DEBUG_CLEAN 0x10000000 extern int nandfs_verbose; #define DPRINTF(name, arg) { \ if (nandfs_verbose & NANDFS_DEBUG_##name) {\ printf arg;\ };\ } #define DPRINTFIF(name, cond, arg) { \ if (nandfs_verbose & 
NANDFS_DEBUG_##name) { \ if (cond) printf arg;\ };\ } #define VFSTONANDFS(mp) ((struct nandfsmount *)((mp)->mnt_data)) #define VTON(vp) ((struct nandfs_node *)(vp)->v_data) #define NTOV(xp) ((xp)->nn_vnode) int nandfs_init(struct vfsconf *); int nandfs_uninit(struct vfsconf *); extern struct vop_vector nandfs_vnodeops; extern struct vop_vector nandfs_system_vnodeops; struct nandfs_node; /* Structure and derivatives */ /* Metadata-file (MDT) geometry: entry/group/descriptor-block counts used by the allocator. */ struct nandfs_mdt { uint32_t entries_per_block; uint32_t entries_per_group; uint32_t blocks_per_group; uint32_t groups_per_desc_block; /* desc is super group */ uint32_t blocks_per_desc_block; /* desc is super group */ }; /* Segment under construction: queued summary and data buffers plus block/byte accounting. */ struct nandfs_segment { LIST_ENTRY(nandfs_segment) seg_link; struct nandfs_device *fsdev; TAILQ_HEAD(, buf) segsum; TAILQ_HEAD(, buf) data; uint64_t seg_num; uint64_t seg_next; uint64_t start_block; uint32_t num_blocks; uint32_t nblocks; uint32_t nbinfos; uint32_t segsum_blocks; uint32_t segsum_bytes; uint32_t bytes_left; char *current_off; }; struct nandfs_seginfo { LIST_HEAD( ,nandfs_segment) seg_list; struct nandfs_segment *curseg; struct nandfs_device *fsdev; uint32_t blocks; uint8_t reiterate; }; #define NANDFS_FSSTOR_FAILED 1 struct nandfs_fsarea { int offset; int flags; int last_used; }; extern int nandfs_cleaner_enable; extern int nandfs_cleaner_interval; extern int nandfs_cleaner_segments; /* Per-volume state: backing vnode/GEOM consumer, syncer+cleaner threads, in-core copies of on-disk metadata, the special DAT/cpfile/sufile/GC nodes, locks and geometry. */ struct nandfs_device { struct vnode *nd_devvp; struct g_consumer *nd_gconsumer; struct thread *nd_syncer; struct thread *nd_cleaner; int nd_syncer_exit; int nd_cleaner_exit; struct nandfs_fsarea nd_fsarea[NANDFS_NFSAREAS]; int nd_last_fsarea; STAILQ_HEAD(nandfs_mnts, nandfsmount) nd_mounts; SLIST_ENTRY(nandfs_device) nd_next_device; /* FS structures */ struct nandfs_fsdata nd_fsdata; struct nandfs_super_block nd_super; struct nandfs_segment_summary nd_last_segsum; struct nandfs_super_root nd_super_root; struct nandfs_node *nd_dat_node; struct nandfs_node *nd_cp_node; struct nandfs_node *nd_su_node; struct nandfs_node *nd_gc_node; struct
nandfs_mdt nd_dat_mdt; struct nandfs_mdt nd_ifile_mdt; struct timespec nd_ts; /* Synchronization */ struct mtx nd_mutex; struct mtx nd_sync_mtx; struct cv nd_sync_cv; struct mtx nd_clean_mtx; struct cv nd_clean_cv; struct lock nd_seg_const; struct nandfs_seginfo *nd_seginfo; /* FS geometry */ uint64_t nd_devsize; uint64_t nd_maxfilesize; uint32_t nd_blocksize; uint32_t nd_erasesize; uint32_t nd_devblocksize; uint32_t nd_segs_reserved; /* Segment usage */ uint64_t nd_clean_segs; uint64_t *nd_free_base; uint64_t nd_free_count; uint64_t nd_dirty_bufs; /* Running values */ uint64_t nd_seg_sequence; uint64_t nd_seg_num; uint64_t nd_next_seg_num; uint64_t nd_last_pseg; uint64_t nd_last_cno; uint64_t nd_last_ino; uint64_t nd_fakevblk; int nd_mount_state; int nd_refcnt; int nd_syncing; int nd_cleaning; }; extern SLIST_HEAD(_nandfs_devices, nandfs_device) nandfs_devices; #define NANDFS_FORCE_SYNCER 0x1 #define NANDFS_UMOUNT 0x2 #define SYNCER_UMOUNT 0x0 #define SYNCER_VFS_SYNC 0x1 #define SYNCER_BDFLUSH 0x2 #define SYNCER_FFORCE 0x3 #define SYNCER_FSYNC 0x4 #define SYNCER_ROUPD 0x5 /* Take nd_seg_const SHARED unless it is already held EXCLUSIVE (segment construction). NOTE(review): lockstatus() != LK_EXCLUSIVE does not prove the *caller* holds the exclusive lock; presumably only the segment constructor ever takes it exclusive -- confirm. */ static __inline int nandfs_writelockflags(struct nandfs_device *fsdev, int flags) { int error = 0; if (lockstatus(&fsdev->nd_seg_const) != LK_EXCLUSIVE) error = lockmgr(&fsdev->nd_seg_const, flags | LK_SHARED, NULL); return (error); } /* Release the SHARED hold taken by nandfs_writelockflags(); a no-op while held EXCLUSIVE. */ static __inline void nandfs_writeunlock(struct nandfs_device *fsdev) { if (lockstatus(&fsdev->nd_seg_const) != LK_EXCLUSIVE) lockmgr(&(fsdev)->nd_seg_const, LK_RELEASE, NULL); } #define NANDFS_WRITELOCKFLAGS(fsdev, flags) nandfs_writelockflags(fsdev, flags) #define NANDFS_WRITELOCK(fsdev) NANDFS_WRITELOCKFLAGS(fsdev, 0) #define NANDFS_WRITEUNLOCK(fsdev) nandfs_writeunlock(fsdev) #define NANDFS_WRITEASSERT(fsdev) lockmgr_assert(&(fsdev)->nd_seg_const, KA_LOCKED) /* Specific mountpoint; head or a checkpoint/snapshot */ struct nandfsmount { STAILQ_ENTRY(nandfsmount) nm_next_mount; struct mount *nm_vfs_mountp; struct nandfs_device *nm_nandfsdev; struct nandfs_args
nm_mount_args; struct nandfs_node *nm_ifile_node; uint8_t nm_flags; int8_t nm_ronly; }; /* In-core inode: vnode back-pointer, owning mount/device, advisory locks, copy of the on-disk inode and IN_* state flags. */ struct nandfs_node { struct vnode *nn_vnode; struct nandfsmount *nn_nmp; struct nandfs_device *nn_nandfsdev; struct lockf *nn_lockf; uint64_t nn_ino; struct nandfs_inode nn_inode; uint64_t nn_diroff; uint32_t nn_flags; }; #define IN_ACCESS 0x0001 /* Inode access time update request */ #define IN_CHANGE 0x0002 /* Inode change time update request */ #define IN_UPDATE 0x0004 /* Inode was written to; update mtime*/ #define IN_MODIFIED 0x0008 /* node has been modified */ #define IN_RENAME 0x0010 /* node is being renamed. */ /* File permissions. */ #define IEXEC 0000100 /* Executable. */ #define IWRITE 0000200 /* Writeable. */ #define IREAD 0000400 /* Readable. */ #define ISVTX 0001000 /* Sticky bit. */ #define ISGID 0002000 /* Set-gid. */ #define ISUID 0004000 /* Set-uid. */ #define PRINT_NODE_FLAGS \ "\10\1IN_ACCESS\2IN_CHANGE\3IN_UPDATE\4IN_MODIFIED\5IN_RENAME" #define NANDFS_GATHER(x) ((x)->b_flags |= B_FS_FLAG1) #define NANDFS_UNGATHER(x) ((x)->b_flags &= ~B_FS_FLAG1) #define NANDFS_ISGATHERED(x) ((x)->b_flags & B_FS_FLAG1) #endif /* !_FS_NANDFS_NANDFS_H_ */ Index: head/sys/fs/nandfs/nandfs_alloc.c =================================================================== --- head/sys/fs/nandfs/nandfs_alloc.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_alloc.c (revision 326268) @@ -1,364 +1,366 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Map a descriptor-block index to its file-relative block number. */ static void nandfs_get_desc_block_nr(struct nandfs_mdt *mdt, uint64_t desc, uint64_t *desc_block) { *desc_block = desc * mdt->blocks_per_desc_block; } /* Map a group index to the block holding that group's bitmap; the "+ 1" skips the descriptor block at the start of each descriptor area. */ static void nandfs_get_group_block_nr(struct nandfs_mdt *mdt, uint64_t group, uint64_t *group_block) { uint64_t desc, group_off; desc = group / mdt->groups_per_desc_block; group_off = group % mdt->groups_per_desc_block; *group_block = desc * mdt->blocks_per_desc_block + 1 + group_off * mdt->blocks_per_group; } /* Initialize a freshly created descriptor block: every group starts with all of its entries free. */ static void init_desc_block(struct nandfs_mdt *mdt, uint8_t *block_data) { struct nandfs_block_group_desc *desc; uint32_t i; desc = (struct nandfs_block_group_desc *) block_data; for (i = 0; i < mdt->groups_per_desc_block; i++) desc[i].bg_nfrees = mdt->entries_per_group; } /* Scan descriptor blocks and group bitmaps for a free entry starting near req->entrynum, creating missing blocks on the way; wraps around once to the beginning before giving up with ENOENT. On success req holds the locked desc and bitmap buffers. */ int nandfs_find_free_entry(struct nandfs_mdt *mdt, struct nandfs_node *node,
struct nandfs_alloc_request *req) { nandfs_daddr_t desc, group, maxgroup, maxdesc, pos = 0; nandfs_daddr_t start_group, start_desc; nandfs_daddr_t desc_block, group_block; nandfs_daddr_t file_blocks; struct nandfs_block_group_desc *descriptors; struct buf *bp, *bp2; uint32_t *mask, i, mcount, msize; int error; file_blocks = node->nn_inode.i_blocks; maxgroup = 0x100000000ull / mdt->entries_per_group; maxdesc = maxgroup / mdt->groups_per_desc_block; start_group = req->entrynum / mdt->entries_per_group; start_desc = start_group / mdt->groups_per_desc_block; bp = bp2 = NULL; restart: for (desc = start_desc; desc < maxdesc; desc++) { nandfs_get_desc_block_nr(mdt, desc, &desc_block); if (bp) brelse(bp); if (desc_block < file_blocks) { error = nandfs_bread(node, desc_block, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } } else { error = nandfs_bcreate(node, desc_block, NOCRED, 0, &bp); if (error) return (error); file_blocks++; init_desc_block(mdt, bp->b_data); } descriptors = (struct nandfs_block_group_desc *) bp->b_data; for (group = start_group; group < mdt->groups_per_desc_block; group++) { if (descriptors[group].bg_nfrees > 0) { nandfs_get_group_block_nr(mdt, group, &group_block); if (bp2) brelse(bp2); if (group_block < file_blocks) { error = nandfs_bread(node, group_block, NOCRED, 0, &bp2); /* NOTE(review): on this error path only bp is released; the failed bp2 is not (the desc path above does brelse the failed buf) -- possible buf leak, confirm. */ if (error) { brelse(bp); return (error); } } else { error = nandfs_bcreate(node, group_block, NOCRED, 0, &bp2); if (error) return (error); file_blocks++; } mask = (uint32_t *)bp2->b_data; msize = (sizeof(uint32_t) * __CHAR_BIT); mcount = mdt->entries_per_group / msize; for (i = 0; i < mcount; i++) { if (mask[i] == UINT32_MAX) continue; pos = ffs(~mask[i]) - 1; pos += (msize * i); pos += (group * mdt->entries_per_group); pos += desc * group * mdt->groups_per_desc_block * mdt->entries_per_group; /* NOTE(review): the 'desc * group *' product above looks wrong -- the descriptor-area stride should presumably be desc * groups_per_desc_block * entries_per_group (the extra 'group' factor zeroes the term for group 0 and inflates it otherwise); confirm against the nilfs2 layout. */ goto found; } } } start_group = 0; } if (start_desc != 0) { maxdesc = start_desc; start_desc = 0; req->entrynum = 0; goto restart; } return (ENOENT); found: req->entrynum = pos;
req->bp_desc = bp; req->bp_bitmap = bp2; DPRINTF(ALLOC, ("%s: desc: %p bitmap: %p entry: %#jx\n", __func__, req->bp_desc, req->bp_bitmap, (uintmax_t)pos)); return (0); } /* Load the descriptor, bitmap and entry buffers covering req->entrynum; on any failure every buffer taken so far is released. */ int nandfs_find_entry(struct nandfs_mdt* mdt, struct nandfs_node *nnode, struct nandfs_alloc_request *req) { uint64_t dblock, bblock, eblock; uint32_t offset; int error; nandfs_mdt_trans_blk(mdt, req->entrynum, &dblock, &bblock, &eblock, &offset); error = nandfs_bread(nnode, dblock, NOCRED, 0, &req->bp_desc); if (error) { brelse(req->bp_desc); return (error); } error = nandfs_bread(nnode, bblock, NOCRED, 0, &req->bp_bitmap); if (error) { brelse(req->bp_desc); brelse(req->bp_bitmap); return (error); } error = nandfs_bread(nnode, eblock, NOCRED, 0, &req->bp_entry); if (error) { brelse(req->bp_desc); brelse(req->bp_bitmap); brelse(req->bp_entry); return (error); } DPRINTF(ALLOC, ("%s: desc_buf: %p bitmap_buf %p entry_buf %p offset %x\n", __func__, req->bp_desc, req->bp_bitmap, req->bp_entry, offset)); return (0); } /* Split an entry number into its group index, bitmap word index and bit offset within that word. */ static __inline void nandfs_calc_idx_entry(struct nandfs_mdt* mdt, uint32_t entrynum, uint64_t *group, uint64_t *bitmap_idx, uint64_t *bitmap_off) { /* Find group_desc index */ entrynum = entrynum % (mdt->entries_per_group * mdt->groups_per_desc_block); *group = entrynum / mdt->entries_per_group; /* Find bitmap index and bit offset */ entrynum = entrynum % mdt->entries_per_group; *bitmap_idx = entrynum / (sizeof(uint32_t) * __CHAR_BIT); *bitmap_off = entrynum % (sizeof(uint32_t) * __CHAR_BIT); } /* Release an allocated entry: bump the group's free count, clear the entry's bitmap bit and dirty the desc/bitmap/entry buffers (releasing them on dirty failure). */ int nandfs_free_entry(struct nandfs_mdt* mdt, struct nandfs_alloc_request *req) { struct nandfs_block_group_desc *descriptors; uint64_t bitmap_idx, bitmap_off; uint64_t group; uint32_t *mask, maskrw; nandfs_calc_idx_entry(mdt, req->entrynum, &group, &bitmap_idx, &bitmap_off); DPRINTF(ALLOC, ("nandfs_free_entry: req->entrynum=%jx bitmap_idx=%jx" " bitmap_off=%jx group=%jx\n", (uintmax_t)req->entrynum, (uintmax_t)bitmap_idx, (uintmax_t)bitmap_off, (uintmax_t)group)); /* Update counter of free entries
for group */ descriptors = (struct nandfs_block_group_desc *) req->bp_desc->b_data; descriptors[group].bg_nfrees++; /* Clear the bit: this entry is now free */ mask = (uint32_t *)req->bp_bitmap->b_data; maskrw = mask[bitmap_idx]; KASSERT(maskrw & (1 << bitmap_off), ("freeing unallocated vblock")); maskrw &= ~(1 << bitmap_off); mask[bitmap_idx] = maskrw; /* Make descriptor, bitmap and entry buffer dirty */ if (nandfs_dirty_buf(req->bp_desc, 0) == 0) { nandfs_dirty_buf(req->bp_bitmap, 1); nandfs_dirty_buf(req->bp_entry, 1); } else { brelse(req->bp_bitmap); brelse(req->bp_entry); return (-1); } return (0); } /* Claim an entry: drop the group's free count, set the entry's bitmap bit and dirty the desc/bitmap/entry buffers (releasing them on dirty failure). */ int nandfs_alloc_entry(struct nandfs_mdt* mdt, struct nandfs_alloc_request *req) { struct nandfs_block_group_desc *descriptors; uint64_t bitmap_idx, bitmap_off; uint64_t group; uint32_t *mask, maskrw; nandfs_calc_idx_entry(mdt, req->entrynum, &group, &bitmap_idx, &bitmap_off); DPRINTF(ALLOC, ("nandfs_alloc_entry: req->entrynum=%jx bitmap_idx=%jx" " bitmap_off=%jx group=%jx\n", (uintmax_t)req->entrynum, (uintmax_t)bitmap_idx, (uintmax_t)bitmap_off, (uintmax_t)group)); /* Update counter of free entries for group */ descriptors = (struct nandfs_block_group_desc *) req->bp_desc->b_data; descriptors[group].bg_nfrees--; /* Set the bit: this entry is now taken */ mask = (uint32_t *)req->bp_bitmap->b_data; maskrw = mask[bitmap_idx]; maskrw |= 1 << bitmap_off; mask[bitmap_idx] = maskrw; /* Make descriptor, bitmap and entry buffer dirty */ if (nandfs_dirty_buf(req->bp_desc, 0) == 0) { nandfs_dirty_buf(req->bp_bitmap, 1); nandfs_dirty_buf(req->bp_entry, 1); } else { brelse(req->bp_bitmap); brelse(req->bp_entry); return (-1); } return (0); } /* Back out of a find_entry/find_free_entry: release all three buffers unchanged. */ void nandfs_abort_entry(struct nandfs_alloc_request *req) { brelse(req->bp_desc); brelse(req->bp_bitmap); brelse(req->bp_entry); } /* Read (or, when 'create' is set, allocate) the block holding req->entrynum and return the in-block entry index via *entry; E2BIG when the block is past EOF and creation was not requested. */ int nandfs_get_entry_block(struct nandfs_mdt *mdt, struct nandfs_node *node, struct nandfs_alloc_request *req, uint32_t *entry, int create) { struct buf *bp; nandfs_lbn_t blocknr; int error; /* Find
buffer number for given entry */ nandfs_mdt_trans(mdt, req->entrynum, &blocknr, entry); DPRINTF(ALLOC, ("%s: ino %#jx entrynum:%#jx block:%#jx entry:%x\n", __func__, (uintmax_t)node->nn_ino, (uintmax_t)req->entrynum, (uintmax_t)blocknr, *entry)); /* Read entry block or create if 'create' parameter is not zero */ bp = NULL; if (blocknr < node->nn_inode.i_blocks) error = nandfs_bread(node, blocknr, NOCRED, 0, &bp); else if (create) error = nandfs_bcreate(node, blocknr, NOCRED, 0, &bp); else error = E2BIG; if (error) { DPRINTF(ALLOC, ("%s: ino %#jx block %#jx entry %x error %d\n", __func__, (uintmax_t)node->nn_ino, (uintmax_t)blocknr, *entry, error)); if (bp) brelse(bp); return (error); } MPASS(nandfs_vblk_get(bp) != 0 || node->nn_ino == NANDFS_DAT_INO); req->bp_entry = bp; return (0); } Index: head/sys/fs/nandfs/nandfs_bmap.c =================================================================== --- head/sys/fs/nandfs/nandfs_bmap.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_bmap.c (revision 326268) @@ -1,230 +1,232 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf * Copyright (c) 2008, 2009 Reinoud Zandijk * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "nandfs_mount.h" #include "nandfs.h" #include "nandfs_subr.h" nandfs_lbn_t nandfs_get_maxfilesize(struct nandfs_device *fsdev) { return (get_maxfilesize(fsdev)); } /* Translate a logical block number to a virtual block number; the GC inode maps logical blocks 1:1, everything else goes through the bmap. */ int nandfs_bmap_lookup(struct nandfs_node *node, nandfs_lbn_t lblk, nandfs_daddr_t *vblk) { int error = 0; if (node->nn_ino == NANDFS_GC_INO && lblk >= 0) *vblk = lblk; else error = bmap_lookup(node, lblk, vblk); DPRINTF(TRANSLATE, ("%s: error %d ino %#jx lblocknr %#jx -> %#jx\n", __func__, error, (uintmax_t)node->nn_ino, (uintmax_t)lblk, (uintmax_t)*vblk)); if (error) nandfs_error("%s: returned %d", __func__, error); return (error); } /* Bind bp to lblk: allocate a virtual block (except for the DAT, which keeps vblk 0), tag the buffer as assigned and record the mapping; the vblock is freed again if the bmap insert fails. */ int nandfs_bmap_insert_block(struct nandfs_node *node, nandfs_lbn_t lblk, struct buf *bp) { struct nandfs_device *fsdev; nandfs_daddr_t vblk; int error; fsdev = node->nn_nandfsdev; vblk = 0; if (node->nn_ino != NANDFS_DAT_INO) { error = nandfs_vblock_alloc(fsdev, &vblk); if (error) return (error); } nandfs_buf_set(bp, NANDFS_VBLK_ASSIGNED); nandfs_vblk_set(bp, vblk); error = bmap_insert_block(node, lblk, vblk); if (error) { nandfs_vblock_free(fsdev, vblk); return (error); } return (0); } /* Mark the bmap metadata covering bp's logical block dirty. */ int nandfs_bmap_dirty_blocks(struct nandfs_node *node,
struct buf *bp, int force) { int error; error = bmap_dirty_meta(node, bp->b_lblkno, force); if (error) nandfs_error("%s: cannot dirty buffer %p\n", __func__, bp); return (error); } /* Thin logged wrapper around bmap_insert_block(). */ static int nandfs_bmap_update_mapping(struct nandfs_node *node, nandfs_lbn_t lblk, nandfs_daddr_t blknr) { int error; DPRINTF(BMAP, ("%s: node: %p ino: %#jx lblk: %#jx vblk: %#jx\n", __func__, node, (uintmax_t)node->nn_ino, (uintmax_t)lblk, (uintmax_t)blknr)); error = bmap_insert_block(node, lblk, blknr); return (error); } /* Point bp's logical block at a new block number and record it in the bmap. */ int nandfs_bmap_update_block(struct nandfs_node *node, struct buf *bp, nandfs_lbn_t blknr) { nandfs_lbn_t lblk; int error; lblk = bp->b_lblkno; nandfs_vblk_set(bp, blknr); DPRINTF(BMAP, ("%s: node: %p ino: %#jx bp: %p lblk: %#jx blk: %#jx\n", __func__, node, (uintmax_t)node->nn_ino, bp, (uintmax_t)lblk, (uintmax_t)blknr)); error = nandfs_bmap_update_mapping(node, lblk, blknr); if (error) { nandfs_error("%s: cannot update lblk:%jx to blk:%jx for " "node:%p, error:%d\n", __func__, (uintmax_t)lblk, (uintmax_t)blknr, node, error); return (error); } return (error); } /* Copy-on-write bookkeeping for a rewritten block: the DAT itself and freshly assigned buffers are skipped; otherwise a new vblock replaces oldblk, and oldblk's lifetime is closed via nandfs_vblock_end(). */ int nandfs_bmap_update_dat(struct nandfs_node *node, nandfs_daddr_t oldblk, struct buf *bp) { struct nandfs_device *fsdev; nandfs_daddr_t vblk = 0; int error; if (node->nn_ino == NANDFS_DAT_INO) return (0); if (nandfs_buf_check(bp, NANDFS_VBLK_ASSIGNED)) { nandfs_buf_clear(bp, NANDFS_VBLK_ASSIGNED); return (0); } fsdev = node->nn_nandfsdev; /* First alloc new virtual block....
*/ error = nandfs_vblock_alloc(fsdev, &vblk); if (error) return (error); error = nandfs_bmap_update_block(node, bp, vblk); if (error) return (error); /* Then we can end up with old one */ nandfs_vblock_end(fsdev, oldblk); DPRINTF(BMAP, ("%s: ino %#jx block %#jx: update vblk %#jx to %#jx\n", __func__, (uintmax_t)node->nn_ino, (uintmax_t)bp->b_lblkno, (uintmax_t)oldblk, (uintmax_t)vblk)); return (error); } /* Shrink the mapping from oblk down to nblk (removes oblk - nblk trailing entries). */ int nandfs_bmap_truncate_mapping(struct nandfs_node *node, nandfs_lbn_t oblk, nandfs_lbn_t nblk) { nandfs_lbn_t todo; int error; todo = oblk - nblk; DPRINTF(BMAP, ("%s: node %p oblk %jx nblk %jx truncate by %jx\n", __func__, node, oblk, nblk, todo)); error = bmap_truncate_mapping(node, oblk, todo); if (error) return (error); return (error); } Index: head/sys/fs/nandfs/nandfs_buffer.c =================================================================== --- head/sys/fs/nandfs/nandfs_buffer.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_buffer.c (revision 326268) @@ -1,83 +1,85 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* geteblk() wrapper that turns an allocation failure into a panic. */ struct buf * nandfs_geteblk(int size, int flags) { struct buf *bp; /* * XXX * Right now we can call geteblk with GB_NOWAIT_BD flag, which means * it can return NULL. But we cannot afford to get NULL, hence this panic. */ bp = geteblk(size, flags); if (bp == NULL) panic("geteblk returned NULL"); return (bp); } /* Mutex-protected dirty-buffer counter. NOTE(review): nd_dirty_bufs is declared uint64_t in nandfs.h, so the '>= 0' KASSERT below can never fire -- confirm the intended check/type. */ void nandfs_dirty_bufs_increment(struct nandfs_device *fsdev) { mtx_lock(&fsdev->nd_mutex); KASSERT(fsdev->nd_dirty_bufs >= 0, ("negative nd_dirty_bufs")); fsdev->nd_dirty_bufs++; mtx_unlock(&fsdev->nd_mutex); } /* Drop the dirty-buffer count; asserts it was positive. */ void nandfs_dirty_bufs_decrement(struct nandfs_device *fsdev) { mtx_lock(&fsdev->nd_mutex); KASSERT(fsdev->nd_dirty_bufs > 0, ("decrementing not-positive nd_dirty_bufs")); fsdev->nd_dirty_bufs--; mtx_unlock(&fsdev->nd_mutex); } Index: head/sys/fs/nandfs/nandfs_cleaner.c =================================================================== --- head/sys/fs/nandfs/nandfs_cleaner.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_cleaner.c (revision 326268) @@ -1,620 +1,622 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE.
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NANDFS_CLEANER_KILL 1 static void nandfs_cleaner(struct nandfs_device *); static int nandfs_cleaner_clean_segments(struct nandfs_device *, struct nandfs_vinfo *, uint32_t, struct nandfs_period *, uint32_t, struct nandfs_bdesc *, uint32_t, uint64_t *, uint32_t); static int nandfs_process_bdesc(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd, uint64_t nmembs); /* Kick the cleaner thread (optionally asking it to exit via NANDFS_CLEANER_KILL) and block on nd_clean_cv until it signals the end of a pass. */ static void nandfs_wakeup_wait_cleaner(struct nandfs_device *fsdev, int reason) { mtx_lock(&fsdev->nd_clean_mtx); if (reason == NANDFS_CLEANER_KILL) fsdev->nd_cleaner_exit = 1; if (fsdev->nd_cleaning == 0) { fsdev->nd_cleaning = 1; wakeup(&fsdev->nd_cleaning); } cv_wait(&fsdev->nd_clean_cv, &fsdev->nd_clean_mtx); mtx_unlock(&fsdev->nd_clean_mtx); } /* Spawn the per-device cleaner kthread. */ int nandfs_start_cleaner(struct nandfs_device *fsdev) { int error; MPASS(fsdev->nd_cleaner == NULL); fsdev->nd_cleaner_exit = 0; error = kthread_add((void(*)(void *))nandfs_cleaner, fsdev, NULL, &fsdev->nd_cleaner, 0, 0, "nandfs_cleaner"); if (error) printf("nandfs: could not start cleaner: %d\n", error); return (error); } /* Ask the cleaner to exit and wait for it to acknowledge. */ int nandfs_stop_cleaner(struct nandfs_device *fsdev) { MPASS(fsdev->nd_cleaner != NULL); nandfs_wakeup_wait_cleaner(fsdev, NANDFS_CLEANER_KILL); fsdev->nd_cleaner = NULL; DPRINTF(CLEAN, ("cleaner stopped\n")); return (0); } /* End-of-pass bookkeeping: sleep up to nandfs_cleaner_interval seconds for the next kick, wake any waiters, and report whether the thread should exit. */ static int nandfs_cleaner_finished(struct nandfs_device *fsdev) { int exit; mtx_lock(&fsdev->nd_clean_mtx); fsdev->nd_cleaning = 0; if (!fsdev->nd_cleaner_exit) { DPRINTF(CLEAN, ("%s: sleep\n", __func__)); msleep(&fsdev->nd_cleaning, &fsdev->nd_clean_mtx, PRIBIO, "-", hz * nandfs_cleaner_interval); } exit = fsdev->nd_cleaner_exit; cv_broadcast(&fsdev->nd_clean_cv); mtx_unlock(&fsdev->nd_clean_mtx); if (exit) { DPRINTF(CLEAN, ("%s: no longer active\n", __func__)); return (1); } return (0); } /* Debug dump of segment-usage records. */ static void print_suinfo(struct nandfs_suinfo *suinfo,
int nsegs) { int i; for (i = 0; i < nsegs; i++) { DPRINTF(CLEAN, ("%jx %jd %c%c%c %10u\n", suinfo[i].nsi_num, suinfo[i].nsi_lastmod, (suinfo[i].nsi_flags & (NANDFS_SEGMENT_USAGE_ACTIVE) ? 'a' : '-'), (suinfo[i].nsi_flags & (NANDFS_SEGMENT_USAGE_DIRTY) ? 'd' : '-'), (suinfo[i].nsi_flags & (NANDFS_SEGMENT_USAGE_ERROR) ? 'e' : '-'), suinfo[i].nsi_blocks)); } } /* A vblock is live if its lifetime reaches the current checkpoint or its [start,end] range covers some snapshot checkpoint; cp[] is searched by binary search over nci_cno. */ static int nandfs_cleaner_vblock_is_alive(struct nandfs_device *fsdev, struct nandfs_vinfo *vinfo, struct nandfs_cpinfo *cp, uint32_t ncps) { int64_t idx, min, max; if (vinfo->nvi_end >= fsdev->nd_last_cno) return (1); if (ncps == 0) return (0); if (vinfo->nvi_end < cp[0].nci_cno || vinfo->nvi_start > cp[ncps - 1].nci_cno) return (0); idx = min = 0; max = ncps - 1; while (min <= max) { idx = (min + max) / 2; if (vinfo->nvi_start == cp[idx].nci_cno) return (1); if (vinfo->nvi_start < cp[idx].nci_cno) max = idx - 1; else min = idx + 1; } return (vinfo->nvi_end >= cp[idx].nci_cno); } /* Stamp nvi_alive on every collected vinfo record. */ static void nandfs_cleaner_vinfo_mark_alive(struct nandfs_device *fsdev, struct nandfs_vinfo *vinfo, uint32_t nmembs, struct nandfs_cpinfo *cp, uint32_t ncps) { uint32_t i; for (i = 0; i < nmembs; i++) vinfo[i].nvi_alive = nandfs_cleaner_vblock_is_alive(fsdev, &vinfo[i], cp, ncps); } /* A DAT block is live while its current block number still equals the on-disk one recorded at scan time. */ static int nandfs_cleaner_bdesc_is_alive(struct nandfs_device *fsdev, struct nandfs_bdesc *bdesc) { int alive; alive = bdesc->bd_oblocknr == bdesc->bd_blocknr; if (!alive) MPASS(abs(bdesc->bd_oblocknr - bdesc->bd_blocknr) > 2); return (alive); } /* Stamp bd_alive on every collected bdesc record. */ static void nandfs_cleaner_bdesc_mark_alive(struct nandfs_device *fsdev, struct nandfs_bdesc *bdesc, uint32_t nmembs) { uint32_t i; for (i = 0; i < nmembs; i++) bdesc[i].bd_alive = nandfs_cleaner_bdesc_is_alive(fsdev, &bdesc[i]); } /* Walk one partial segment's binfo table, appending DAT blocks to *bdpp and all other inodes' blocks to *vipp (both cursors advance). */ static void nandfs_cleaner_iterate_psegment(struct nandfs_device *fsdev, struct nandfs_segment_summary *segsum, union nandfs_binfo *binfo, nandfs_daddr_t blk, struct nandfs_vinfo **vipp, struct nandfs_bdesc **bdpp) { int i; DPRINTF(CLEAN, ("%s nbinfos %x\n", __func__,
segsum->ss_nbinfos)); for (i = 0; i < segsum->ss_nbinfos; i++) { if (binfo[i].bi_v.bi_ino == NANDFS_DAT_INO) { (*bdpp)->bd_oblocknr = blk + segsum->ss_nblocks - segsum->ss_nbinfos + i; /* * XXX Hack */ if (segsum->ss_flags & NANDFS_SS_SR) (*bdpp)->bd_oblocknr--; (*bdpp)->bd_level = binfo[i].bi_dat.bi_level; (*bdpp)->bd_offset = binfo[i].bi_dat.bi_blkoff; (*bdpp)++; } else { (*vipp)->nvi_ino = binfo[i].bi_v.bi_ino; (*vipp)->nvi_vblocknr = binfo[i].bi_v.bi_vblocknr; (*vipp)++; } } } /* Walk every partial segment in segno accumulating vinfo/bdesc entries; *select is set when the whole segment parsed cleanly. */ static int nandfs_cleaner_iterate_segment(struct nandfs_device *fsdev, uint64_t segno, struct nandfs_vinfo **vipp, struct nandfs_bdesc **bdpp, int *select) { struct nandfs_segment_summary *segsum; union nandfs_binfo *binfo; struct buf *bp; uint32_t nblocks; nandfs_daddr_t curr, start, end; int error = 0; nandfs_get_segment_range(fsdev, segno, &start, &end); DPRINTF(CLEAN, ("%s: segno %jx start %jx end %jx\n", __func__, segno, start, end)); *select = 0; for (curr = start; curr < end; curr += nblocks) { error = nandfs_dev_bread(fsdev, curr, NOCRED, 0, &bp); if (error) { brelse(bp); nandfs_error("%s: couldn't load segment summary of %jx: %d\n", __func__, segno, error); return (error); } segsum = (struct nandfs_segment_summary *)bp->b_data; binfo = (union nandfs_binfo *)(bp->b_data + segsum->ss_bytes); if (!nandfs_segsum_valid(segsum)) { brelse(bp); nandfs_error("nandfs: invalid summary of segment %jx\n", segno); /* NOTE(review): 'error' is still 0 here, so an invalid summary is reported to the caller as success -- looks like it should return a real error code; confirm. */ return (error); } DPRINTF(CLEAN, ("%s: %jx magic %x bytes %x nblocks %x nbinfos " "%x\n", __func__, segno, segsum->ss_magic, segsum->ss_bytes, segsum->ss_nblocks, segsum->ss_nbinfos)); nandfs_cleaner_iterate_psegment(fsdev, segsum, binfo, curr, vipp, bdpp); nblocks = segsum->ss_nblocks; brelse(bp); } if (error == 0) *select = 1; return (error); } /* Pick up to nsegs DIRTY (not ACTIVE/ERROR/GC) segments starting from *rseg, appending their numbers via *segpp and advancing *rseg past the last one; wraps to segment 0 once when nothing is found. */ static int nandfs_cleaner_choose_segment(struct nandfs_device *fsdev, uint64_t **segpp, uint64_t nsegs, uint64_t *rseg) { struct nandfs_suinfo *suinfo; uint64_t i, ssegs; int error; suinfo = malloc(sizeof(*suinfo) * nsegs, M_NANDFSTEMP,
M_ZERO | M_WAITOK); if (*rseg >= fsdev->nd_fsdata.f_nsegments) *rseg = 0; retry: error = nandfs_get_segment_info_filter(fsdev, suinfo, nsegs, *rseg, &ssegs, NANDFS_SEGMENT_USAGE_DIRTY, NANDFS_SEGMENT_USAGE_ACTIVE | NANDFS_SEGMENT_USAGE_ERROR | NANDFS_SEGMENT_USAGE_GC); if (error) { nandfs_error("%s:%d", __FILE__, __LINE__); goto out; } if (ssegs == 0 && *rseg != 0) { *rseg = 0; goto retry; } if (ssegs > 0) { print_suinfo(suinfo, ssegs); for (i = 0; i < ssegs; i++) { (**segpp) = suinfo[i].nsi_num; (*segpp)++; } *rseg = suinfo[i - 1].nsi_num + 1; } out: free(suinfo, M_NANDFSTEMP); return (error); } /* One cleaner pass: choose up to nandfs_cleaner_segments dirty segments, gather their block info, mark liveness against the snapshot list under the segment-construction write lock, then relocate live data via nandfs_cleaner_clean_segments(). */ static int nandfs_cleaner_body(struct nandfs_device *fsdev, uint64_t *rseg) { struct nandfs_vinfo *vinfo, *vip, *vipi; struct nandfs_bdesc *bdesc, *bdp, *bdpi; struct nandfs_cpstat cpstat; struct nandfs_cpinfo *cpinfo = NULL; uint64_t *segnums, *segp; int select, selected; int error = 0; int nsegs; int i; nsegs = nandfs_cleaner_segments; vip = vinfo = malloc(sizeof(*vinfo) * fsdev->nd_fsdata.f_blocks_per_segment * nsegs, M_NANDFSTEMP, M_ZERO | M_WAITOK); bdp = bdesc = malloc(sizeof(*bdesc) * fsdev->nd_fsdata.f_blocks_per_segment * nsegs, M_NANDFSTEMP, M_ZERO | M_WAITOK); segp = segnums = malloc(sizeof(*segnums) * nsegs, M_NANDFSTEMP, M_WAITOK); error = nandfs_cleaner_choose_segment(fsdev, &segp, nsegs, rseg); if (error) { nandfs_error("%s:%d", __FILE__, __LINE__); goto out; } if (segnums == segp) goto out; selected = 0; for (i = 0; i < segp - segnums; i++) { error = nandfs_cleaner_iterate_segment(fsdev, segnums[i], &vip, &bdp, &select); if (error) { /* * XXX deselect (see below)?
*/ goto out; } if (!select) segnums[i] = NANDFS_NOSEGMENT; else { error = nandfs_markgc_segment(fsdev, segnums[i]); if (error) { nandfs_error("%s:%d\n", __FILE__, __LINE__); goto out; } selected++; } } if (selected == 0) { MPASS(vinfo == vip); MPASS(bdesc == bdp); goto out; } error = nandfs_get_cpstat(fsdev->nd_cp_node, &cpstat); if (error) { nandfs_error("%s:%d\n", __FILE__, __LINE__); goto out; } if (cpstat.ncp_nss != 0) { cpinfo = malloc(sizeof(struct nandfs_cpinfo) * cpstat.ncp_nss, M_NANDFSTEMP, M_WAITOK); error = nandfs_get_cpinfo(fsdev->nd_cp_node, 1, NANDFS_SNAPSHOT, cpinfo, cpstat.ncp_nss, NULL); if (error) { nandfs_error("%s:%d\n", __FILE__, __LINE__); /* NOTE(review): this runs before NANDFS_WRITELOCK below, yet jumps to out_locked, which calls NANDFS_WRITEUNLOCK -- unlock without a matching lock; probably should be 'goto out'. Confirm. */ goto out_locked; } } NANDFS_WRITELOCK(fsdev); DPRINTF(CLEAN, ("%s: got lock\n", __func__)); error = nandfs_get_dat_vinfo(fsdev, vinfo, vip - vinfo); if (error) { nandfs_error("%s:%d\n", __FILE__, __LINE__); goto out_locked; } nandfs_cleaner_vinfo_mark_alive(fsdev, vinfo, vip - vinfo, cpinfo, cpstat.ncp_nss); error = nandfs_get_dat_bdescs(fsdev, bdesc, bdp - bdesc); if (error) { nandfs_error("%s:%d\n", __FILE__, __LINE__); goto out_locked; } nandfs_cleaner_bdesc_mark_alive(fsdev, bdesc, bdp - bdesc); DPRINTF(CLEAN, ("got:\n")); for (vipi = vinfo; vipi < vip; vipi++) { DPRINTF(CLEAN, ("v ino %jx vblocknr %jx start %jx end %jx " "alive %d\n", vipi->nvi_ino, vipi->nvi_vblocknr, vipi->nvi_start, vipi->nvi_end, vipi->nvi_alive)); } for (bdpi = bdesc; bdpi < bdp; bdpi++) { DPRINTF(CLEAN, ("b oblocknr %jx blocknr %jx offset %jx " "alive %d\n", bdpi->bd_oblocknr, bdpi->bd_blocknr, bdpi->bd_offset, bdpi->bd_alive)); } DPRINTF(CLEAN, ("end list\n")); error = nandfs_cleaner_clean_segments(fsdev, vinfo, vip - vinfo, NULL, 0, bdesc, bdp - bdesc, segnums, segp - segnums); if (error) nandfs_error("%s:%d\n", __FILE__, __LINE__); out_locked: NANDFS_WRITEUNLOCK(fsdev); out: free(cpinfo, M_NANDFSTEMP); free(segnums, M_NANDFSTEMP); free(bdesc, M_NANDFSTEMP); free(vinfo, M_NANDFSTEMP); return (error); } /* Cleaner kthread main loop: run a pass whenever woken (unless disabled or rebooting), exit when nandfs_cleaner_finished() says so. */ static void
nandfs_cleaner(struct nandfs_device *fsdev) { uint64_t checked_seg = 0; int error; while (!nandfs_cleaner_finished(fsdev)) { if (!nandfs_cleaner_enable || rebooting) continue; DPRINTF(CLEAN, ("%s: run started\n", __func__)); fsdev->nd_cleaning = 1; error = nandfs_cleaner_body(fsdev, &checked_seg); DPRINTF(CLEAN, ("%s: run finished error %d\n", __func__, error)); } DPRINTF(CLEAN, ("%s: exiting\n", __func__)); kthread_exit(); } /* Relocate live vblocks through the GC inode, drop the given checkpoints and dead vblocks, redirty live DAT blocks, then queue the reclaimed segment numbers on nd_free_base for reuse. */ static int nandfs_cleaner_clean_segments(struct nandfs_device *nffsdev, struct nandfs_vinfo *vinfo, uint32_t nvinfo, struct nandfs_period *pd, uint32_t npd, struct nandfs_bdesc *bdesc, uint32_t nbdesc, uint64_t *segments, uint32_t nsegs) { struct nandfs_node *gc; struct buf *bp; uint32_t i; int error = 0; gc = nffsdev->nd_gc_node; DPRINTF(CLEAN, ("%s: enter\n", __func__)); VOP_LOCK(NTOV(gc), LK_EXCLUSIVE); for (i = 0; i < nvinfo; i++) { if (!vinfo[i].nvi_alive) continue; DPRINTF(CLEAN, ("%s: read vblknr:%#jx blk:%#jx\n", __func__, (uintmax_t)vinfo[i].nvi_vblocknr, (uintmax_t)vinfo[i].nvi_blocknr)); error = nandfs_bread(nffsdev->nd_gc_node, vinfo[i].nvi_blocknr, NULL, 0, &bp); if (error) { nandfs_error("%s:%d", __FILE__, __LINE__); VOP_UNLOCK(NTOV(gc), 0); goto out; } nandfs_vblk_set(bp, vinfo[i].nvi_vblocknr); nandfs_buf_set(bp, NANDFS_VBLK_ASSIGNED); nandfs_dirty_buf(bp, 1); } VOP_UNLOCK(NTOV(gc), 0); /* Delete checkpoints */ for (i = 0; i < npd; i++) { DPRINTF(CLEAN, ("delete checkpoint: %jx\n", (uintmax_t)pd[i].p_start)); error = nandfs_delete_cp(nffsdev->nd_cp_node, pd[i].p_start, pd[i].p_end); if (error) { nandfs_error("%s:%d", __FILE__, __LINE__); goto out; } } /* Update vblocks */ for (i = 0; i < nvinfo; i++) { if (vinfo[i].nvi_alive) continue; DPRINTF(CLEAN, ("freeing vblknr: %jx\n", vinfo[i].nvi_vblocknr)); error = nandfs_vblock_free(nffsdev, vinfo[i].nvi_vblocknr); if (error) { nandfs_error("%s:%d", __FILE__, __LINE__); goto out; } } error = nandfs_process_bdesc(nffsdev, bdesc, nbdesc); if (error) { nandfs_error("%s:%d", __FILE__,
__LINE__); goto out; } /* Add segments to clean */ if (nffsdev->nd_free_count) { nffsdev->nd_free_base = realloc(nffsdev->nd_free_base, (nffsdev->nd_free_count + nsegs) * sizeof(uint64_t), M_NANDFSTEMP, M_WAITOK | M_ZERO); memcpy(&nffsdev->nd_free_base[nffsdev->nd_free_count], segments, nsegs * sizeof(uint64_t)); nffsdev->nd_free_count += nsegs; } else { nffsdev->nd_free_base = malloc(nsegs * sizeof(uint64_t), M_NANDFSTEMP, M_WAITOK|M_ZERO); memcpy(nffsdev->nd_free_base, segments, nsegs * sizeof(uint64_t)); nffsdev->nd_free_count = nsegs; } out: DPRINTF(CLEAN, ("%s: exit error %d\n", __func__, error)); return (error); } /* Redirty the live DAT blocks (meta path for indirect levels) under the DAT vnode lock so the next segment write relocates them. */ static int nandfs_process_bdesc(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd, uint64_t nmembs) { struct nandfs_node *dat_node; struct buf *bp; uint64_t i; int error; dat_node = nffsdev->nd_dat_node; VOP_LOCK(NTOV(dat_node), LK_EXCLUSIVE); for (i = 0; i < nmembs; i++) { if (!bd[i].bd_alive) continue; DPRINTF(CLEAN, ("%s: idx %jx offset %jx\n", __func__, i, bd[i].bd_offset)); if (bd[i].bd_level) { error = nandfs_bread_meta(dat_node, bd[i].bd_offset, NULL, 0, &bp); if (error) { nandfs_error("%s: cannot read dat node " "level:%d\n", __func__, bd[i].bd_level); brelse(bp); VOP_UNLOCK(NTOV(dat_node), 0); return (error); } nandfs_dirty_buf_meta(bp, 1); nandfs_bmap_dirty_blocks(VTON(bp->b_vp), bp, 1); } else { error = nandfs_bread(dat_node, bd[i].bd_offset, NULL, 0, &bp); if (error) { nandfs_error("%s: cannot read dat node\n", __func__); brelse(bp); VOP_UNLOCK(NTOV(dat_node), 0); return (error); } nandfs_dirty_buf(bp, 1); } DPRINTF(CLEAN, ("%s: bp: %p\n", __func__, bp)); } VOP_UNLOCK(NTOV(dat_node), 0); return (0); } Index: head/sys/fs/nandfs/nandfs_cpfile.c =================================================================== --- head/sys/fs/nandfs/nandfs_cpfile.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_cpfile.c (revision 326268) @@ -1,776 +1,778 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf.
* All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "nandfs_mount.h" #include "nandfs.h" #include "nandfs_subr.h" static int nandfs_checkpoint_size(struct nandfs_device *fsdev) { return (fsdev->nd_fsdata.f_checkpoint_size); } static int nandfs_checkpoint_blk_offset(struct nandfs_device *fsdev, uint64_t cn, uint64_t *blk, uint64_t *offset) { uint64_t off; uint16_t cp_size, cp_per_blk; KASSERT((cn), ("checkpoing cannot be zero")); cp_size = fsdev->nd_fsdata.f_checkpoint_size; cp_per_blk = fsdev->nd_blocksize / cp_size; off = roundup(sizeof(struct nandfs_cpfile_header), cp_size) / cp_size; off += (cn - 1); *blk = off / cp_per_blk; *offset = (off % cp_per_blk) * cp_size; return (0); } static int nandfs_checkpoint_blk_remaining(struct nandfs_device *fsdev, uint64_t cn, uint64_t blk, uint64_t offset) { uint16_t cp_size, cp_remaining; cp_size = fsdev->nd_fsdata.f_checkpoint_size; cp_remaining = (fsdev->nd_blocksize - offset) / cp_size; return (cp_remaining); } int nandfs_get_checkpoint(struct nandfs_device *fsdev, struct nandfs_node *cp_node, uint64_t cn) { struct buf *bp; uint64_t blk, offset; int error; if (cn != fsdev->nd_last_cno && cn != (fsdev->nd_last_cno + 1)) { return (-1); } error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); if (error) { brelse(bp); return (-1); } error = nandfs_dirty_buf(bp, 0); if (error) return (-1); nandfs_checkpoint_blk_offset(fsdev, cn, &blk, &offset); if (blk != 0) { if (blk < cp_node->nn_inode.i_blocks) error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); else error = nandfs_bcreate(cp_node, blk, NOCRED, 0, &bp); if (error) { if (bp) brelse(bp); return (-1); } nandfs_dirty_buf(bp, 1); } DPRINTF(CPFILE, ("%s: cn:%#jx entry block:%#jx offset:%#jx\n", __func__, (uintmax_t)cn, (uintmax_t)blk, (uintmax_t)offset)); return (0); } int nandfs_set_checkpoint(struct nandfs_device *fsdev, struct nandfs_node 
*cp_node, uint64_t cn, struct nandfs_inode *ifile_inode, uint64_t nblocks) { struct nandfs_cpfile_header *cnh; struct nandfs_checkpoint *cnp; struct buf *bp; uint64_t blk, offset; int error; if (cn != fsdev->nd_last_cno && cn != (fsdev->nd_last_cno + 1)) { nandfs_error("%s: trying to set invalid chekpoint %jx - %jx\n", __func__, cn, fsdev->nd_last_cno); return (-1); } error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); if (error) { brelse(bp); return error; } cnh = (struct nandfs_cpfile_header *) bp->b_data; cnh->ch_ncheckpoints++; nandfs_checkpoint_blk_offset(fsdev, cn, &blk, &offset); if(blk != 0) { brelse(bp); error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); if (error) { brelse(bp); return error; } } cnp = (struct nandfs_checkpoint *)((uint8_t *)bp->b_data + offset); cnp->cp_flags = 0; cnp->cp_checkpoints_count = 1; memset(&cnp->cp_snapshot_list, 0, sizeof(struct nandfs_snapshot_list)); cnp->cp_cno = cn; cnp->cp_create = fsdev->nd_ts.tv_sec; cnp->cp_nblk_inc = nblocks; cnp->cp_blocks_count = 0; memcpy (&cnp->cp_ifile_inode, ifile_inode, sizeof(cnp->cp_ifile_inode)); DPRINTF(CPFILE, ("%s: cn:%#jx ctime:%#jx nblk:%#jx\n", __func__, (uintmax_t)cn, (uintmax_t)cnp->cp_create, (uintmax_t)nblocks)); brelse(bp); return (0); } static int nandfs_cp_mounted(struct nandfs_device *nandfsdev, uint64_t cno) { struct nandfsmount *nmp; int mounted = 0; mtx_lock(&nandfsdev->nd_mutex); /* No double-mounting of the same checkpoint */ STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) { if (nmp->nm_mount_args.cpno == cno) { mounted = 1; break; } } mtx_unlock(&nandfsdev->nd_mutex); return (mounted); } static int nandfs_cp_set_snapshot(struct nandfs_node *cp_node, uint64_t cno) { struct nandfs_device *fsdev; struct nandfs_cpfile_header *cnh; struct nandfs_checkpoint *cnp; struct nandfs_snapshot_list *list; struct buf *bp; uint64_t blk, prev_blk, offset; uint64_t curr, prev; int error; fsdev = cp_node->nn_nandfsdev; /* Get snapshot data */ nandfs_checkpoint_blk_offset(fsdev, 
cno, &blk, &offset); error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); if (cnp->cp_flags & NANDFS_CHECKPOINT_INVALID) { brelse(bp); return (ENOENT); } if ((cnp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT)) { brelse(bp); return (EINVAL); } brelse(bp); /* Get list from header */ error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } cnh = (struct nandfs_cpfile_header *) bp->b_data; list = &cnh->ch_snapshot_list; prev = list->ssl_prev; brelse(bp); prev_blk = ~(0); curr = 0; while (prev > cno) { curr = prev; nandfs_checkpoint_blk_offset(fsdev, prev, &prev_blk, &offset); error = nandfs_bread(cp_node, prev_blk, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); list = &cnp->cp_snapshot_list; prev = list->ssl_prev; brelse(bp); } if (curr == 0) { nandfs_bread(cp_node, 0, NOCRED, 0, &bp); cnh = (struct nandfs_cpfile_header *) bp->b_data; list = &cnh->ch_snapshot_list; } else { nandfs_checkpoint_blk_offset(fsdev, curr, &blk, &offset); error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); list = &cnp->cp_snapshot_list; } list->ssl_prev = cno; error = nandfs_dirty_buf(bp, 0); if (error) return (error); /* Update snapshot for cno */ nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset); error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); list = &cnp->cp_snapshot_list; list->ssl_prev = prev; list->ssl_next = curr; cnp->cp_flags |= NANDFS_CHECKPOINT_SNAPSHOT; nandfs_dirty_buf(bp, 1); if (prev == 0) { nandfs_bread(cp_node, 0, NOCRED, 0, &bp); cnh = (struct nandfs_cpfile_header *) bp->b_data; list = &cnh->ch_snapshot_list; } else { /* Update snapshot list for prev */ 
nandfs_checkpoint_blk_offset(fsdev, prev, &blk, &offset); error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); list = &cnp->cp_snapshot_list; } list->ssl_next = cno; nandfs_dirty_buf(bp, 1); /* Update header */ error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } cnh = (struct nandfs_cpfile_header *) bp->b_data; cnh->ch_nsnapshots++; nandfs_dirty_buf(bp, 1); return (0); } static int nandfs_cp_clr_snapshot(struct nandfs_node *cp_node, uint64_t cno) { struct nandfs_device *fsdev; struct nandfs_cpfile_header *cnh; struct nandfs_checkpoint *cnp; struct nandfs_snapshot_list *list; struct buf *bp; uint64_t blk, offset, snapshot_cnt; uint64_t next, prev; int error; fsdev = cp_node->nn_nandfsdev; /* Get snapshot data */ nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset); error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); if (cnp->cp_flags & NANDFS_CHECKPOINT_INVALID) { brelse(bp); return (ENOENT); } if (!(cnp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT)) { brelse(bp); return (EINVAL); } list = &cnp->cp_snapshot_list; next = list->ssl_next; prev = list->ssl_prev; brelse(bp); /* Get previous snapshot */ if (prev != 0) { nandfs_checkpoint_blk_offset(fsdev, prev, &blk, &offset); error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); list = &cnp->cp_snapshot_list; } else { nandfs_bread(cp_node, 0, NOCRED, 0, &bp); cnh = (struct nandfs_cpfile_header *) bp->b_data; list = &cnh->ch_snapshot_list; } list->ssl_next = next; error = nandfs_dirty_buf(bp, 0); if (error) return (error); /* Get next snapshot */ if (next != 0) { nandfs_checkpoint_blk_offset(fsdev, next, &blk, &offset); error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); if (error) { 
brelse(bp); return (error); } cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); list = &cnp->cp_snapshot_list; } else { nandfs_bread(cp_node, 0, NOCRED, 0, &bp); cnh = (struct nandfs_cpfile_header *) bp->b_data; list = &cnh->ch_snapshot_list; } list->ssl_prev = prev; nandfs_dirty_buf(bp, 1); /* Update snapshot list for cno */ nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset); error = nandfs_bread(cp_node, blk, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); list = &cnp->cp_snapshot_list; list->ssl_prev = 0; list->ssl_next = 0; cnp->cp_flags &= !NANDFS_CHECKPOINT_SNAPSHOT; nandfs_dirty_buf(bp, 1); /* Update header */ error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } cnh = (struct nandfs_cpfile_header *) bp->b_data; snapshot_cnt = cnh->ch_nsnapshots; snapshot_cnt--; cnh->ch_nsnapshots = snapshot_cnt; nandfs_dirty_buf(bp, 1); return (0); } int nandfs_chng_cpmode(struct nandfs_node *node, struct nandfs_cpmode *ncpm) { struct nandfs_device *fsdev; uint64_t cno = ncpm->ncpm_cno; int mode = ncpm->ncpm_mode; int ret; fsdev = node->nn_nandfsdev; VOP_LOCK(NTOV(node), LK_EXCLUSIVE); switch (mode) { case NANDFS_CHECKPOINT: if (nandfs_cp_mounted(fsdev, cno)) { ret = EBUSY; } else ret = nandfs_cp_clr_snapshot(node, cno); break; case NANDFS_SNAPSHOT: ret = nandfs_cp_set_snapshot(node, cno); break; default: ret = EINVAL; break; } VOP_UNLOCK(NTOV(node), 0); return (ret); } static void nandfs_cpinfo_fill(struct nandfs_checkpoint *cnp, struct nandfs_cpinfo *nci) { nci->nci_flags = cnp->cp_flags; nci->nci_pad = 0; nci->nci_cno = cnp->cp_cno; nci->nci_create = cnp->cp_create; nci->nci_nblk_inc = cnp->cp_nblk_inc; nci->nci_blocks_count = cnp->cp_blocks_count; nci->nci_next = cnp->cp_snapshot_list.ssl_next; DPRINTF(CPFILE, ("%s: cn:%#jx ctime:%#jx\n", __func__, (uintmax_t)cnp->cp_cno, (uintmax_t)cnp->cp_create)); } static int nandfs_get_cpinfo_cp(struct nandfs_node 
*node, uint64_t cno, struct nandfs_cpinfo *nci, uint32_t mnmembs, uint32_t *nmembs) { struct nandfs_device *fsdev; struct buf *bp; uint64_t blk, offset, last_cno, i; uint16_t remaining; int error; #ifdef INVARIANTS uint64_t testblk, testoffset; #endif if (cno == 0) { return (ENOENT); } if (mnmembs < 1) { return (EINVAL); } fsdev = node->nn_nandfsdev; last_cno = fsdev->nd_last_cno; DPRINTF(CPFILE, ("%s: cno:%#jx mnmembs: %#jx last:%#jx\n", __func__, (uintmax_t)cno, (uintmax_t)mnmembs, (uintmax_t)fsdev->nd_last_cno)); /* * do { * get block * read checkpoints until we hit last checkpoint, end of block or * requested number * } while (last read checkpoint <= last checkpoint on fs && * read checkpoints < request number); */ *nmembs = i = 0; do { nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset); remaining = nandfs_checkpoint_blk_remaining(fsdev, cno, blk, offset); error = nandfs_bread(node, blk, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } while (cno <= last_cno && i < mnmembs && remaining) { #ifdef INVARIANTS nandfs_checkpoint_blk_offset(fsdev, cno, &testblk, &testoffset); KASSERT(testblk == blk, ("testblk != blk")); KASSERT(testoffset == offset, ("testoffset != offset")); #endif DPRINTF(CPFILE, ("%s: cno %#jx\n", __func__, (uintmax_t)cno)); nandfs_cpinfo_fill((struct nandfs_checkpoint *) (bp->b_data + offset), nci); offset += nandfs_checkpoint_size(fsdev); i++; nci++; cno++; (*nmembs)++; remaining--; } brelse(bp); } while (cno <= last_cno && i < mnmembs); return (0); } static int nandfs_get_cpinfo_sp(struct nandfs_node *node, uint64_t cno, struct nandfs_cpinfo *nci, uint32_t mnmembs, uint32_t *nmembs) { struct nandfs_checkpoint *cnp; struct nandfs_cpfile_header *cnh; struct nandfs_device *fsdev; struct buf *bp = NULL; uint64_t curr = 0; uint64_t blk, offset, curr_cno; uint32_t flag; int i, error; if (cno == 0 || cno == ~(0)) return (ENOENT); fsdev = node->nn_nandfsdev; curr_cno = cno; if (nmembs) *nmembs = 0; if (curr_cno == 1) { /* Get list from 
header */ error = nandfs_bread(node, 0, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } cnh = (struct nandfs_cpfile_header *) bp->b_data; curr_cno = cnh->ch_snapshot_list.ssl_next; brelse(bp); bp = NULL; /* No snapshots */ if (curr_cno == 0) return (0); } for (i = 0; i < mnmembs; i++, nci++) { nandfs_checkpoint_blk_offset(fsdev, curr_cno, &blk, &offset); if (i == 0 || curr != blk) { if (bp) brelse(bp); error = nandfs_bread(node, blk, NOCRED, 0, &bp); if (error) { brelse(bp); return (ENOENT); } curr = blk; } cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); flag = cnp->cp_flags; if (!(flag & NANDFS_CHECKPOINT_SNAPSHOT) || (flag & NANDFS_CHECKPOINT_INVALID)) break; nci->nci_flags = flag; nci->nci_pad = 0; nci->nci_cno = cnp->cp_cno; nci->nci_create = cnp->cp_create; nci->nci_nblk_inc = cnp->cp_nblk_inc; nci->nci_blocks_count = cnp->cp_blocks_count; nci->nci_next = cnp->cp_snapshot_list.ssl_next; if (nmembs) (*nmembs)++; curr_cno = nci->nci_next; if (!curr_cno) break; } brelse(bp); return (0); } int nandfs_get_cpinfo(struct nandfs_node *node, uint64_t cno, uint16_t flags, struct nandfs_cpinfo *nci, uint32_t nmembs, uint32_t *nnmembs) { int error; VOP_LOCK(NTOV(node), LK_EXCLUSIVE); switch (flags) { case NANDFS_CHECKPOINT: error = nandfs_get_cpinfo_cp(node, cno, nci, nmembs, nnmembs); break; case NANDFS_SNAPSHOT: error = nandfs_get_cpinfo_sp(node, cno, nci, nmembs, nnmembs); break; default: error = EINVAL; break; } VOP_UNLOCK(NTOV(node), 0); return (error); } int nandfs_get_cpinfo_ioctl(struct nandfs_node *node, struct nandfs_argv *nargv) { struct nandfs_cpinfo *nci; uint64_t cno = nargv->nv_index; void *buf = (void *)((uintptr_t)nargv->nv_base); uint16_t flags = nargv->nv_flags; uint32_t nmembs = 0; int error; if (nargv->nv_nmembs > NANDFS_CPINFO_MAX) return (EINVAL); nci = malloc(sizeof(struct nandfs_cpinfo) * nargv->nv_nmembs, M_NANDFSTEMP, M_WAITOK | M_ZERO); error = nandfs_get_cpinfo(node, cno, flags, nci, nargv->nv_nmembs, &nmembs); if (error 
== 0) { nargv->nv_nmembs = nmembs; error = copyout(nci, buf, sizeof(struct nandfs_cpinfo) * nmembs); } free(nci, M_NANDFSTEMP); return (error); } int nandfs_delete_cp(struct nandfs_node *node, uint64_t start, uint64_t end) { struct nandfs_checkpoint *cnp; struct nandfs_device *fsdev; struct buf *bp; uint64_t cno = start, blk, offset; int error; DPRINTF(CPFILE, ("%s: delete cno %jx-%jx\n", __func__, start, end)); VOP_LOCK(NTOV(node), LK_EXCLUSIVE); fsdev = node->nn_nandfsdev; for (cno = start; cno <= end; cno++) { if (!cno) continue; nandfs_checkpoint_blk_offset(fsdev, cno, &blk, &offset); error = nandfs_bread(node, blk, NOCRED, 0, &bp); if (error) { VOP_UNLOCK(NTOV(node), 0); brelse(bp); return (error); } cnp = (struct nandfs_checkpoint *)(bp->b_data + offset); if (cnp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT) { brelse(bp); VOP_UNLOCK(NTOV(node), 0); return (0); } cnp->cp_flags |= NANDFS_CHECKPOINT_INVALID; error = nandfs_dirty_buf(bp, 0); if (error) return (error); } VOP_UNLOCK(NTOV(node), 0); return (0); } int nandfs_make_snap(struct nandfs_device *fsdev, uint64_t *cno) { struct nandfs_cpmode cpm; int error; *cno = cpm.ncpm_cno = fsdev->nd_last_cno; cpm.ncpm_mode = NANDFS_SNAPSHOT; error = nandfs_chng_cpmode(fsdev->nd_cp_node, &cpm); return (error); } int nandfs_delete_snap(struct nandfs_device *fsdev, uint64_t cno) { struct nandfs_cpmode cpm; int error; cpm.ncpm_cno = cno; cpm.ncpm_mode = NANDFS_CHECKPOINT; error = nandfs_chng_cpmode(fsdev->nd_cp_node, &cpm); return (error); } int nandfs_get_cpstat(struct nandfs_node *cp_node, struct nandfs_cpstat *ncp) { struct nandfs_device *fsdev; struct nandfs_cpfile_header *cnh; struct buf *bp; int error; VOP_LOCK(NTOV(cp_node), LK_EXCLUSIVE); fsdev = cp_node->nn_nandfsdev; /* Get header */ error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); if (error) { brelse(bp); VOP_UNLOCK(NTOV(cp_node), 0); return (error); } cnh = (struct nandfs_cpfile_header *) bp->b_data; ncp->ncp_cno = fsdev->nd_last_cno; ncp->ncp_ncps = 
cnh->ch_ncheckpoints; ncp->ncp_nss = cnh->ch_nsnapshots; DPRINTF(CPFILE, ("%s: cno:%#jx ncps:%#jx nss:%#jx\n", __func__, ncp->ncp_cno, ncp->ncp_ncps, ncp->ncp_nss)); brelse(bp); VOP_UNLOCK(NTOV(cp_node), 0); return (0); } Index: head/sys/fs/nandfs/nandfs_dat.c =================================================================== --- head/sys/fs/nandfs/nandfs_dat.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_dat.c (revision 326268) @@ -1,344 +1,346 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int nandfs_vblock_alloc(struct nandfs_device *nandfsdev, nandfs_daddr_t *vblock) { struct nandfs_node *dat; struct nandfs_mdt *mdt; struct nandfs_alloc_request req; struct nandfs_dat_entry *dat_entry; uint64_t start; uint32_t entry; int locked, error; dat = nandfsdev->nd_dat_node; mdt = &nandfsdev->nd_dat_mdt; start = nandfsdev->nd_last_cno + 1; locked = NANDFS_VOP_ISLOCKED(NTOV(dat)); if (!locked) VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); req.entrynum = 0; /* Alloc vblock number */ error = nandfs_find_free_entry(mdt, dat, &req); if (error) { nandfs_error("%s: cannot find free vblk entry\n", __func__); if (!locked) VOP_UNLOCK(NTOV(dat), 0); return (error); } /* Read/create buffer */ error = nandfs_get_entry_block(mdt, dat, &req, &entry, 1); if (error) { nandfs_error("%s: cannot get free vblk entry\n", __func__); nandfs_abort_entry(&req); if (!locked) VOP_UNLOCK(NTOV(dat), 0); return (error); } /* Fill out vblock data */ dat_entry = (struct nandfs_dat_entry *) req.bp_entry->b_data; dat_entry[entry].de_start = start; dat_entry[entry].de_end = UINTMAX_MAX; dat_entry[entry].de_blocknr = 0; /* Commit allocation */ error = nandfs_alloc_entry(mdt, &req); if (error) { nandfs_error("%s: cannot get free vblk entry\n", __func__); if (!locked) VOP_UNLOCK(NTOV(dat), 0); return (error); } /* Return allocated vblock */ *vblock = req.entrynum; DPRINTF(DAT, ("%s: allocated vblock %#jx\n", __func__, (uintmax_t)*vblock)); if (!locked) VOP_UNLOCK(NTOV(dat), 0); return (error); } int nandfs_vblock_assign(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock, nandfs_lbn_t block) { struct nandfs_node *dat; struct nandfs_mdt *mdt; struct nandfs_alloc_request req; struct nandfs_dat_entry *dat_entry; uint32_t entry; int locked, error; dat = nandfsdev->nd_dat_node; mdt = 
&nandfsdev->nd_dat_mdt; locked = NANDFS_VOP_ISLOCKED(NTOV(dat)); if (!locked) VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); req.entrynum = vblock; error = nandfs_get_entry_block(mdt, dat, &req, &entry, 0); if (!error) { dat_entry = (struct nandfs_dat_entry *) req.bp_entry->b_data; dat_entry[entry].de_blocknr = block; DPRINTF(DAT, ("%s: assing vblock %jx->%jx\n", __func__, (uintmax_t)vblock, (uintmax_t)block)); /* * It is mostly called from syncer() so * we want to force making buf dirty */ error = nandfs_dirty_buf(req.bp_entry, 1); } if (!locked) VOP_UNLOCK(NTOV(dat), 0); return (error); } int nandfs_vblock_end(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock) { struct nandfs_node *dat; struct nandfs_mdt *mdt; struct nandfs_alloc_request req; struct nandfs_dat_entry *dat_entry; uint64_t end; uint32_t entry; int locked, error; dat = nandfsdev->nd_dat_node; mdt = &nandfsdev->nd_dat_mdt; end = nandfsdev->nd_last_cno; locked = NANDFS_VOP_ISLOCKED(NTOV(dat)); if (!locked) VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); req.entrynum = vblock; error = nandfs_get_entry_block(mdt, dat, &req, &entry, 0); if (!error) { dat_entry = (struct nandfs_dat_entry *) req.bp_entry->b_data; dat_entry[entry].de_end = end; DPRINTF(DAT, ("%s: end vblock %#jx at checkpoint %#jx\n", __func__, (uintmax_t)vblock, (uintmax_t)end)); /* * It is mostly called from syncer() so * we want to force making buf dirty */ error = nandfs_dirty_buf(req.bp_entry, 1); } if (!locked) VOP_UNLOCK(NTOV(dat), 0); return (error); } int nandfs_vblock_free(struct nandfs_device *nandfsdev, nandfs_daddr_t vblock) { struct nandfs_node *dat; struct nandfs_mdt *mdt; struct nandfs_alloc_request req; int error; dat = nandfsdev->nd_dat_node; mdt = &nandfsdev->nd_dat_mdt; VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); req.entrynum = vblock; error = nandfs_find_entry(mdt, dat, &req); if (!error) { DPRINTF(DAT, ("%s: vblk %#jx\n", __func__, (uintmax_t)vblock)); nandfs_free_entry(mdt, &req); } VOP_UNLOCK(NTOV(dat), 0); return (error); } int 
nandfs_get_dat_vinfo_ioctl(struct nandfs_device *nandfsdev, struct nandfs_argv *nargv) { struct nandfs_vinfo *vinfo; size_t size; int error; if (nargv->nv_nmembs > NANDFS_VINFO_MAX) return (EINVAL); size = sizeof(struct nandfs_vinfo) * nargv->nv_nmembs; vinfo = malloc(size, M_NANDFSTEMP, M_WAITOK|M_ZERO); error = copyin((void *)(uintptr_t)nargv->nv_base, vinfo, size); if (error) { free(vinfo, M_NANDFSTEMP); return (error); } error = nandfs_get_dat_vinfo(nandfsdev, vinfo, nargv->nv_nmembs); if (error == 0) error = copyout(vinfo, (void *)(uintptr_t)nargv->nv_base, size); free(vinfo, M_NANDFSTEMP); return (error); } int nandfs_get_dat_vinfo(struct nandfs_device *nandfsdev, struct nandfs_vinfo *vinfo, uint32_t nmembs) { struct nandfs_node *dat; struct nandfs_mdt *mdt; struct nandfs_alloc_request req; struct nandfs_dat_entry *dat_entry; uint32_t i, idx; int error = 0; dat = nandfsdev->nd_dat_node; mdt = &nandfsdev->nd_dat_mdt; DPRINTF(DAT, ("%s: nmembs %#x\n", __func__, nmembs)); VOP_LOCK(NTOV(dat), LK_EXCLUSIVE); for (i = 0; i < nmembs; i++) { req.entrynum = vinfo[i].nvi_vblocknr; error = nandfs_get_entry_block(mdt, dat,&req, &idx, 0); if (error) break; dat_entry = ((struct nandfs_dat_entry *) req.bp_entry->b_data); vinfo[i].nvi_start = dat_entry[idx].de_start; vinfo[i].nvi_end = dat_entry[idx].de_end; vinfo[i].nvi_blocknr = dat_entry[idx].de_blocknr; DPRINTF(DAT, ("%s: vinfo: %jx[%jx-%jx]->%jx\n", __func__, vinfo[i].nvi_vblocknr, vinfo[i].nvi_start, vinfo[i].nvi_end, vinfo[i].nvi_blocknr)); brelse(req.bp_entry); } VOP_UNLOCK(NTOV(dat), 0); return (error); } int nandfs_get_dat_bdescs_ioctl(struct nandfs_device *nffsdev, struct nandfs_argv *nargv) { struct nandfs_bdesc *bd; size_t size; int error; size = nargv->nv_nmembs * sizeof(struct nandfs_bdesc); bd = malloc(size, M_NANDFSTEMP, M_WAITOK); error = copyin((void *)(uintptr_t)nargv->nv_base, bd, size); if (error) { free(bd, M_NANDFSTEMP); return (error); } error = nandfs_get_dat_bdescs(nffsdev, bd, nargv->nv_nmembs); 
if (error == 0) error = copyout(bd, (void *)(uintptr_t)nargv->nv_base, size); free(bd, M_NANDFSTEMP); return (error); } int nandfs_get_dat_bdescs(struct nandfs_device *nffsdev, struct nandfs_bdesc *bd, uint32_t nmembs) { struct nandfs_node *dat_node; uint64_t map; uint32_t i; int error = 0; dat_node = nffsdev->nd_dat_node; VOP_LOCK(NTOV(dat_node), LK_EXCLUSIVE); for (i = 0; i < nmembs; i++) { DPRINTF(CLEAN, ("%s: bd ino:%#jx oblk:%#jx blocknr:%#jx off:%#jx\n", __func__, (uintmax_t)bd[i].bd_ino, (uintmax_t)bd[i].bd_oblocknr, (uintmax_t)bd[i].bd_blocknr, (uintmax_t)bd[i].bd_offset)); error = nandfs_bmap_lookup(dat_node, bd[i].bd_offset, &map); if (error) break; bd[i].bd_blocknr = map; } VOP_UNLOCK(NTOV(dat_node), 0); return (error); } Index: head/sys/fs/nandfs/nandfs_dir.c =================================================================== --- head/sys/fs/nandfs/nandfs_dir.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_dir.c (revision 326268) @@ -1,314 +1,316 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf * Copyright (c) 2008, 2009 Reinoud Zandijk * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "nandfs_mount.h" #include "nandfs.h" #include "nandfs_subr.h" int nandfs_add_dirent(struct vnode *dvp, uint64_t ino, char *nameptr, long namelen, uint8_t type) { struct nandfs_node *dir_node = VTON(dvp); struct nandfs_dir_entry *dirent, *pdirent; uint32_t blocksize = dir_node->nn_nandfsdev->nd_blocksize; uint64_t filesize = dir_node->nn_inode.i_size; uint64_t inode_blks = dir_node->nn_inode.i_blocks; uint32_t off, rest; uint8_t *pos; struct buf *bp; int error; pdirent = NULL; bp = NULL; if (inode_blks) { error = nandfs_bread(dir_node, inode_blks - 1, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } pos = bp->b_data; off = 0; while (off < blocksize) { pdirent = (struct nandfs_dir_entry *) (pos + off); if (!pdirent->rec_len) { pdirent = NULL; break; } off += pdirent->rec_len; } if (pdirent) rest = pdirent->rec_len - NANDFS_DIR_REC_LEN(pdirent->name_len); else rest = blocksize; if (rest < NANDFS_DIR_REC_LEN(namelen)) { /* Do not update pdirent as new block is created */ pdirent = NULL; brelse(bp); /* Set to NULL to create new */ bp = NULL; filesize += rest; } } /* If no bp found create new */ if (!bp) { error = nandfs_bcreate(dir_node, inode_blks, NOCRED, 0, &bp); if (error) return (error); off = 0; pos = 
bp->b_data; } /* Modify pdirent if exists */ if (pdirent) { DPRINTF(LOOKUP, ("modify pdirent %p\n", pdirent)); /* modify last de */ off -= pdirent->rec_len; pdirent->rec_len = NANDFS_DIR_REC_LEN(pdirent->name_len); off += pdirent->rec_len; } /* Create new dirent */ dirent = (struct nandfs_dir_entry *) (pos + off); dirent->rec_len = blocksize - off; dirent->inode = ino; dirent->name_len = namelen; memset(dirent->name, 0, NANDFS_DIR_NAME_LEN(namelen)); memcpy(dirent->name, nameptr, namelen); dirent->file_type = type; filesize += NANDFS_DIR_REC_LEN(dirent->name_len); DPRINTF(LOOKUP, ("create dir_entry '%.*s' at %p with size %x " "new filesize: %jx\n", (int)namelen, dirent->name, dirent, dirent->rec_len, (uintmax_t)filesize)); error = nandfs_dirty_buf(bp, 0); if (error) return (error); dir_node->nn_inode.i_size = filesize; dir_node->nn_flags |= IN_CHANGE | IN_UPDATE; vnode_pager_setsize(dvp, filesize); return (0); } int nandfs_remove_dirent(struct vnode *dvp, struct nandfs_node *node, struct componentname *cnp) { struct nandfs_node *dir_node; struct nandfs_dir_entry *dirent, *pdirent; struct buf *bp; uint64_t filesize, blocknr, ino, offset; uint32_t blocksize, limit, off; uint16_t newsize; uint8_t *pos; int error, found; dir_node = VTON(dvp); filesize = dir_node->nn_inode.i_size; if (!filesize) return (0); if (node) { offset = node->nn_diroff; ino = node->nn_ino; } else { offset = dir_node->nn_diroff; ino = NANDFS_WHT_INO; } dirent = pdirent = NULL; blocksize = dir_node->nn_nandfsdev->nd_blocksize; blocknr = offset / blocksize; DPRINTF(LOOKUP, ("rm direntry dvp %p node %p ino %#jx at off %#jx\n", dvp, node, (uintmax_t)ino, (uintmax_t)offset)); error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } pos = bp->b_data; off = 0; found = 0; limit = offset % blocksize; pdirent = (struct nandfs_dir_entry *) bp->b_data; while (off <= limit) { dirent = (struct nandfs_dir_entry *) (pos + off); if ((off == limit) && (dirent->inode == 
ino)) { found = 1; break; } if (dirent->inode != 0) pdirent = dirent; off += dirent->rec_len; } if (!found) { nandfs_error("cannot find entry to remove"); brelse(bp); return (error); } DPRINTF(LOOKUP, ("rm dirent ino %#jx at %#x with size %#x\n", (uintmax_t)dirent->inode, off, dirent->rec_len)); newsize = (uintptr_t)dirent - (uintptr_t)pdirent; newsize += dirent->rec_len; pdirent->rec_len = newsize; dirent->inode = 0; error = nandfs_dirty_buf(bp, 0); if (error) return (error); dir_node->nn_flags |= IN_CHANGE | IN_UPDATE; /* If last one modify filesize */ if ((offset + NANDFS_DIR_REC_LEN(dirent->name_len)) == filesize) { filesize = blocknr * blocksize + ((uintptr_t)pdirent - (uintptr_t)pos) + NANDFS_DIR_REC_LEN(pdirent->name_len); dir_node->nn_inode.i_size = filesize; } return (0); } int nandfs_update_parent_dir(struct vnode *dvp, uint64_t newparent) { struct nandfs_dir_entry *dirent; struct nandfs_node *dir_node; struct buf *bp; int error; dir_node = VTON(dvp); error = nandfs_bread(dir_node, 0, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } dirent = (struct nandfs_dir_entry *)bp->b_data; dirent->inode = newparent; error = nandfs_dirty_buf(bp, 0); if (error) return (error); return (0); } int nandfs_update_dirent(struct vnode *dvp, struct nandfs_node *fnode, struct nandfs_node *tnode) { struct nandfs_node *dir_node; struct nandfs_dir_entry *dirent; struct buf *bp; uint64_t file_size, blocknr; uint32_t blocksize, off; uint8_t *pos; int error; dir_node = VTON(dvp); file_size = dir_node->nn_inode.i_size; if (!file_size) return (0); DPRINTF(LOOKUP, ("chg direntry dvp %p ino %#jx to in %#jx at off %#jx\n", dvp, (uintmax_t)tnode->nn_ino, (uintmax_t)fnode->nn_ino, (uintmax_t)tnode->nn_diroff)); blocksize = dir_node->nn_nandfsdev->nd_blocksize; blocknr = tnode->nn_diroff / blocksize; off = tnode->nn_diroff % blocksize; error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp); if (error) { brelse(bp); return (error); } pos = bp->b_data; dirent = (struct 
nandfs_dir_entry *) (pos + off);
	/* The entry at nn_diroff must still reference the replaced node. */
	KASSERT((dirent->inode == tnode->nn_ino),
	    ("direntry mismatch"));
	dirent->inode = fnode->nn_ino;
	error = nandfs_dirty_buf(bp, 0);
	if (error)
		return (error);

	return (0);
}

/*
 * Populate a freshly created directory with its ".." and "." entries
 * (pointing at parent_ino and ino respectively).  "dot-dot" is added
 * first so it becomes the directory's initial entry.
 * Returns 0 on success, -1 if either entry could not be added.
 */
int
nandfs_init_dir(struct vnode *dvp, uint64_t ino, uint64_t parent_ino)
{

	if (nandfs_add_dirent(dvp, parent_ino, "..", 2, DT_DIR) ||
	    nandfs_add_dirent(dvp, ino, ".", 1, DT_DIR)) {
		nandfs_error("%s: cannot initialize dir ino:%jd(pino:%jd)\n",
		    __func__, ino, parent_ino);
		return (-1);
	}
	return (0);
}
Index: head/sys/fs/nandfs/nandfs_fs.h
===================================================================
--- head/sys/fs/nandfs/nandfs_fs.h	(revision 326267)
+++ head/sys/fs/nandfs/nandfs_fs.h	(revision 326268)
@@ -1,565 +1,567 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
  * Copyright (c) 2010-2012 Semihalf
  * Copyright (c) 2008, 2009 Reinoud Zandijk
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * Original definitions written by Koji Sato * and Ryusuke Konishi * From: NetBSD: nandfs_fs.h,v 1.1 2009/07/18 16:31:42 reinoud * * $FreeBSD$ */ #ifndef _NANDFS_FS_H #define _NANDFS_FS_H #include #define MNINDIR(fsdev) ((fsdev)->nd_blocksize / sizeof(nandfs_daddr_t)) /* * Inode structure. There are a few dedicated inode numbers that are * defined here first. */ #define NANDFS_WHT_INO 1 /* Whiteout ino */ #define NANDFS_ROOT_INO 2 /* Root file inode */ #define NANDFS_DAT_INO 3 /* DAT file */ #define NANDFS_CPFILE_INO 4 /* checkpoint file */ #define NANDFS_SUFILE_INO 5 /* segment usage file */ #define NANDFS_IFILE_INO 6 /* ifile */ #define NANDFS_GC_INO 7 /* Cleanerd node */ #define NANDFS_ATIME_INO 8 /* Atime file (reserved) */ #define NANDFS_XATTR_INO 9 /* Xattribute file (reserved) */ #define NANDFS_SKETCH_INO 10 /* Sketch file (obsolete) */ #define NANDFS_USER_INO 11 /* First user's file inode number */ #define NANDFS_SYS_NODE(ino) \ (((ino) >= NANDFS_DAT_INO) && ((ino) <= NANDFS_GC_INO)) #define NANDFS_NDADDR 12 /* Direct addresses in inode. */ #define NANDFS_NIADDR 3 /* Indirect addresses in inode. 
*/ typedef int64_t nandfs_daddr_t; typedef int64_t nandfs_lbn_t; struct nandfs_inode { uint64_t i_blocks; /* 0: size in device blocks */ uint64_t i_size; /* 8: size in bytes */ uint64_t i_ctime; /* 16: creation time in seconds */ uint64_t i_mtime; /* 24: modification time in seconds part*/ uint32_t i_ctime_nsec; /* 32: creation time nanoseconds part */ uint32_t i_mtime_nsec; /* 36: modification time in nanoseconds */ uint32_t i_uid; /* 40: user id */ uint32_t i_gid; /* 44: group id */ uint16_t i_mode; /* 48: file mode */ uint16_t i_links_count; /* 50: number of references to the inode*/ uint32_t i_flags; /* 52: NANDFS_*_FL flags */ nandfs_daddr_t i_special; /* 56: special */ nandfs_daddr_t i_db[NANDFS_NDADDR]; /* 64: Direct disk blocks. */ nandfs_daddr_t i_ib[NANDFS_NIADDR]; /* 160: Indirect disk blocks. */ uint64_t i_xattr; /* 184: reserved for extended attributes*/ uint32_t i_generation; /* 192: file generation for NFS */ uint32_t i_pad[15]; /* 196: make it 64 bits aligned */ }; #ifdef _KERNEL CTASSERT(sizeof(struct nandfs_inode) == 256); #endif /* * Each checkpoint/snapshot has a super root. * * The super root holds the inodes of the three system files: `dat', `cp' and * 'su' files. All other FS state is defined by those. * * It is CRC checksum'ed and time stamped. */ struct nandfs_super_root { uint32_t sr_sum; /* check-sum */ uint16_t sr_bytes; /* byte count of this structure */ uint16_t sr_flags; /* reserved for flags */ uint64_t sr_nongc_ctime; /* timestamp, not for cleaner(?) 
*/ struct nandfs_inode sr_dat; /* DAT, virt->phys translation inode */ struct nandfs_inode sr_cpfile; /* CP, checkpoints inode */ struct nandfs_inode sr_sufile; /* SU, segment usage inode */ }; #define NANDFS_SR_MDT_OFFSET(inode_size, i) \ ((uint32_t)&((struct nandfs_super_root *)0)->sr_dat + \ (inode_size) * (i)) #define NANDFS_SR_DAT_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 0) #define NANDFS_SR_CPFILE_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 1) #define NANDFS_SR_SUFILE_OFFSET(inode_size) NANDFS_SR_MDT_OFFSET(inode_size, 2) #define NANDFS_SR_BYTES (sizeof(struct nandfs_super_root)) /* * The superblock describes the basic structure and mount history. It also * records some sizes of structures found on the disc for sanity checks. * * The superblock is stored at two places: NANDFS_SB_OFFSET_BYTES and * NANDFS_SB2_OFFSET_BYTES. */ /* File system states stored on media in superblock's sbp->s_state */ #define NANDFS_VALID_FS 0x0001 /* cleanly unmounted and all is ok */ #define NANDFS_ERROR_FS 0x0002 /* there were errors detected, fsck */ #define NANDFS_RESIZE_FS 0x0004 /* resize required, XXX unknown flag*/ #define NANDFS_MOUNT_STATE_BITS "\20\1VALID_FS\2ERROR_FS\3RESIZE_FS" /* * Brief description of control structures: * * NANDFS_NFSAREAS first blocks contain fsdata and some amount of super blocks. * Simple round-robin policy is used in order to choose which block will * contain new super block. 
* * Simple case with 2 blocks: * 1: fsdata sblock1 [sblock3 [sblock5 ..]] * 2: fsdata sblock2 [sblock4 [sblock6 ..]] */ struct nandfs_fsdata { uint16_t f_magic; uint16_t f_bytes; uint32_t f_sum; /* checksum of fsdata */ uint32_t f_rev_level; /* major disk format revision */ uint64_t f_ctime; /* creation time (execution time of newfs) */ /* Block size represented as: blocksize = 1 << (f_log_block_size + 10) */ uint32_t f_log_block_size; uint16_t f_inode_size; /* size of an inode */ uint16_t f_dat_entry_size; /* size of a dat entry */ uint16_t f_checkpoint_size; /* size of a checkpoint */ uint16_t f_segment_usage_size; /* size of a segment usage */ uint16_t f_sbbytes; /* byte count of CRC calculation for super blocks. s_reserved is excluded! */ uint16_t f_errors; /* behaviour on detecting errors */ uint32_t f_erasesize; uint64_t f_nsegments; /* number of segm. in filesystem */ nandfs_daddr_t f_first_data_block; /* 1st seg disk block number */ uint32_t f_blocks_per_segment; /* number of blocks per segment */ uint32_t f_r_segments_percentage; /* reserved segments percentage */ struct uuid f_uuid; /* 128-bit uuid for volume */ char f_volume_name[16]; /* volume name */ uint32_t f_pad[104]; } __packed; #ifdef _KERNEL CTASSERT(sizeof(struct nandfs_fsdata) == 512); #endif struct nandfs_super_block { uint16_t s_magic; /* magic value for identification */ uint32_t s_sum; /* check sum of super block */ uint64_t s_last_cno; /* last checkpoint number */ uint64_t s_last_pseg; /* addr part. segm. 
written last */ uint64_t s_last_seq; /* seq.number of seg written last */ uint64_t s_free_blocks_count; /* free blocks count */ uint64_t s_mtime; /* mount time */ uint64_t s_wtime; /* write time */ uint16_t s_state; /* file system state */ char s_last_mounted[64]; /* directory where last mounted */ uint32_t s_c_interval; /* commit interval of segment */ uint32_t s_c_block_max; /* threshold of data amount for the segment construction */ uint32_t s_reserved[32]; /* padding to end of the block */ } __packed; #ifdef _KERNEL CTASSERT(sizeof(struct nandfs_super_block) == 256); #endif #define NANDFS_FSDATA_MAGIC 0xf8da #define NANDFS_SUPER_MAGIC 0x8008 #define NANDFS_NFSAREAS 4 #define NANDFS_DATA_OFFSET_BYTES(esize) (NANDFS_NFSAREAS * (esize)) #define NANDFS_SBLOCK_OFFSET_BYTES (sizeof(struct nandfs_fsdata)) #define NANDFS_DEF_BLOCKSIZE 4096 #define NANDFS_MIN_BLOCKSIZE 512 #define NANDFS_DEF_ERASESIZE (2 << 16) #define NANDFS_MIN_SEGSIZE NANDFS_DEF_ERASESIZE #define NANDFS_CURRENT_REV 9 /* current major revision */ #define NANDFS_FSDATA_CRC_BYTES offsetof(struct nandfs_fsdata, f_pad) /* Bytes count of super_block for CRC-calculation */ #define NANDFS_SB_BYTES offsetof(struct nandfs_super_block, s_reserved) /* Maximal count of links to a file */ #define NANDFS_LINK_MAX 32000 /* * Structure of a directory entry. * * Note that they can't span blocks; the rec_len fills out. 
*/ #define NANDFS_NAME_LEN 255 struct nandfs_dir_entry { uint64_t inode; /* inode number */ uint16_t rec_len; /* directory entry length */ uint8_t name_len; /* name length */ uint8_t file_type; char name[NANDFS_NAME_LEN]; /* file name */ char pad; }; /* * NANDFS_DIR_PAD defines the directory entries boundaries * * NOTE: It must be a multiple of 8 */ #define NANDFS_DIR_PAD 8 #define NANDFS_DIR_ROUND (NANDFS_DIR_PAD - 1) #define NANDFS_DIR_NAME_OFFSET (offsetof(struct nandfs_dir_entry, name)) #define NANDFS_DIR_REC_LEN(name_len) \ (((name_len) + NANDFS_DIR_NAME_OFFSET + NANDFS_DIR_ROUND) \ & ~NANDFS_DIR_ROUND) #define NANDFS_DIR_NAME_LEN(name_len) \ (NANDFS_DIR_REC_LEN(name_len) - NANDFS_DIR_NAME_OFFSET) /* * NiLFS/NANDFS devides the disc into fixed length segments. Each segment is * filled with one or more partial segments of variable lengths. * * Each partial segment has a segment summary header followed by updates of * files and optionally a super root. */ /* * Virtual to physical block translation information. For data blocks it maps * logical block number bi_blkoff to virtual block nr bi_vblocknr. For non * datablocks it is the virtual block number assigned to an indirect block * and has no bi_blkoff. The physical block number is the next * available data block in the partial segment after all the binfo's. */ struct nandfs_binfo_v { uint64_t bi_ino; /* file's inode */ uint64_t bi_vblocknr; /* assigned virtual block number */ uint64_t bi_blkoff; /* for file's logical block number */ }; /* * DAT allocation. For data blocks just the logical block number that maps on * the next available data block in the partial segment after the binfo's. 
*/ struct nandfs_binfo_dat { uint64_t bi_ino; uint64_t bi_blkoff; /* DAT file's logical block number */ uint8_t bi_level; /* whether this is meta block */ uint8_t bi_pad[7]; }; #ifdef _KERNEL CTASSERT(sizeof(struct nandfs_binfo_v) == sizeof(struct nandfs_binfo_dat)); #endif /* Convenience union for both types of binfo's */ union nandfs_binfo { struct nandfs_binfo_v bi_v; struct nandfs_binfo_dat bi_dat; }; /* Indirect buffers path */ struct nandfs_indir { nandfs_daddr_t in_lbn; int in_off; }; /* The (partial) segment summary */ struct nandfs_segment_summary { uint32_t ss_datasum; /* CRC of complete data block */ uint32_t ss_sumsum; /* CRC of segment summary only */ uint32_t ss_magic; /* magic to identify segment summary */ uint16_t ss_bytes; /* size of segment summary structure */ uint16_t ss_flags; /* NANDFS_SS_* flags */ uint64_t ss_seq; /* sequence number of this segm. sum */ uint64_t ss_create; /* creation timestamp in seconds */ uint64_t ss_next; /* blocknumber of next segment */ uint32_t ss_nblocks; /* number of blocks used by summary */ uint32_t ss_nbinfos; /* number of binfo structures */ uint32_t ss_sumbytes; /* total size of segment summary */ uint32_t ss_pad; /* stream of binfo structures */ }; #define NANDFS_SEGSUM_MAGIC 0x8e680011 /* segment summary magic number */ /* Segment summary flags */ #define NANDFS_SS_LOGBGN 0x0001 /* begins a logical segment */ #define NANDFS_SS_LOGEND 0x0002 /* ends a logical segment */ #define NANDFS_SS_SR 0x0004 /* has super root */ #define NANDFS_SS_SYNDT 0x0008 /* includes data only updates */ #define NANDFS_SS_GC 0x0010 /* segment written for cleaner operation */ #define NANDFS_SS_FLAG_BITS "\20\1LOGBGN\2LOGEND\3SR\4SYNDT\5GC" /* Segment summary constrains */ #define NANDFS_SEG_MIN_BLOCKS 16 /* minimum number of blocks in a full segment */ #define NANDFS_PSEG_MIN_BLOCKS 2 /* minimum number of blocks in a partial segment */ #define NANDFS_MIN_NRSVSEGS 8 /* minimum number of reserved segments */ /* * Structure of DAT/inode 
file. * * A DAT file is divided into groups. The maximum number of groups is the * number of block group descriptors that fit into one block; this descriptor * only gives the number of free entries in the associated group. * * Each group has a block sized bitmap indicating if an entry is taken or * empty. Each bit stands for a DAT entry. * * The inode file has exactly the same format only the entries are inode * entries. */ struct nandfs_block_group_desc { uint32_t bg_nfrees; /* num. free entries in block group */ }; /* DAT entry in a super root's DAT file */ struct nandfs_dat_entry { uint64_t de_blocknr; /* block number */ uint64_t de_start; /* valid from checkpoint */ uint64_t de_end; /* valid till checkpoint */ uint64_t de_rsv; /* reserved for future use */ }; /* * Structure of CP file. * * A snapshot is just a checkpoint only it's protected against removal by the * cleaner. The snapshots are kept on a double linked list of checkpoints. */ struct nandfs_snapshot_list { uint64_t ssl_next; /* checkpoint nr. forward */ uint64_t ssl_prev; /* checkpoint nr. back */ }; /* Checkpoint entry structure */ struct nandfs_checkpoint { uint32_t cp_flags; /* NANDFS_CHECKPOINT_* flags */ uint32_t cp_checkpoints_count; /* ZERO, not used anymore? 
*/ struct nandfs_snapshot_list cp_snapshot_list; /* list of snapshots */ uint64_t cp_cno; /* checkpoint number */ uint64_t cp_create; /* creation timestamp */ uint64_t cp_nblk_inc; /* number of blocks incremented */ uint64_t cp_blocks_count; /* reserved (might be deleted) */ struct nandfs_inode cp_ifile_inode; /* inode file inode */ }; /* Checkpoint flags */ #define NANDFS_CHECKPOINT_SNAPSHOT 1 #define NANDFS_CHECKPOINT_INVALID 2 #define NANDFS_CHECKPOINT_SKETCH 4 #define NANDFS_CHECKPOINT_MINOR 8 #define NANDFS_CHECKPOINT_BITS "\20\1SNAPSHOT\2INVALID\3SKETCH\4MINOR" /* Header of the checkpoint file */ struct nandfs_cpfile_header { uint64_t ch_ncheckpoints; /* number of checkpoints */ uint64_t ch_nsnapshots; /* number of snapshots */ struct nandfs_snapshot_list ch_snapshot_list; /* snapshot list */ }; #define NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET \ ((sizeof(struct nandfs_cpfile_header) + \ sizeof(struct nandfs_checkpoint) - 1) / \ sizeof(struct nandfs_checkpoint)) #define NANDFS_NOSEGMENT 0xffffffff /* * Structure of SU file. * * The segment usage file sums up how each of the segments are used. They are * indexed by their segment number. 
*/ /* Segment usage entry */ struct nandfs_segment_usage { uint64_t su_lastmod; /* last modified timestamp */ uint32_t su_nblocks; /* number of blocks in segment */ uint32_t su_flags; /* NANDFS_SEGMENT_USAGE_* flags */ }; /* Segment usage flag */ #define NANDFS_SEGMENT_USAGE_ACTIVE 1 #define NANDFS_SEGMENT_USAGE_DIRTY 2 #define NANDFS_SEGMENT_USAGE_ERROR 4 #define NANDFS_SEGMENT_USAGE_GC 8 #define NANDFS_SEGMENT_USAGE_BITS "\20\1ACTIVE\2DIRTY\3ERROR" /* Header of the segment usage file */ struct nandfs_sufile_header { uint64_t sh_ncleansegs; /* number of segments marked clean */ uint64_t sh_ndirtysegs; /* number of segments marked dirty */ uint64_t sh_last_alloc; /* last allocated segment number */ }; #define NANDFS_SUFILE_FIRST_SEGMENT_USAGE_OFFSET \ ((sizeof(struct nandfs_sufile_header) + \ sizeof(struct nandfs_segment_usage) - 1) / \ sizeof(struct nandfs_segment_usage)) struct nandfs_seg_stat { uint64_t nss_nsegs; uint64_t nss_ncleansegs; uint64_t nss_ndirtysegs; uint64_t nss_ctime; uint64_t nss_nongc_ctime; uint64_t nss_prot_seq; }; enum { NANDFS_CHECKPOINT, NANDFS_SNAPSHOT }; #define NANDFS_CPINFO_MAX 512 struct nandfs_cpinfo { uint32_t nci_flags; uint32_t nci_pad; uint64_t nci_cno; uint64_t nci_create; uint64_t nci_nblk_inc; uint64_t nci_blocks_count; uint64_t nci_next; }; #define NANDFS_SEGMENTS_MAX 512 struct nandfs_suinfo { uint64_t nsi_num; uint64_t nsi_lastmod; uint32_t nsi_blocks; uint32_t nsi_flags; }; #define NANDFS_VINFO_MAX 512 struct nandfs_vinfo { uint64_t nvi_ino; uint64_t nvi_vblocknr; uint64_t nvi_start; uint64_t nvi_end; uint64_t nvi_blocknr; int nvi_alive; }; struct nandfs_cpmode { uint64_t ncpm_cno; uint32_t ncpm_mode; uint32_t ncpm_pad; }; struct nandfs_argv { uint64_t nv_base; uint32_t nv_nmembs; uint16_t nv_size; uint16_t nv_flags; uint64_t nv_index; }; struct nandfs_cpstat { uint64_t ncp_cno; uint64_t ncp_ncps; uint64_t ncp_nss; }; struct nandfs_period { uint64_t p_start; uint64_t p_end; }; struct nandfs_vdesc { uint64_t vd_ino; uint64_t 
vd_cno; uint64_t vd_vblocknr; struct nandfs_period vd_period; uint64_t vd_blocknr; uint64_t vd_offset; uint32_t vd_flags; uint32_t vd_pad; }; struct nandfs_bdesc { uint64_t bd_ino; uint64_t bd_oblocknr; uint64_t bd_blocknr; uint64_t bd_offset; uint32_t bd_level; uint32_t bd_alive; }; #ifndef _KERNEL #ifndef MNAMELEN #define MNAMELEN 1024 #endif #endif struct nandfs_fsinfo { struct nandfs_fsdata fs_fsdata; struct nandfs_super_block fs_super; char fs_dev[MNAMELEN]; }; #define NANDFS_MAX_MOUNTS 65535 #define NANDFS_IOCTL_GET_SUSTAT _IOR('N', 100, struct nandfs_seg_stat) #define NANDFS_IOCTL_CHANGE_CPMODE _IOWR('N', 101, struct nandfs_cpmode) #define NANDFS_IOCTL_GET_CPINFO _IOWR('N', 102, struct nandfs_argv) #define NANDFS_IOCTL_DELETE_CP _IOWR('N', 103, uint64_t[2]) #define NANDFS_IOCTL_GET_CPSTAT _IOR('N', 104, struct nandfs_cpstat) #define NANDFS_IOCTL_GET_SUINFO _IOWR('N', 105, struct nandfs_argv) #define NANDFS_IOCTL_GET_VINFO _IOWR('N', 106, struct nandfs_argv) #define NANDFS_IOCTL_GET_BDESCS _IOWR('N', 107, struct nandfs_argv) #define NANDFS_IOCTL_GET_FSINFO _IOR('N', 108, struct nandfs_fsinfo) #define NANDFS_IOCTL_MAKE_SNAP _IOWR('N', 109, uint64_t) #define NANDFS_IOCTL_DELETE_SNAP _IOWR('N', 110, uint64_t) #define NANDFS_IOCTL_SYNC _IOWR('N', 111, uint64_t) #endif /* _NANDFS_FS_H */ Index: head/sys/fs/nandfs/nandfs_ifile.c =================================================================== --- head/sys/fs/nandfs/nandfs_ifile.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_ifile.c (revision 326268) @@ -1,213 +1,215 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include int nandfs_node_create(struct nandfsmount *nmp, struct nandfs_node **node, uint16_t mode) { struct nandfs_alloc_request req; struct nandfs_device *nandfsdev; struct nandfs_mdt *mdt; struct nandfs_node *ifile; struct nandfs_inode *inode; struct vnode *vp; uint32_t entry; int error = 0; nandfsdev = nmp->nm_nandfsdev; mdt = &nandfsdev->nd_ifile_mdt; ifile = nmp->nm_ifile_node; vp = NTOV(ifile); VOP_LOCK(vp, LK_EXCLUSIVE); /* Allocate new inode in ifile */ req.entrynum = nandfsdev->nd_last_ino + 1; error = nandfs_find_free_entry(mdt, ifile, &req); if (error) { VOP_UNLOCK(vp, 0); return (error); } error = nandfs_get_entry_block(mdt, ifile, &req, &entry, 1); if (error) { VOP_UNLOCK(vp, 0); return (error); } /* Inode initialization */ inode = ((struct nandfs_inode *) req.bp_entry->b_data) + entry; 
nandfs_inode_init(inode, mode); error = nandfs_alloc_entry(mdt, &req); if (error) { VOP_UNLOCK(vp, 0); return (error); } VOP_UNLOCK(vp, 0); nandfsdev->nd_last_ino = req.entrynum; error = nandfs_get_node(nmp, req.entrynum, node); DPRINTF(IFILE, ("%s: node: %p ino: %#jx\n", __func__, node, (uintmax_t)((*node)->nn_ino))); return (error); } int nandfs_node_destroy(struct nandfs_node *node) { struct nandfs_alloc_request req; struct nandfsmount *nmp; struct nandfs_mdt *mdt; struct nandfs_node *ifile; struct vnode *vp; int error = 0; nmp = node->nn_nmp; req.entrynum = node->nn_ino; mdt = &nmp->nm_nandfsdev->nd_ifile_mdt; ifile = nmp->nm_ifile_node; vp = NTOV(ifile); DPRINTF(IFILE, ("%s: destroy node: %p ino: %#jx\n", __func__, node, (uintmax_t)node->nn_ino)); VOP_LOCK(vp, LK_EXCLUSIVE); error = nandfs_find_entry(mdt, ifile, &req); if (error) { nandfs_error("%s: finding entry error:%d node %p(%jx)", __func__, error, node, node->nn_ino); VOP_UNLOCK(vp, 0); return (error); } nandfs_inode_destroy(&node->nn_inode); error = nandfs_free_entry(mdt, &req); if (error) { nandfs_error("%s: freing entry error:%d node %p(%jx)", __func__, error, node, node->nn_ino); VOP_UNLOCK(vp, 0); return (error); } VOP_UNLOCK(vp, 0); DPRINTF(IFILE, ("%s: freed node %p ino %#jx\n", __func__, node, (uintmax_t)node->nn_ino)); return (error); } int nandfs_node_update(struct nandfs_node *node) { struct nandfs_alloc_request req; struct nandfsmount *nmp; struct nandfs_mdt *mdt; struct nandfs_node *ifile; struct nandfs_inode *inode; uint32_t index; int error = 0; nmp = node->nn_nmp; ifile = nmp->nm_ifile_node; ASSERT_VOP_LOCKED(NTOV(ifile), __func__); req.entrynum = node->nn_ino; mdt = &nmp->nm_nandfsdev->nd_ifile_mdt; DPRINTF(IFILE, ("%s: node:%p ino:%#jx\n", __func__, &node->nn_inode, (uintmax_t)node->nn_ino)); error = nandfs_get_entry_block(mdt, ifile, &req, &index, 0); if (error) { printf("nandfs_get_entry_block returned with ERROR=%d\n", error); return (error); } inode = ((struct nandfs_inode *) 
req.bp_entry->b_data) + index; memcpy(inode, &node->nn_inode, sizeof(*inode)); error = nandfs_dirty_buf(req.bp_entry, 0); return (error); } int nandfs_get_node_entry(struct nandfsmount *nmp, struct nandfs_inode **inode, uint64_t ino, struct buf **bp) { struct nandfs_alloc_request req; struct nandfs_mdt *mdt; struct nandfs_node *ifile; struct vnode *vp; uint32_t index; int error = 0; req.entrynum = ino; mdt = &nmp->nm_nandfsdev->nd_ifile_mdt; ifile = nmp->nm_ifile_node; vp = NTOV(ifile); VOP_LOCK(vp, LK_EXCLUSIVE); error = nandfs_get_entry_block(mdt, ifile, &req, &index, 0); if (error) { VOP_UNLOCK(vp, 0); return (error); } *inode = ((struct nandfs_inode *) req.bp_entry->b_data) + index; *bp = req.bp_entry; VOP_UNLOCK(vp, 0); return (0); } Index: head/sys/fs/nandfs/nandfs_mount.h =================================================================== --- head/sys/fs/nandfs/nandfs_mount.h (revision 326267) +++ head/sys/fs/nandfs/nandfs_mount.h (revision 326268) @@ -1,50 +1,52 @@ /*- + * SPDX-License-Identifier: BSD-4-Clause + * * Copyright (c) 2008, 2009 Reinoud Zandijk * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed for the * NetBSD Project. See http://www.NetBSD.org/ for * information about NetBSD. * 4. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * From: NetBSD: nilfs_mount.h,v 1.1 2009/07/18 16:31:42 reinoud * * $FreeBSD$ */ #ifndef _FS_NANDFS_NANDFS_MOUNT_H_ #define _FS_NANDFS_NANDFS_MOUNT_H_ /* * Arguments to mount NANDFS filingsystem. */ struct nandfs_args { char *fspec; /* mount specifier */ int64_t cpno; /* checkpoint number */ }; #endif /* !_FS_NANDFS_NANDFS_MOUNT_H_ */ Index: head/sys/fs/nandfs/nandfs_segment.c =================================================================== --- head/sys/fs/nandfs/nandfs_segment.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_segment.c (revision 326268) @@ -1,1311 +1,1313 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_ddb.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static int nandfs_new_segment(struct nandfs_device *fsdev) { int error = 0; uint64_t new; error = nandfs_alloc_segment(fsdev, &new); if (!error) { fsdev->nd_seg_num = fsdev->nd_next_seg_num; fsdev->nd_next_seg_num = new; } DPRINTF(SYNC, ("%s: new segment %jx next %jx error %d\n", __func__, (uintmax_t)fsdev->nd_seg_num, (uintmax_t)new, error)); if (error) nandfs_error("%s: cannot create segment error %d\n", __func__, error); return (error); } static int create_segment(struct nandfs_seginfo *seginfo) { struct nandfs_segment *seg; struct nandfs_device *fsdev; struct nandfs_segment *prev; struct buf *bp; uint64_t start_block, curr; uint32_t blks_per_seg, nblocks; int error; fsdev = seginfo->fsdev; prev = seginfo->curseg; blks_per_seg = fsdev->nd_fsdata.f_blocks_per_segment; nblocks = fsdev->nd_last_segsum.ss_nblocks; if (!prev) { vfs_timestamp(&fsdev->nd_ts); /* Touch current segment */ error = nandfs_touch_segment(fsdev, fsdev->nd_seg_num); if 
(error) { nandfs_error("%s: cannot preallocate segment %jx\n", __func__, fsdev->nd_seg_num); return (error); } error = nandfs_touch_segment(fsdev, 0); if (error) { nandfs_error("%s: cannot dirty block with segment 0\n", __func__); return (error); } start_block = fsdev->nd_last_pseg + (uint64_t)nblocks; /* * XXX Hack */ if (blks_per_seg - (start_block % blks_per_seg) - 1 == 0) start_block++; curr = nandfs_get_segnum_of_block(fsdev, start_block); /* Allocate new segment if last one is full */ if (fsdev->nd_seg_num != curr) { error = nandfs_new_segment(fsdev); if (error) { nandfs_error("%s: cannot create new segment\n", __func__); return (error); } /* * XXX Hack */ nandfs_get_segment_range(fsdev, fsdev->nd_seg_num, &start_block, NULL); } } else { nandfs_get_segment_range(fsdev, fsdev->nd_next_seg_num, &start_block, NULL); /* Touch current segment and allocate and touch new one */ error = nandfs_new_segment(fsdev); if (error) { nandfs_error("%s: cannot create next segment\n", __func__); return (error); } /* Reiterate in case new buf is dirty */ seginfo->reiterate = 1; } /* Allocate and initialize nandfs_segment structure */ seg = malloc(sizeof(*seg), M_DEVBUF, M_WAITOK|M_ZERO); TAILQ_INIT(&seg->segsum); TAILQ_INIT(&seg->data); seg->fsdev = fsdev; seg->start_block = start_block; seg->num_blocks = blks_per_seg - (start_block % blks_per_seg) - 1; seg->seg_num = fsdev->nd_seg_num; seg->seg_next = fsdev->nd_next_seg_num; seg->segsum_blocks = 1; seg->bytes_left = fsdev->nd_blocksize - sizeof(struct nandfs_segment_summary); seg->segsum_bytes = sizeof(struct nandfs_segment_summary); /* Allocate buffer for segment summary */ bp = getblk(fsdev->nd_devvp, nandfs_block_to_dblock(fsdev, seg->start_block), fsdev->nd_blocksize, 0, 0, 0); bzero(bp->b_data, seginfo->fsdev->nd_blocksize); bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj; bp->b_flags |= B_MANAGED; /* Add buffer to segment */ TAILQ_INSERT_TAIL(&seg->segsum, bp, b_cluster.cluster_entry); seg->current_off = bp->b_data + 
sizeof(struct nandfs_segment_summary); DPRINTF(SYNC, ("%s: seg %p : initial settings: start %#jx size :%#x\n", __func__, seg, (uintmax_t)seg->start_block, seg->num_blocks)); DPRINTF(SYNC, ("%s: seg->seg_num %#jx cno %#jx next %#jx\n", __func__, (uintmax_t)seg->seg_num, (uintmax_t)(fsdev->nd_last_cno + 1), (uintmax_t)seg->seg_next)); if (!prev) LIST_INSERT_HEAD(&seginfo->seg_list, seg, seg_link); else LIST_INSERT_AFTER(prev, seg, seg_link); seginfo->curseg = seg; return (0); } static int delete_segment(struct nandfs_seginfo *seginfo) { struct nandfs_segment *seg, *tseg; struct buf *bp, *tbp; LIST_FOREACH_SAFE(seg, &seginfo->seg_list, seg_link, tseg) { TAILQ_FOREACH_SAFE(bp, &seg->segsum, b_cluster.cluster_entry, tbp) { TAILQ_REMOVE(&seg->segsum, bp, b_cluster.cluster_entry); bp->b_flags &= ~B_MANAGED; brelse(bp); } LIST_REMOVE(seg, seg_link); free(seg, M_DEVBUF); } return (0); } static int create_seginfo(struct nandfs_device *fsdev, struct nandfs_seginfo **seginfo) { struct nandfs_seginfo *info; info = malloc(sizeof(*info), M_DEVBUF, M_WAITOK); LIST_INIT(&info->seg_list); info->fsdev = fsdev; info->curseg = NULL; info->blocks = 0; *seginfo = info; fsdev->nd_seginfo = info; return (0); } static int delete_seginfo(struct nandfs_seginfo *seginfo) { struct nandfs_device *nffsdev; nffsdev = seginfo->fsdev; delete_segment(seginfo); nffsdev->nd_seginfo = NULL; free(seginfo, M_DEVBUF); return (0); } static int nandfs_create_superroot_block(struct nandfs_seginfo *seginfo, struct buf **newbp) { struct buf *bp; int error; bp = nandfs_geteblk(seginfo->fsdev->nd_blocksize, GB_NOWAIT_BD); bzero(bp->b_data, seginfo->fsdev->nd_blocksize); bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj; bp->b_flags |= B_MANAGED; if (!(seginfo->curseg) || !seginfo->curseg->num_blocks) { error = create_segment(seginfo); if (error) { brelse(bp); nandfs_error("%s: no segment for superroot\n", __func__); return (error); } } TAILQ_INSERT_TAIL(&seginfo->curseg->data, bp, b_cluster.cluster_entry); 
	seginfo->curseg->nblocks++;
	seginfo->curseg->num_blocks--;
	seginfo->blocks++;
	*newbp = bp;
	return (0);
}

/*
 * Build the super-root block for the final log: copy the DAT, cpfile and
 * sufile inodes into it and seal it with a CRC32 over NANDFS_SR_BYTES
 * (skipping the checksum field itself).
 */
static int
nandfs_add_superroot(struct nandfs_seginfo *seginfo)
{
	struct nandfs_device *fsdev;
	struct nandfs_super_root *sr;
	struct buf *bp = NULL;
	uint64_t crc_skip;
	uint32_t crc_calc;
	int error;

	fsdev = seginfo->fsdev;

	error = nandfs_create_superroot_block(seginfo, &bp);
	if (error) {
		nandfs_error("%s: cannot add superroot\n", __func__);
		return (error);
	}

	sr = (struct nandfs_super_root *)bp->b_data;
	/* Save superroot CRC */
	sr->sr_bytes = NANDFS_SR_BYTES;
	sr->sr_flags = 0;
	sr->sr_nongc_ctime = 0;

	/* Snapshot the three system-file inodes into the super-root. */
	memcpy(&sr->sr_dat, &fsdev->nd_dat_node->nn_inode,
	    sizeof(struct nandfs_inode));
	memcpy(&sr->sr_cpfile, &fsdev->nd_cp_node->nn_inode,
	    sizeof(struct nandfs_inode));
	memcpy(&sr->sr_sufile, &fsdev->nd_su_node->nn_inode,
	    sizeof(struct nandfs_inode));

	/* CRC covers everything after the checksum field. */
	crc_skip = sizeof(sr->sr_sum);
	crc_calc = crc32((uint8_t *)sr + crc_skip, NANDFS_SR_BYTES - crc_skip);

	sr->sr_sum = crc_calc;

	bp->b_flags |= B_MANAGED;
	bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj;
	bp->b_flags &= ~B_INVAL;
	nandfs_dirty_bufs_increment(fsdev);

	DPRINTF(SYNC, ("%s: bp:%p\n", __func__, bp));

	return (0);
}

/*
 * Append one more segment-summary block to the current segment, or start a
 * new segment (whose first summary block is reused) when fewer than two
 * blocks remain.  On success *newbp points at the summary buffer to fill.
 */
static int
nandfs_add_segsum_block(struct nandfs_seginfo *seginfo, struct buf **newbp)
{
	struct nandfs_device *fsdev;
	nandfs_daddr_t blk;
	struct buf *bp;
	int error;

	if (!(seginfo->curseg) || seginfo->curseg->num_blocks <= 1) {
		error = create_segment(seginfo);
		if (error) {
			nandfs_error("%s: error:%d when creating segment\n",
			    __func__, error);
			return (error);
		}
		/* New segment already carries an initial summary block. */
		*newbp = TAILQ_FIRST(&seginfo->curseg->segsum);
		return (0);
	}

	fsdev = seginfo->fsdev;
	blk = nandfs_block_to_dblock(fsdev, seginfo->curseg->start_block +
	    seginfo->curseg->segsum_blocks);

	bp = getblk(fsdev->nd_devvp, blk, fsdev->nd_blocksize, 0, 0, 0);

	bzero(bp->b_data, seginfo->fsdev->nd_blocksize);
	bp->b_bufobj = &seginfo->fsdev->nd_devvp->v_bufobj;
	bp->b_flags |= B_MANAGED;

	TAILQ_INSERT_TAIL(&seginfo->curseg->segsum, bp,
	    b_cluster.cluster_entry);
	seginfo->curseg->num_blocks--;
	seginfo->curseg->segsum_blocks++;
	seginfo->curseg->bytes_left = seginfo->fsdev->nd_blocksize;
	seginfo->curseg->current_off = bp->b_data;
	seginfo->blocks++;
	*newbp = bp;

	DPRINTF(SYNC, ("%s: bp %p\n", __func__, bp));

	return (0);
}

/*
 * Queue a dirty data buffer on the current segment and record its block
 * info (binfo) in the segment summary.  DAT-file blocks use the bi_dat
 * form (with an indirection level), all others the bi_v form; a new
 * summary block or segment is created when the current one is exhausted.
 */
static int
nandfs_add_blocks(struct nandfs_seginfo *seginfo, struct nandfs_node *node,
    struct buf *bp)
{
	union nandfs_binfo *binfo;
	struct buf *seg_bp;
	int error;

	if (!(seginfo->curseg) || !seginfo->curseg->num_blocks) {
		error = create_segment(seginfo);
		if (error) {
			nandfs_error("%s: error:%d when creating segment\n",
			    __func__, error);
			return (error);
		}
	}

	/* Overflow into a fresh summary block if this binfo won't fit. */
	if (seginfo->curseg->bytes_left < sizeof(union nandfs_binfo)) {
		error = nandfs_add_segsum_block(seginfo, &seg_bp);
		if (error) {
			nandfs_error("%s: error:%d when adding segsum\n",
			    __func__, error);
			return (error);
		}
	}
	binfo = (union nandfs_binfo *)seginfo->curseg->current_off;

	if (node->nn_ino != NANDFS_DAT_INO) {
		binfo->bi_v.bi_blkoff = bp->b_lblkno;
		binfo->bi_v.bi_ino = node->nn_ino;
	} else {
		binfo->bi_dat.bi_blkoff = bp->b_lblkno;
		binfo->bi_dat.bi_ino = node->nn_ino;
		if (NANDFS_IS_INDIRECT(bp))
			binfo->bi_dat.bi_level = 1;
		else
			binfo->bi_dat.bi_level = 0;
	}
	binfo++;

	seginfo->curseg->bytes_left -= sizeof(union nandfs_binfo);
	seginfo->curseg->segsum_bytes += sizeof(union nandfs_binfo);
	seginfo->curseg->current_off = (char *)binfo;

	TAILQ_INSERT_TAIL(&seginfo->curseg->data, bp, b_cluster.cluster_entry);

	seginfo->curseg->nbinfos++;
	seginfo->curseg->nblocks++;
	seginfo->curseg->num_blocks--;
	seginfo->blocks++;

	DPRINTF(SYNC, ("%s: bp (%p) number %x (left %x)\n",
	    __func__, bp, seginfo->curseg->nblocks,
	    seginfo->curseg->num_blocks));

	return (0);
}

/*
 * Gather all dirty data buffers of a vnode into the seginfo, updating the
 * DAT mapping for each buffer before marking it gathered.
 * NOTE(review): the 'hold' parameter is never used in this body — presumably
 * vestigial or consumed elsewhere; verify against callers.
 */
static int
nandfs_iterate_dirty_buf(struct vnode *vp, struct nandfs_seginfo *seginfo,
    uint8_t hold)
{
	struct buf *bp, *tbd;
	struct bufobj *bo;
	struct nandfs_node *node;
	int error;

	node = VTON(vp);
	bo = &vp->v_bufobj;

	ASSERT_VOP_ELOCKED(vp, __func__);

	/* Iterate dirty data bufs */
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd,
	    b_bobufs, tbd) {
		DPRINTF(SYNC, ("%s: vp (%p): bp (%p) with lblkno %jx ino %jx "
		    "add buf\n", __func__, vp, bp, bp->b_lblkno,
		    node->nn_ino));

		if (!(NANDFS_ISGATHERED(bp))) {
			error = nandfs_bmap_update_dat(node,
			    nandfs_vblk_get(bp), bp);
			if (error)
				return (error);
			NANDFS_GATHER(bp);
			/*
			 * NOTE(review): nandfs_add_blocks() return value is
			 * ignored here; a segment-creation failure would go
			 * unnoticed — confirm whether that is intentional.
			 */
			nandfs_add_blocks(seginfo, node, bp);
		}
	}

	return (0);
}

/*
 * Gather the dirty buffers of a system (metadata) vnode.  Always returns 0;
 * errors from the buffer iteration are not propagated.
 */
static int
nandfs_iterate_system_vnode(struct nandfs_node *node,
    struct nandfs_seginfo *seginfo)
{
	struct vnode *vp;
	int nblocks;
	uint8_t hold = 0;

	/* Everything except the ifile is held (see iterate_dirty_buf). */
	if (node->nn_ino != NANDFS_IFILE_INO)
		hold = 1;

	vp = NTOV(node);
	nblocks = vp->v_bufobj.bo_dirty.bv_cnt;
	DPRINTF(SYNC, ("%s: vp (%p): nblocks %x ino %jx\n",
	    __func__, vp, nblocks, node->nn_ino));

	if (nblocks)
		nandfs_iterate_dirty_buf(vp, seginfo, hold);

	return (0);
}

/*
 * Walk the mount's active vnodes and gather every modified/dirty regular
 * vnode into the seginfo.  Vnodes with no dirty buffers are vput() right
 * away; vnodes with gathered buffers remain held/locked until the segment
 * is written (released later via the 'unlock' path of save/clean segblocks).
 * NOTE(review): vput() is called while BO_LOCK is held in the error path —
 * confirm the lock ordering is safe here.
 */
static int
nandfs_iterate_dirty_vnodes(struct mount *mp, struct nandfs_seginfo *seginfo)
{
	struct nandfs_node *nandfs_node;
	struct vnode *vp, *mvp;
	struct thread *td;
	struct bufobj *bo;
	int error, update;

	td = curthread;

	MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
		update = 0;

		/* Skip the syncer vnode and anything already locked. */
		if (mp->mnt_syncer == vp || VOP_ISLOCKED(vp)) {
			VI_UNLOCK(vp);
			continue;
		}
		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT, td) != 0)
			continue;

		nandfs_node = VTON(vp);
		if (nandfs_node->nn_flags & IN_MODIFIED) {
			nandfs_node->nn_flags &= ~(IN_MODIFIED);
			update = 1;
		}

		bo = &vp->v_bufobj;
		BO_LOCK(bo);
		if (vp->v_bufobj.bo_dirty.bv_cnt) {
			error = nandfs_iterate_dirty_buf(vp, seginfo, 0);
			if (error) {
				nandfs_error("%s: cannot iterate vnode:%p "
				    "err:%d\n", __func__, vp, error);
				vput(vp);
				BO_UNLOCK(bo);
				return (error);
			}
			update = 1;
		} else
			vput(vp);
		BO_UNLOCK(bo);

		if (update)
			nandfs_node_update(nandfs_node);
	}

	return (0);
}

/*
 * Bind a gathered buffer to its final physical block: for regular files,
 * record the virtual->physical translation in the DAT and fill the bi_v
 * binfo; for the DAT file itself, update the bmap directly (under the DAT
 * vnode lock) and fill the bi_dat binfo.
 */
static int
nandfs_update_phys_block(struct nandfs_device *fsdev, struct buf *bp,
    uint64_t phys_blknr, union nandfs_binfo *binfo)
{
	struct nandfs_node *node, *dat;
	struct vnode *vp;
	uint64_t new_blknr;
	int error;

	vp = bp->b_vp;
	node = VTON(vp);
	new_blknr = nandfs_vblk_get(bp);
	dat = fsdev->nd_dat_node;

	DPRINTF(BMAP, ("%s: ino %#jx lblk %#jx: vblk %#jx -> %#jx\n",
	    __func__, (uintmax_t)node->nn_ino, (uintmax_t)bp->b_lblkno,
	    (uintmax_t)new_blknr, (uintmax_t)phys_blknr));

	if (node->nn_ino != NANDFS_DAT_INO) {
		KASSERT((new_blknr != 0), ("vblk for bp %p is 0", bp));
		nandfs_vblock_assign(fsdev, new_blknr, phys_blknr);
		binfo->bi_v.bi_vblocknr = new_blknr;
		binfo->bi_v.bi_blkoff = bp->b_lblkno;
		binfo->bi_v.bi_ino = node->nn_ino;
	} else {
		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
		error = nandfs_bmap_update_block(node, bp, phys_blknr);
		if (error) {
			nandfs_error("%s: error updating block:%jx for bp:%p\n",
			    __func__, (uintmax_t)phys_blknr, bp);
			VOP_UNLOCK(NTOV(dat), 0);
			return (error);
		}
		VOP_UNLOCK(NTOV(dat), 0);
		binfo->bi_dat.bi_blkoff = bp->b_lblkno;
		binfo->bi_dat.bi_ino = node->nn_ino;
		if (NANDFS_IS_INDIRECT(bp))
			binfo->bi_dat.bi_level = 1;
		else
			binfo->bi_dat.bi_level = 0;
	}

	return (0);
}

/* Offset of the next binfo slot after 'off' within a summary block. */
#define	NBINFO(off)	((off) + sizeof(union nandfs_binfo))

/*
 * Assign consecutive physical block numbers (following the segment's
 * summary blocks) to every gathered data buffer of a segment, writing one
 * binfo per buffer and walking into subsequent summary blocks as they fill.
 */
static int
nandfs_segment_assign_pblk(struct nandfs_segment *nfsseg)
{
	struct nandfs_device *fsdev;
	union nandfs_binfo *binfo;
	struct buf *bp, *seg_bp;
	uint64_t blocknr;
	uint32_t curr_off, blocksize;
	int error;

	fsdev = nfsseg->fsdev;
	blocksize = fsdev->nd_blocksize;

	/* Data blocks start right after the summary blocks. */
	blocknr = nfsseg->start_block + nfsseg->segsum_blocks;
	seg_bp = TAILQ_FIRST(&nfsseg->segsum);
	DPRINTF(SYNC, ("%s: seg:%p segsum bp:%p data:%p\n",
	    __func__, nfsseg, seg_bp, seg_bp->b_data));

	binfo = (union nandfs_binfo *)(seg_bp->b_data +
	    sizeof(struct nandfs_segment_summary));
	curr_off = sizeof(struct nandfs_segment_summary);

	TAILQ_FOREACH(bp, &nfsseg->data, b_cluster.cluster_entry) {
		KASSERT((bp->b_vp), ("bp %p has not vp", bp));

		DPRINTF(BMAP, ("\n\n%s: assign buf %p for ino %#jx next %p\n",
		    __func__, bp, (uintmax_t)VTON(bp->b_vp)->nn_ino,
		    TAILQ_NEXT(bp, b_cluster.cluster_entry)));

		/* Move to the next summary block when this one is full. */
		if (NBINFO(curr_off) > blocksize) {
			seg_bp = TAILQ_NEXT(seg_bp, b_cluster.cluster_entry);
			binfo = (union nandfs_binfo *)seg_bp->b_data;
			curr_off = 0;
			DPRINTF(SYNC, ("%s: next segsum %p data %p\n",
			    __func__, seg_bp, seg_bp->b_data));
		}

		error
		    = nandfs_update_phys_block(fsdev, bp, blocknr, binfo);
		if (error) {
			/* NOTE(review): "updatinng" typo is in the original
			 * message string; left untouched (runtime text). */
			nandfs_error("%s: err:%d when updatinng phys block:%jx"
			    " for bp:%p and binfo:%p\n", __func__, error,
			    (uintmax_t)blocknr, bp, binfo);
			return (error);
		}
		binfo++;
		curr_off = NBINFO(curr_off);
		blocknr++;
	}

	return (0);
}

/*
 * Run nandfs_segment_assign_pblk() over every segment in the seginfo,
 * stopping at the first error.
 */
static int
nandfs_seginfo_assign_pblk(struct nandfs_seginfo *seginfo)
{
	struct nandfs_segment *nfsseg;
	int error = 0;

	LIST_FOREACH(nfsseg, &seginfo->seg_list, seg_link) {
		error = nandfs_segment_assign_pblk(nfsseg);
		if (error)
			break;
	}

	return (error);
}

/*
 * Fill in the segment summary header of a segment (magic, flags, sequence,
 * block/binfo counts) and compute its checksum over all summary bytes,
 * chaining crc32_raw() across summary blocks.  'has_sr' marks the log that
 * carries the super-root.  Returns a pointer to the summary in the first
 * summary block.
 */
static struct nandfs_segment_summary *
nandfs_fill_segsum(struct nandfs_segment *seg, int has_sr)
{
	struct nandfs_segment_summary *ss;
	struct nandfs_device *fsdev;
	struct buf *bp;
	uint32_t rest, segsum_size, blocksize, crc_calc;
	uint16_t flags;
	uint8_t *crc_area, crc_skip;

	DPRINTF(SYNC, ("%s: seg %#jx nblocks %#x sumbytes %#x\n",
	    __func__, (uintmax_t) seg->seg_num,
	    seg->nblocks + seg->segsum_blocks,
	    seg->segsum_bytes));

	fsdev = seg->fsdev;

	/* Each segment here is a complete log; mark super-root if present. */
	flags = NANDFS_SS_LOGBGN | NANDFS_SS_LOGEND;
	if (has_sr)
		flags |= NANDFS_SS_SR;

	bp = TAILQ_FIRST(&seg->segsum);
	ss = (struct nandfs_segment_summary *) bp->b_data;
	ss->ss_magic = NANDFS_SEGSUM_MAGIC;
	ss->ss_bytes = sizeof(struct nandfs_segment_summary);
	ss->ss_flags = flags;
	ss->ss_seq = ++(fsdev->nd_seg_sequence);
	ss->ss_create = fsdev->nd_ts.tv_sec;
	nandfs_get_segment_range(fsdev, seg->seg_next, &ss->ss_next, NULL);
	ss->ss_nblocks = seg->nblocks + seg->segsum_blocks;
	ss->ss_nbinfos = seg->nbinfos;
	ss->ss_sumbytes = seg->segsum_bytes;

	/* Checksum skips the two leading checksum fields themselves. */
	crc_skip = sizeof(ss->ss_datasum) + sizeof(ss->ss_sumsum);
	blocksize = seg->fsdev->nd_blocksize;
	segsum_size = seg->segsum_bytes - crc_skip;
	rest = min(seg->segsum_bytes, blocksize) - crc_skip;
	crc_area = (uint8_t *)ss + crc_skip;
	crc_calc = ~0U;
	while (segsum_size > 0) {
		crc_calc = crc32_raw(crc_area, rest, crc_calc);
		segsum_size -= rest;
		if (!segsum_size)
			break;
		/* Continue the CRC into the next summary block. */
		bp = TAILQ_NEXT(bp, b_cluster.cluster_entry);
		crc_area = (uint8_t *)bp->b_data;
		rest = segsum_size <=
		    blocksize ? segsum_size : blocksize;
	}
	ss->ss_sumsum = crc_calc ^ ~0U;

	return (ss);
}

/*
 * Write one gathered buffer synchronously to its assigned device block:
 * translate the fs block number to a device block, reclaim the buffer from
 * its owning bufobj if necessary, strip the gathering state and bwrite().
 * Returns the bwrite() error code.
 */
static int
nandfs_save_buf(struct buf *bp, uint64_t blocknr, struct nandfs_device *fsdev)
{
	struct bufobj *bo;
	int error;

	bo = &fsdev->nd_devvp->v_bufobj;

	bp->b_blkno = nandfs_block_to_dblock(fsdev, blocknr);
	bp->b_iooffset = dbtob(bp->b_blkno);

	KASSERT(bp->b_bufobj != NULL, ("no bufobj for %p", bp));
	if (bp->b_bufobj != bo) {
		/* Buffer still belongs to a file vnode; lock it for I/O. */
		BO_LOCK(bp->b_bufobj);
		BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
		    BO_LOCKPTR(bp->b_bufobj));
		KASSERT(BUF_ISLOCKED(bp), ("Problem with locking buffer"));
	}

	DPRINTF(SYNC, ("%s: buf: %p offset %#jx blk %#jx size %#x\n",
	    __func__, bp, (uintmax_t)bp->b_offset, (uintmax_t)blocknr,
	    fsdev->nd_blocksize));

	NANDFS_UNGATHER(bp);
	nandfs_buf_clear(bp, 0xffffffff);
	bp->b_flags &= ~(B_ASYNC|B_INVAL|B_MANAGED);
	error = bwrite(bp);
	if (error) {
		nandfs_error("%s: error:%d when writing buffer:%p\n",
		    __func__, error, bp);
		return (error);
	}
	return (error);
}

/*
 * Discard a gathered buffer without writing it: clear gathering state and
 * undirty it against the device.
 */
static void
nandfs_clean_buf(struct nandfs_device *fsdev, struct buf *bp)
{

	DPRINTF(SYNC, ("%s: buf: %p\n", __func__, bp));

	NANDFS_UNGATHER(bp);
	nandfs_buf_clear(bp, 0xffffffff);
	bp->b_flags &= ~(B_ASYNC|B_INVAL|B_MANAGED);
	nandfs_undirty_buf_fsdev(fsdev, bp);
}

/*
 * Abort a segment: drop all of its summary and data buffers unwritten.
 * When 'unlock' is set, vput() each data buffer's vnode as the last buffer
 * belonging to it is released (system nodes excepted), mirroring the hold
 * taken during gathering.
 */
static void
nandfs_clean_segblocks(struct nandfs_segment *seg, uint8_t unlock)
{
	struct nandfs_device *fsdev = seg->fsdev;
	struct nandfs_segment *next_seg;
	struct buf *bp, *tbp, *next_bp;
	struct vnode *vp, *next_vp;

	VOP_LOCK(fsdev->nd_devvp, LK_EXCLUSIVE);
	TAILQ_FOREACH_SAFE(bp, &seg->segsum, b_cluster.cluster_entry, tbp) {
		TAILQ_REMOVE(&seg->segsum, bp, b_cluster.cluster_entry);
		nandfs_clean_buf(fsdev, bp);
	}

	TAILQ_FOREACH_SAFE(bp, &seg->data, b_cluster.cluster_entry, tbp) {
		TAILQ_REMOVE(&seg->data, bp, b_cluster.cluster_entry);

		/*
		 * If bp is not super-root and vnode is not currently
		 * locked lock it.
		 */
		vp = bp->b_vp;
		next_vp = NULL;
		/* Peek at the next data buffer (possibly in the next
		 * segment) so the vnode is only vput on its LAST buffer. */
		next_bp = TAILQ_NEXT(bp, b_cluster.cluster_entry);
		if (!next_bp) {
			next_seg = LIST_NEXT(seg, seg_link);
			if (next_seg)
				next_bp = TAILQ_FIRST(&next_seg->data);
		}

		if (next_bp)
			next_vp = next_bp->b_vp;

		nandfs_clean_buf(fsdev, bp);

		if (unlock && vp != NULL && next_vp != vp &&
		    !NANDFS_SYS_NODE(VTON(vp)->nn_ino))
			vput(vp);

		nandfs_dirty_bufs_decrement(fsdev);
	}

	VOP_UNLOCK(fsdev->nd_devvp, 0);
}

/*
 * Write a whole segment to the device: first its summary blocks at
 * seg->start_block, then the data blocks immediately after.  On any write
 * error the remaining buffers are discarded via nandfs_clean_segblocks().
 * 'unlock' has the same last-buffer vput semantics as in the clean path.
 */
static int
nandfs_save_segblocks(struct nandfs_segment *seg, uint8_t unlock)
{
	struct nandfs_device *fsdev = seg->fsdev;
	struct nandfs_segment *next_seg;
	struct buf *bp, *tbp, *next_bp;
	struct vnode *vp, *next_vp;
	uint64_t blocknr;
	uint32_t i = 0;
	int error = 0;

	VOP_LOCK(fsdev->nd_devvp, LK_EXCLUSIVE);
	TAILQ_FOREACH_SAFE(bp, &seg->segsum, b_cluster.cluster_entry, tbp) {
		TAILQ_REMOVE(&seg->segsum, bp, b_cluster.cluster_entry);
		blocknr = seg->start_block + i;
		error = nandfs_save_buf(bp, blocknr, fsdev);
		if (error) {
			nandfs_error("%s: error saving buf: %p blocknr:%jx\n",
			    __func__, bp, (uintmax_t)blocknr);
			goto out;
		}
		i++;
	}

	i = 0;
	TAILQ_FOREACH_SAFE(bp, &seg->data, b_cluster.cluster_entry, tbp) {
		TAILQ_REMOVE(&seg->data, bp, b_cluster.cluster_entry);

		blocknr = seg->start_block + seg->segsum_blocks + i;
		/*
		 * If bp is not super-root and vnode is not currently
		 * locked lock it.
		 */
		vp = bp->b_vp;
		next_vp = NULL;
		next_bp = TAILQ_NEXT(bp, b_cluster.cluster_entry);
		if (!next_bp) {
			next_seg = LIST_NEXT(seg, seg_link);
			if (next_seg)
				next_bp = TAILQ_FIRST(&next_seg->data);
		}

		if (next_bp)
			next_vp = next_bp->b_vp;

		error = nandfs_save_buf(bp, blocknr, fsdev);
		if (error) {
			nandfs_error("%s: error saving buf: %p blknr: %jx\n",
			    __func__, bp, (uintmax_t)blocknr);
			if (unlock && vp != NULL && next_vp != vp &&
			    !NANDFS_SYS_NODE(VTON(vp)->nn_ino))
				vput(vp);
			goto out;
		}

		if (unlock && vp != NULL && next_vp != vp &&
		    !NANDFS_SYS_NODE(VTON(vp)->nn_ino))
			vput(vp);

		i++;
		nandfs_dirty_bufs_decrement(fsdev);
	}

out:
	if (error) {
		/* Discard whatever was not written. */
		nandfs_clean_segblocks(seg, unlock);
		VOP_UNLOCK(fsdev->nd_devvp, 0);
		return (error);
	}

	VOP_UNLOCK(fsdev->nd_devvp, 0);

	return (error);
}

/* Discard every segment of the seginfo without writing (abort path). */
static void
clean_seginfo(struct nandfs_seginfo *seginfo, uint8_t unlock)
{
	struct nandfs_segment *seg;

	DPRINTF(SYNC, ("%s: seginfo %p\n", __func__, seginfo));

	LIST_FOREACH(seg, &seginfo->seg_list, seg_link) {
		nandfs_clean_segblocks(seg, unlock);
	}
}

/*
 * Finalize and write every segment of the seginfo.  The LAST segment gets
 * the super-root flag, its summary is saved in nd_last_segsum, and the
 * device's checkpoint number / last partial-segment pointer are advanced.
 * On error the remaining seginfo is cleaned.
 */
static int
save_seginfo(struct nandfs_seginfo *seginfo, uint8_t unlock)
{
	struct nandfs_segment *seg;
	struct nandfs_device *fsdev;
	struct nandfs_segment_summary *ss;
	int error = 0;

	fsdev = seginfo->fsdev;

	DPRINTF(SYNC, ("%s: seginfo %p\n", __func__, seginfo));

	LIST_FOREACH(seg, &seginfo->seg_list, seg_link) {
		if (LIST_NEXT(seg, seg_link)) {
			nandfs_fill_segsum(seg, 0);
			error = nandfs_save_segblocks(seg, unlock);
			if (error) {
				nandfs_error("%s: error:%d saving seg:%p\n",
				    __func__, error, seg);
				goto out;
			}
		} else {
			/* Last segment: carries the super-root. */
			ss = nandfs_fill_segsum(seg, 1);
			fsdev->nd_last_segsum = *ss;
			error = nandfs_save_segblocks(seg, unlock);
			if (error) {
				nandfs_error("%s: error:%d saving seg:%p\n",
				    __func__, error, seg);
				goto out;
			}
			fsdev->nd_last_cno++;
			fsdev->nd_last_pseg = seg->start_block;
		}
	}
out:
	if (error)
		clean_seginfo(seginfo, unlock);

	return (error);
}

/*
 * Invalidate all clean GC-node buffers whose logical blocks fall inside
 * the given segment (used before the segment is erased/reused).
 */
static void
nandfs_invalidate_bufs(struct nandfs_device *fsdev, uint64_t segno)
{
	uint64_t start, end;
	struct buf *bp, *tbd;
	struct bufobj *bo;

	nandfs_get_segment_range(fsdev, segno, &start, &end);

	bo = &NTOV(fsdev->nd_gc_node)->v_bufobj;

	BO_LOCK(bo);
restart_locked_gc:
	TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, tbd) {
		if (!(bp->b_lblkno >= start && bp->b_lblkno <= end))
			continue;

		/* Lost the race for the buf lock: rescan from the top. */
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			goto restart_locked_gc;

		bremfree(bp);
		bp->b_flags |= (B_INVAL | B_RELBUF);
		bp->b_flags &= ~(B_ASYNC | B_MANAGED);
		BO_UNLOCK(bo);
		brelse(bp);
		BO_LOCK(bo);
	}
	BO_UNLOCK(bo);
}

/*
 * Process segments marked as free by the cleaner: invalidate their cached
 * GC buffers and clear them, refreshing the superblock first whenever the
 * segment being cleared is the one the superblock's last-pseg points into.
 */
static void
nandfs_process_segments(struct nandfs_device *fsdev)
{
	uint64_t saved_segment;
	int i;

	if (fsdev->nd_free_base) {
		saved_segment = nandfs_get_segnum_of_block(fsdev,
		    fsdev->nd_super.s_last_pseg);
		for (i = 0; i < fsdev->nd_free_count; i++) {
			if (fsdev->nd_free_base[i] == NANDFS_NOSEGMENT)
				continue;

			/* Update superblock if clearing segment point by it */
			if (fsdev->nd_free_base[i] == saved_segment) {
				nandfs_write_superblock(fsdev);
				saved_segment = nandfs_get_segnum_of_block(
				    fsdev, fsdev->nd_super.s_last_pseg);
			}

			nandfs_invalidate_bufs(fsdev, fsdev->nd_free_base[i]);
			nandfs_clear_segment(fsdev, fsdev->nd_free_base[i]);
		}

		free(fsdev->nd_free_base, M_NANDFSTEMP);
		fsdev->nd_free_base = NULL;
		fsdev->nd_free_count = 0;
	}
}

/*
 * Collect and write the dirty buffers of a single vnode as a new
 * checkpoint.  Takes the segment-constructor lock exclusively (upgrading
 * from shared), gathers the vnode's buffers, updates its ifile entry, and
 * if anything was gathered creates a checkpoint and writes the segments.
 * Returns 0 or an errno; the vnode stays locked throughout.
 */
int
nandfs_sync_file(struct vnode *vp)
{
	struct nandfs_device *fsdev;
	struct nandfs_node *nandfs_node;
	struct nandfsmount *nmp;
	struct nandfs_node *dat, *su, *ifile, *cp;
	struct nandfs_seginfo *seginfo = NULL;
	struct nandfs_segment *seg;
	int update, error;
	int cno_changed;

	ASSERT_VOP_LOCKED(vp, __func__);
	DPRINTF(SYNC, ("%s: START\n", __func__));

	error = 0;

	nmp = VFSTONANDFS(vp->v_mount);
	fsdev = nmp->nm_nandfsdev;

	dat = fsdev->nd_dat_node;
	su = fsdev->nd_su_node;
	cp = fsdev->nd_cp_node;
	ifile = nmp->nm_ifile_node;

	NANDFS_WRITEASSERT(fsdev);
	if (lockmgr(&fsdev->nd_seg_const, LK_UPGRADE, NULL) != 0) {
		DPRINTF(SYNC, ("%s: lost shared lock\n", __func__));
		if
		    (lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL) != 0)
			panic("couldn't lock exclusive");
	}
	DPRINTF(SYNC, ("%s: got lock\n", __func__));

	VOP_LOCK(NTOV(su), LK_EXCLUSIVE);
	create_seginfo(fsdev, &seginfo);

	update = 0;

	nandfs_node = VTON(vp);
	if (nandfs_node->nn_flags & IN_MODIFIED) {
		nandfs_node->nn_flags &= ~(IN_MODIFIED);
		update = 1;
	}

	if (vp->v_bufobj.bo_dirty.bv_cnt) {
		error = nandfs_iterate_dirty_buf(vp, seginfo, 0);
		if (error) {
			clean_seginfo(seginfo, 0);
			delete_seginfo(seginfo);
			VOP_UNLOCK(NTOV(su), 0);
			lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
			nandfs_error("%s: err:%d iterating dirty bufs vp:%p",
			    __func__, error, vp);
			return (error);
		}
		update = 1;
	}

	if (update) {
		/* Push the node's inode into the ifile. */
		VOP_LOCK(NTOV(ifile), LK_EXCLUSIVE);
		error = nandfs_node_update(nandfs_node);
		if (error) {
			clean_seginfo(seginfo, 0);
			delete_seginfo(seginfo);
			VOP_UNLOCK(NTOV(ifile), 0);
			VOP_UNLOCK(NTOV(su), 0);
			lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
			nandfs_error("%s: err:%d updating vp:%p",
			    __func__, error, vp);
			return (error);
		}
		VOP_UNLOCK(NTOV(ifile), 0);
	}

	cno_changed = 0;
	if (seginfo->blocks) {
		VOP_LOCK(NTOV(cp), LK_EXCLUSIVE);
		cno_changed = 1;
		/* Create new checkpoint */
		error = nandfs_get_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1);
		if (error) {
			clean_seginfo(seginfo, 0);
			delete_seginfo(seginfo);
			VOP_UNLOCK(NTOV(cp), 0);
			VOP_UNLOCK(NTOV(su), 0);
			lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
			nandfs_error("%s: err:%d getting cp:%jx",
			    __func__, error, fsdev->nd_last_cno + 1);
			return (error);
		}

		/* Reiterate all blocks and assign physical block number */
		nandfs_seginfo_assign_pblk(seginfo);

		/* Fill checkpoint data */
		error = nandfs_set_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1,
		    &ifile->nn_inode, seginfo->blocks);
		if (error) {
			clean_seginfo(seginfo, 0);
			delete_seginfo(seginfo);
			VOP_UNLOCK(NTOV(cp), 0);
			VOP_UNLOCK(NTOV(su), 0);
			lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
			nandfs_error("%s: err:%d setting cp:%jx",
			    __func__, error, fsdev->nd_last_cno + 1);
			return (error);
		}

		VOP_UNLOCK(NTOV(cp), 0);
		/* Account the written blocks in the segment usage file. */
		LIST_FOREACH(seg, &seginfo->seg_list, seg_link)
			nandfs_update_segment(fsdev, seg->seg_num,
			    seg->nblocks + seg->segsum_blocks);

		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
		error = save_seginfo(seginfo, 0);
		if (error) {
			clean_seginfo(seginfo, 0);
			delete_seginfo(seginfo);
			VOP_UNLOCK(NTOV(dat), 0);
			VOP_UNLOCK(NTOV(su), 0);
			lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);
			nandfs_error("%s: err:%d updating seg", __func__, error);
			return (error);
		}
		VOP_UNLOCK(NTOV(dat), 0);
	}

	VOP_UNLOCK(NTOV(su), 0);

	delete_seginfo(seginfo);
	lockmgr(&fsdev->nd_seg_const, LK_DOWNGRADE, NULL);

	/* Periodically persist the superblock between checkpoints. */
	if (cno_changed && !error) {
		if (nandfs_cps_between_sblocks != 0 &&
		    fsdev->nd_last_cno % nandfs_cps_between_sblocks == 0)
			nandfs_write_superblock(fsdev);
	}

	ASSERT_VOP_LOCKED(vp, __func__);
	DPRINTF(SYNC, ("%s: END error %d\n", __func__, error));
	return (error);
}

/*
 * Full segment construction for a mount: gather GC, all dirty regular
 * vnodes and the system files, build a checkpoint with a super-root, and
 * write everything out.  'flags' forces a checkpoint even with no dirty
 * blocks (e.g. NANDFS_UMOUNT).  System vnodes are locked in the order
 * su, ifile, gc, cp (dat taken innermost around its iterations).
 */
int
nandfs_segment_constructor(struct nandfsmount *nmp, int flags)
{
	struct nandfs_device *fsdev;
	struct nandfs_seginfo *seginfo = NULL;
	struct nandfs_segment *seg;
	struct nandfs_node *dat, *su, *ifile, *cp, *gc;
	int cno_changed, error;

	DPRINTF(SYNC, ("%s: START\n", __func__));

	fsdev = nmp->nm_nandfsdev;

	lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL);
	/* NOTE(review): "git lock" below looks like a typo for "got lock";
	 * it is a runtime debug string, so it is left untouched here. */
	DPRINTF(SYNC, ("%s: git lock\n", __func__));
again:
	create_seginfo(fsdev, &seginfo);

	dat = fsdev->nd_dat_node;
	su = fsdev->nd_su_node;
	cp = fsdev->nd_cp_node;
	gc = fsdev->nd_gc_node;
	ifile = nmp->nm_ifile_node;

	VOP_LOCK(NTOV(su), LK_EXCLUSIVE);
	VOP_LOCK(NTOV(ifile), LK_EXCLUSIVE);
	VOP_LOCK(NTOV(gc), LK_EXCLUSIVE);
	VOP_LOCK(NTOV(cp), LK_EXCLUSIVE);

	nandfs_iterate_system_vnode(gc, seginfo);
	nandfs_iterate_dirty_vnodes(nmp->nm_vfs_mountp, seginfo);
	nandfs_iterate_system_vnode(ifile, seginfo);
	nandfs_iterate_system_vnode(su, seginfo);

	cno_changed = 0;
	if (seginfo->blocks || flags) {
		cno_changed = 1;
		/* Create new checkpoint */
		error = nandfs_get_checkpoint(fsdev, cp,
		    fsdev->nd_last_cno + 1);
		if (error) {
			clean_seginfo(seginfo, 0);
			delete_seginfo(seginfo);
			goto error_locks;
		}

		/*
		 * Collect blocks from system files */
		nandfs_iterate_system_vnode(cp, seginfo);
		nandfs_iterate_system_vnode(su, seginfo);
		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
		nandfs_iterate_system_vnode(dat, seginfo);
		VOP_UNLOCK(NTOV(dat), 0);
reiterate:
		/*
		 * Gathering su/dat can dirty more of their blocks (and
		 * creating a segment dirties su), so loop until the pass
		 * produces nothing new.
		 */
		seginfo->reiterate = 0;
		nandfs_iterate_system_vnode(su, seginfo);
		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
		nandfs_iterate_system_vnode(dat, seginfo);
		VOP_UNLOCK(NTOV(dat), 0);
		if (seginfo->reiterate)
			goto reiterate;
		if (!(seginfo->curseg) || !seginfo->curseg->num_blocks) {
			error = create_segment(seginfo);
			if (error) {
				clean_seginfo(seginfo, 0);
				delete_seginfo(seginfo);
				goto error_locks;
			}
			goto reiterate;
		}

		/* Reiterate all blocks and assign physical block number */
		nandfs_seginfo_assign_pblk(seginfo);

		/* Fill superroot */
		error = nandfs_add_superroot(seginfo);
		if (error) {
			clean_seginfo(seginfo, 0);
			delete_seginfo(seginfo);
			goto error_locks;
		}
		KASSERT(!(seginfo->reiterate), ("reiteration after superroot"));

		/* Fill checkpoint data */
		nandfs_set_checkpoint(fsdev, cp, fsdev->nd_last_cno + 1,
		    &ifile->nn_inode, seginfo->blocks);

		/* Account the written blocks in the segment usage file. */
		LIST_FOREACH(seg, &seginfo->seg_list, seg_link)
			nandfs_update_segment(fsdev, seg->seg_num,
			    seg->nblocks + seg->segsum_blocks);

		VOP_LOCK(NTOV(dat), LK_EXCLUSIVE);
		error = save_seginfo(seginfo, 1);
		if (error) {
			clean_seginfo(seginfo, 1);
			delete_seginfo(seginfo);
			goto error_dat;
		}
		VOP_UNLOCK(NTOV(dat), 0);
	}

	VOP_UNLOCK(NTOV(cp), 0);
	VOP_UNLOCK(NTOV(gc), 0);
	VOP_UNLOCK(NTOV(ifile), 0);

	nandfs_process_segments(fsdev);

	VOP_UNLOCK(NTOV(su), 0);

	delete_seginfo(seginfo);

	/*
	 * XXX: a hack, will go away soon
	 */
	if ((NTOV(dat)->v_bufobj.bo_dirty.bv_cnt != 0 ||
	    NTOV(cp)->v_bufobj.bo_dirty.bv_cnt != 0 ||
	    NTOV(gc)->v_bufobj.bo_dirty.bv_cnt != 0 ||
	    NTOV(ifile)->v_bufobj.bo_dirty.bv_cnt != 0 ||
	    NTOV(su)->v_bufobj.bo_dirty.bv_cnt != 0) &&
	    (flags & NANDFS_UMOUNT)) {
		DPRINTF(SYNC, ("%s: RERUN\n", __func__));
		goto again;
	}

	MPASS(fsdev->nd_free_base == NULL);

	lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL);

	/* Persist the superblock periodically and always on unmount. */
	if (cno_changed) {
		if ((nandfs_cps_between_sblocks != 0 &&
		    fsdev->nd_last_cno % nandfs_cps_between_sblocks == 0) ||
		    flags & NANDFS_UMOUNT)
			nandfs_write_superblock(fsdev);
	}

	DPRINTF(SYNC, ("%s: END\n", __func__));
	return (0);

error_dat:
	VOP_UNLOCK(NTOV(dat), 0);
error_locks:
	VOP_UNLOCK(NTOV(cp), 0);
	VOP_UNLOCK(NTOV(gc), 0);
	VOP_UNLOCK(NTOV(ifile), 0);
	VOP_UNLOCK(NTOV(su), 0);
	lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL);

	return (error);
}

#ifdef DDB
/*
 * Show details about the given NANDFS mount point.
 */
DB_SHOW_COMMAND(nandfs, db_show_nandfs)
{
	struct mount *mp;
	struct nandfs_device *nffsdev;
	struct nandfs_segment *seg;
	struct nandfsmount *nmp;
	struct buf *bp;
	struct vnode *vp;

	if (!have_addr) {
		db_printf("\nUsage: show nandfs \n");
		return;
	}

	mp = (struct mount *)addr;
	db_printf("%p %s on %s (%s)\n", mp, mp->mnt_stat.f_mntfromname,
	    mp->mnt_stat.f_mntonname, mp->mnt_stat.f_fstypename);

	nmp = (struct nandfsmount *)(mp->mnt_data);
	nffsdev = nmp->nm_nandfsdev;
	db_printf("dev vnode:%p\n", nffsdev->nd_devvp);
	db_printf("blocksize:%jx last cno:%jx last pseg:%jx seg num:%jx\n",
	    (uintmax_t)nffsdev->nd_blocksize, (uintmax_t)nffsdev->nd_last_cno,
	    (uintmax_t)nffsdev->nd_last_pseg, (uintmax_t)nffsdev->nd_seg_num);
	db_printf("system nodes: dat:%p cp:%p su:%p ifile:%p gc:%p\n",
	    nffsdev->nd_dat_node, nffsdev->nd_cp_node, nffsdev->nd_su_node,
	    nmp->nm_ifile_node, nffsdev->nd_gc_node);

	/* Dump any in-progress segment construction state. */
	if (nffsdev->nd_seginfo != NULL) {
		LIST_FOREACH(seg, &nffsdev->nd_seginfo->seg_list, seg_link) {
			db_printf("seg: %p\n", seg);
			TAILQ_FOREACH(bp, &seg->segsum,
			    b_cluster.cluster_entry)
				db_printf("segbp %p\n", bp);
			TAILQ_FOREACH(bp, &seg->data,
			    b_cluster.cluster_entry) {
				vp = bp->b_vp;
				db_printf("bp:%p bp->b_vp:%p ino:%jx\n", bp,
				    vp, (uintmax_t)(vp ?
VTON(vp)->nn_ino : 0)); } } } } #endif Index: head/sys/fs/nandfs/nandfs_subr.c =================================================================== --- head/sys/fs/nandfs/nandfs_subr.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_subr.c (revision 326268) @@ -1,1088 +1,1090 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf * Copyright (c) 2008, 2009 Reinoud Zandijk * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * From: NetBSD: nilfs_subr.c,v 1.4 2009/07/29 17:06:57 reinoud */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "nandfs_mount.h" #include "nandfs.h" #include "nandfs_subr.h" MALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount"); MALLOC_DEFINE(M_NANDFSTEMP, "nandfs_tmt", "NANDFS tmp"); uma_zone_t nandfs_node_zone; void nandfs_bdflush(struct bufobj *bo, struct buf *bp); int nandfs_bufsync(struct bufobj *bo, int waitfor); struct buf_ops buf_ops_nandfs = { .bop_name = "buf_ops_nandfs", .bop_write = bufwrite, .bop_strategy = bufstrategy, .bop_sync = nandfs_bufsync, .bop_bdflush = nandfs_bdflush, }; int nandfs_bufsync(struct bufobj *bo, int waitfor) { struct vnode *vp; int error = 0; vp = bo2vnode(bo); ASSERT_VOP_LOCKED(vp, __func__); error = nandfs_sync_file(vp); if (error) nandfs_warning("%s: cannot flush buffers err:%d\n", __func__, error); return (error); } void nandfs_bdflush(bo, bp) struct bufobj *bo; struct buf *bp; { struct vnode *vp; int error; if (bo->bo_dirty.bv_cnt <= ((dirtybufthresh * 8) / 10)) return; vp = bp->b_vp; if (NANDFS_SYS_NODE(VTON(vp)->nn_ino)) return; if (NANDFS_IS_INDIRECT(bp)) return; error = nandfs_sync_file(vp); if (error) nandfs_warning("%s: cannot flush buffers err:%d\n", __func__, error); } int nandfs_init(struct vfsconf *vfsp) { nandfs_node_zone = uma_zcreate("nandfs node zone", sizeof(struct nandfs_node), NULL, NULL, NULL, NULL, 0, 0); return (0); } int nandfs_uninit(struct vfsconf *vfsp) { uma_zdestroy(nandfs_node_zone); return (0); } /* Basic calculators */ uint64_t nandfs_get_segnum_of_block(struct nandfs_device *nandfsdev, nandfs_daddr_t blocknr) { uint64_t segnum, blks_per_seg; MPASS(blocknr >= nandfsdev->nd_fsdata.f_first_data_block); blks_per_seg = nandfsdev->nd_fsdata.f_blocks_per_segment; segnum = blocknr / blks_per_seg; segnum -= 
	    nandfsdev->nd_fsdata.f_first_data_block / blks_per_seg;
	DPRINTF(SYNC, ("%s: returning blocknr %jx -> segnum %jx\n", __func__,
	    blocknr, segnum));

	return (segnum);
}

/*
 * Compute the [start, end] block range of a segment.  seg_end may be NULL
 * when the caller only needs the first block.
 */
void
nandfs_get_segment_range(struct nandfs_device *nandfsdev, uint64_t segnum,
    uint64_t *seg_start, uint64_t *seg_end)
{
	uint64_t blks_per_seg;

	blks_per_seg = nandfsdev->nd_fsdata.f_blocks_per_segment;
	*seg_start = nandfsdev->nd_fsdata.f_first_data_block +
	    blks_per_seg * segnum;
	if (seg_end != NULL)
		*seg_end = *seg_start + blks_per_seg -1;
}

/*
 * Derive the metadata-file layout constants (entries and blocks per group,
 * groups per descriptor block) from the filesystem block size and the
 * per-entry size of the particular metadata file.
 */
void
nandfs_calc_mdt_consts(struct nandfs_device *nandfsdev,
    struct nandfs_mdt *mdt, int entry_size)
{
	uint32_t blocksize = nandfsdev->nd_blocksize;

	/* One bitmap block tracks blocksize * 8 entries. */
	mdt->entries_per_group = blocksize * 8;
	mdt->entries_per_block = blocksize / entry_size;

	/* Entry blocks, rounded up, plus one bitmap block per group. */
	mdt->blocks_per_group =
	    (mdt->entries_per_group -1) / mdt->entries_per_block + 1 + 1;
	mdt->groups_per_desc_block =
	    blocksize / sizeof(struct nandfs_block_group_desc);
	mdt->blocks_per_desc_block =
	    mdt->groups_per_desc_block * mdt->blocks_per_group + 1;
}

/*
 * Read a filesystem block straight from the underlying device vnode,
 * translating the fs block number into DEV_BSIZE units.
 */
int
nandfs_dev_bread(struct nandfs_device *nandfsdev, nandfs_lbn_t blocknr,
    struct ucred *cred, int flags, struct buf **bpp)
{
	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;
	int error;

	DPRINTF(BLOCK, ("%s: read from block %jx vp %p\n", __func__,
	    blocknr * blk2dev, nandfsdev->nd_devvp));
	error = bread(nandfsdev->nd_devvp, blocknr * blk2dev,
	    nandfsdev->nd_blocksize, NOCRED, bpp);
	if (error)
		nandfs_error("%s: cannot read from device - blk:%jx\n",
		    __func__, blocknr);
	return (error);
}

/* Read on a node */
/*
 * Read a logical block of a node through the buffer cache.  For cached
 * non-DAT buffers that lack a virtual block number, look it up in the
 * node's bmap and attach it to the buffer.
 */
int
nandfs_bread(struct nandfs_node *node, nandfs_lbn_t blocknr,
    struct ucred *cred, int flags, struct buf **bpp)
{
	nandfs_daddr_t vblk;
	int error;

	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
	    blocknr));

	error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
	    cred, bpp);

	KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__,
	    NTOV(node), blocknr, error));

	if (!nandfs_vblk_get(*bpp) &&
	    ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO)
	{
		nandfs_bmap_lookup(node, blocknr, &vblk);
		nandfs_vblk_set(*bpp, vblk);
	}

	return (error);
}

/*
 * Metadata flavor of nandfs_bread().
 * NOTE(review): the body is currently identical to nandfs_bread() —
 * presumably kept separate for future divergence; confirm before merging.
 */
int
nandfs_bread_meta(struct nandfs_node *node, nandfs_lbn_t blocknr,
    struct ucred *cred, int flags, struct buf **bpp)
{
	nandfs_daddr_t vblk;
	int error;

	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
	    blocknr));

	error = bread(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
	    cred, bpp);

	KASSERT(error == 0, ("%s: vp:%p lbn:%#jx err:%d\n", __func__,
	    NTOV(node), blocknr, error));

	if (!nandfs_vblk_get(*bpp) &&
	    ((*bpp)->b_flags & B_CACHE) && node->nn_ino != NANDFS_DAT_INO) {
		nandfs_bmap_lookup(node, blocknr, &vblk);
		nandfs_vblk_set(*bpp, vblk);
	}

	return (error);
}

/*
 * Release a node's virtual block in the DAT and drop the node's block
 * count.  Returns the nandfs_vblock_end() error, if any.
 */
int
nandfs_bdestroy(struct nandfs_node *node, nandfs_daddr_t vblk)
{
	int error;

	if (!NANDFS_SYS_NODE(node->nn_ino))
		NANDFS_WRITEASSERT(node->nn_nandfsdev);

	error = nandfs_vblock_end(node->nn_nandfsdev, vblk);
	if (error) {
		nandfs_error("%s: ending vblk: %jx failed\n",
		    __func__, (uintmax_t)vblk);
		return (error);
	}
	node->nn_inode.i_blocks--;

	return (0);
}

/*
 * Create a new, zeroed logical block for a node: getblk() it, insert it
 * into the node's bmap and bump the block count.  Returns 0, an errno from
 * the bmap insert, or -1 if getblk() yielded no buffer.
 */
int
nandfs_bcreate(struct nandfs_node *node, nandfs_lbn_t blocknr,
    struct ucred *cred, int flags, struct buf **bpp)
{
	int error;

	ASSERT_VOP_LOCKED(NTOV(node), __func__);
	if (!NANDFS_SYS_NODE(node->nn_ino))
		NANDFS_WRITEASSERT(node->nn_nandfsdev);

	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
	    blocknr));

	*bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
	    0, 0, 0);

	KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
	    blocknr));

	if (*bpp) {
		vfs_bio_clrbuf(*bpp);
		(*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */
		error = nandfs_bmap_insert_block(node, blocknr, *bpp);
		if (error) {
			nandfs_warning("%s: failed bmap insert node:%p"
			    " blk:%jx\n", __func__, node, blocknr);
			brelse(*bpp);
			return (error);
		}
		node->nn_inode.i_blocks++;

		return (0);
	}

	return (-1);
}

/*
 * Create a new metadata block for a node and pre-assign it a virtual block
 * number: allocated from the DAT for ordinary metadata files, or taken
 * from the device's fake-vblk counter for the DAT itself.
 */
int
nandfs_bcreate_meta(struct nandfs_node *node, nandfs_lbn_t blocknr,
    struct ucred *cred, int flags, struct buf **bpp)
{
	struct nandfs_device *fsdev;
	nandfs_daddr_t vblk;
	int error;

	ASSERT_VOP_LOCKED(NTOV(node), __func__);
	NANDFS_WRITEASSERT(node->nn_nandfsdev);

	DPRINTF(BLOCK, ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
	    blocknr));

	fsdev = node->nn_nandfsdev;

	*bpp = getblk(NTOV(node), blocknr, node->nn_nandfsdev->nd_blocksize,
	    0, 0, 0);

	KASSERT((*bpp), ("%s: vp:%p lbn:%#jx\n", __func__, NTOV(node),
	    blocknr));

	memset((*bpp)->b_data, 0, fsdev->nd_blocksize);

	vfs_bio_clrbuf(*bpp);
	(*bpp)->b_blkno = ~(0); /* To avoid VOP_BMAP in bdwrite */

	nandfs_buf_set(*bpp, NANDFS_VBLK_ASSIGNED);

	if (node->nn_ino != NANDFS_DAT_INO) {
		error = nandfs_vblock_alloc(fsdev, &vblk);
		if (error) {
			nandfs_buf_clear(*bpp, NANDFS_VBLK_ASSIGNED);
			brelse(*bpp);
			return (error);
		}
	} else
		vblk = fsdev->nd_fakevblk++;

	nandfs_vblk_set(*bpp, vblk);

	nandfs_bmap_insert_block(node, blocknr, *bpp);
	return (0);
}

/* Translate index to a file block number and an entry */
/*
 * Map a metadata-file entry index to the logical block holding it and the
 * entry's slot within that block, using the layout constants from
 * nandfs_calc_mdt_consts().
 */
void
nandfs_mdt_trans(struct nandfs_mdt *mdt, uint64_t index,
    nandfs_lbn_t *blocknr, uint32_t *entry_in_block)
{
	uint64_t blknr;
	uint64_t group, group_offset, blocknr_in_group;
	uint64_t desc_block, desc_offset;

	/* Calculate our offset in the file */
	group = index / mdt->entries_per_group;
	group_offset = index % mdt->entries_per_group;
	desc_block = group / mdt->groups_per_desc_block;
	desc_offset = group % mdt->groups_per_desc_block;
	blocknr_in_group = group_offset / mdt->entries_per_block;

	/* To descgroup offset */
	blknr = 1 + desc_block * mdt->blocks_per_desc_block;

	/* To group offset */
	blknr += desc_offset * mdt->blocks_per_group;

	/* To actual file block */
	blknr += 1 + blocknr_in_group;

	*blocknr = blknr;
	*entry_in_block = group_offset % mdt->entries_per_block;
}

/*
 * Like nandfs_mdt_trans(), but also intended to return the descriptor and
 * bitmap block numbers for the index (continues past this view).
 */
void
nandfs_mdt_trans_blk(struct nandfs_mdt *mdt, uint64_t index, uint64_t *desc,
    uint64_t *bitmap, nandfs_lbn_t *blocknr, uint32_t *entry_in_block)
{
	uint64_t blknr;
	uint64_t group, group_offset, blocknr_in_group;
	uint64_t desc_block, desc_offset;

	/* Calculate our offset in the file */
	group = index / mdt->entries_per_group;
group_offset = index % mdt->entries_per_group; desc_block = group / mdt->groups_per_desc_block; desc_offset = group % mdt->groups_per_desc_block; blocknr_in_group = group_offset / mdt->entries_per_block; /* To descgroup offset */ *desc = desc_block * mdt->blocks_per_desc_block; blknr = 1 + desc_block * mdt->blocks_per_desc_block; /* To group offset */ blknr += desc_offset * mdt->blocks_per_group; *bitmap = blknr; /* To actual file block */ blknr += 1 + blocknr_in_group; *blocknr = blknr; *entry_in_block = group_offset % mdt->entries_per_block; DPRINTF(ALLOC, ("%s: desc_buf: %jx bitmap_buf: %jx entry_buf: %jx entry: %x\n", __func__, (uintmax_t)*desc, (uintmax_t)*bitmap, (uintmax_t)*blocknr, *entry_in_block)); } int nandfs_vtop(struct nandfs_node *node, nandfs_daddr_t vblocknr, nandfs_daddr_t *pblocknr) { struct nandfs_node *dat_node; struct nandfs_dat_entry *entry; struct buf *bp; nandfs_lbn_t ldatblknr; uint32_t entry_in_block; int locked, error; if (node->nn_ino == NANDFS_DAT_INO || node->nn_ino == NANDFS_GC_INO) { *pblocknr = vblocknr; return (0); } /* only translate valid vblocknrs */ if (vblocknr == 0) return (0); dat_node = node->nn_nandfsdev->nd_dat_node; nandfs_mdt_trans(&node->nn_nandfsdev->nd_dat_mdt, vblocknr, &ldatblknr, &entry_in_block); locked = NANDFS_VOP_ISLOCKED(NTOV(dat_node)); if (!locked) VOP_LOCK(NTOV(dat_node), LK_SHARED); error = nandfs_bread(dat_node, ldatblknr, NOCRED, 0, &bp); if (error) { DPRINTF(TRANSLATE, ("vtop: can't read in DAT block %#jx!\n", (uintmax_t)ldatblknr)); brelse(bp); VOP_UNLOCK(NTOV(dat_node), 0); return (error); } /* Get our translation */ entry = ((struct nandfs_dat_entry *) bp->b_data) + entry_in_block; DPRINTF(TRANSLATE, ("\tentry %p data %p entry_in_block %x\n", entry, bp->b_data, entry_in_block)) DPRINTF(TRANSLATE, ("\tvblk %#jx -> %#jx for cp [%#jx-%#jx]\n", (uintmax_t)vblocknr, (uintmax_t)entry->de_blocknr, (uintmax_t)entry->de_start, (uintmax_t)entry->de_end)); *pblocknr = entry->de_blocknr; brelse(bp); if 
(!locked) VOP_UNLOCK(NTOV(dat_node), 0); MPASS(*pblocknr >= node->nn_nandfsdev->nd_fsdata.f_first_data_block || *pblocknr == 0); return (0); } int nandfs_segsum_valid(struct nandfs_segment_summary *segsum) { return (segsum->ss_magic == NANDFS_SEGSUM_MAGIC); } int nandfs_load_segsum(struct nandfs_device *fsdev, nandfs_daddr_t blocknr, struct nandfs_segment_summary *segsum) { struct buf *bp; int error; DPRINTF(VOLUMES, ("nandfs: try segsum at block %jx\n", (uintmax_t)blocknr)); error = nandfs_dev_bread(fsdev, blocknr, NOCRED, 0, &bp); if (error) return (error); memcpy(segsum, bp->b_data, sizeof(struct nandfs_segment_summary)); brelse(bp); if (!nandfs_segsum_valid(segsum)) { DPRINTF(VOLUMES, ("%s: bad magic pseg:%jx\n", __func__, blocknr)); return (EINVAL); } return (error); } static int nandfs_load_super_root(struct nandfs_device *nandfsdev, struct nandfs_segment_summary *segsum, uint64_t pseg) { struct nandfs_super_root super_root; struct buf *bp; uint64_t blocknr; uint32_t super_root_crc, comp_crc; int off, error; /* Check if there is a superroot */ if ((segsum->ss_flags & NANDFS_SS_SR) == 0) { DPRINTF(VOLUMES, ("%s: no super root in pseg:%jx\n", __func__, pseg)); return (ENOENT); } /* Get our super root, located at the end of the pseg */ blocknr = pseg + segsum->ss_nblocks - 1; DPRINTF(VOLUMES, ("%s: try at %#jx\n", __func__, (uintmax_t)blocknr)); error = nandfs_dev_bread(nandfsdev, blocknr, NOCRED, 0, &bp); if (error) return (error); memcpy(&super_root, bp->b_data, sizeof(struct nandfs_super_root)); brelse(bp); /* Check super root CRC */ super_root_crc = super_root.sr_sum; off = sizeof(super_root.sr_sum); comp_crc = crc32((uint8_t *)&super_root + off, NANDFS_SR_BYTES - off); if (super_root_crc != comp_crc) { DPRINTF(VOLUMES, ("%s: invalid crc:%#x [expect:%#x]\n", __func__, super_root_crc, comp_crc)); return (EINVAL); } nandfsdev->nd_super_root = super_root; DPRINTF(VOLUMES, ("%s: got valid superroot\n", __func__)); return (0); } /* * Search for the last super 
 root recorded.
 */
int
nandfs_search_super_root(struct nandfs_device *nandfsdev)
{
	struct nandfs_super_block *super;
	struct nandfs_segment_summary segsum;
	uint64_t seg_start, seg_end, cno, seq, create, pseg;
	uint64_t segnum;
	int error, found;

	error = found = 0;

	/* Search for last super root */
	pseg = nandfsdev->nd_super.s_last_pseg;
	segnum = nandfs_get_segnum_of_block(nandfsdev, pseg);
	cno = nandfsdev->nd_super.s_last_cno;
	create = seq = 0;

	DPRINTF(VOLUMES, ("%s: start in pseg %#jx\n", __func__,
	    (uintmax_t)pseg));

	/*
	 * Walk the chain of partial segments forward from the superblock's
	 * last-known pseg; stop when a segsum fails to load or when its
	 * sequence/creation stamps go backwards (end of the log).
	 */
	for (;;) {
		error = nandfs_load_segsum(nandfsdev, pseg, &segsum);
		if (error)
			break;

		if (segsum.ss_seq < seq || segsum.ss_create < create)
			break;

		/* Try to load super root */
		if (segsum.ss_flags & NANDFS_SS_SR) {
			error = nandfs_load_super_root(nandfsdev, &segsum,
			    pseg);
			if (error)
				break;	/* confused */
			found = 1;

			/* Remember the newest valid state in nd_super. */
			super = &nandfsdev->nd_super;
			nandfsdev->nd_last_segsum = segsum;
			super->s_last_pseg = pseg;
			super->s_last_cno = cno++;
			super->s_last_seq = segsum.ss_seq;
			super->s_state = NANDFS_VALID_FS;
			seq = segsum.ss_seq;
			create = segsum.ss_create;
		} else {
			seq = segsum.ss_seq;
			create = segsum.ss_create;
		}

		/* Calculate next partial segment location */
		pseg += segsum.ss_nblocks;
		DPRINTF(VOLUMES, ("%s: next partial seg is %jx\n", __func__,
		    (uintmax_t)pseg));

		/* Did we reach the end of the segment? if so, go to the next */
		nandfs_get_segment_range(nandfsdev, segnum, &seg_start,
		    &seg_end);
		if (pseg >= seg_end) {
			pseg = segsum.ss_next;
			DPRINTF(VOLUMES,
			    (" partial seg oor next is %jx[%jx - %jx]\n",
			    (uintmax_t)pseg, (uintmax_t)seg_start,
			    (uintmax_t)seg_end));
		}
		segnum = nandfs_get_segnum_of_block(nandfsdev, pseg);
	}

	/* An error after at least one super root was found is not fatal. */
	if (error && !found)
		return (error);

	return (0);
}

/*
 * Allocate and initialize a nandfs node plus its vnode for inode 'ino'.
 * With a mountpoint (nmp != NULL) the vnode uses the regular nandfs vnode
 * ops and is inserted into the mount; otherwise it is a system vnode.  The
 * vnode is returned exclusively locked in *nodep.
 */
int
nandfs_get_node_raw(struct nandfs_device *nandfsdev, struct nandfsmount *nmp,
    uint64_t ino, struct nandfs_inode *inode, struct nandfs_node **nodep)
{
	struct nandfs_node *node;
	struct vnode *nvp;
	struct mount *mp;
	int error;

	*nodep = NULL;

	/* Associate with mountpoint if present */
	if (nmp) {
		mp = nmp->nm_vfs_mountp;
		error = getnewvnode("nandfs", mp, &nandfs_vnodeops, &nvp);
		if (error)
			return (error);
	} else {
		mp = NULL;
		error = getnewvnode("snandfs", mp, &nandfs_system_vnodeops,
		    &nvp);
		if (error)
			return (error);
	}

	if (mp)
		NANDFS_WRITELOCK(nandfsdev);

	DPRINTF(IFILE, ("%s: ino: %#jx -> vp: %p\n", __func__,
	    (uintmax_t)ino, nvp));
	/* Lock node */
	lockmgr(nvp->v_vnlock, LK_EXCLUSIVE, NULL);

	if (mp) {
		error = insmntque(nvp, mp);
		if (error != 0) {
			/*
			 * NOTE(review): on this path the NANDFS_WRITELOCK
			 * taken above appears to be left held and the vnode
			 * is not released — verify whether insmntque()
			 * cleans up the vnode itself.
			 */
			*nodep = NULL;
			return (error);
		}
	}

	node = uma_zalloc(nandfs_node_zone, M_WAITOK | M_ZERO);

	/* Crosslink */
	node->nn_vnode = nvp;
	nvp->v_bufobj.bo_ops = &buf_ops_nandfs;
	node->nn_nmp = nmp;
	node->nn_nandfsdev = nandfsdev;
	nvp->v_data = node;

	/* Initialise NANDFS node */
	node->nn_ino = ino;
	if (inode != NULL)
		node->nn_inode = *inode;

	nandfs_vinit(nvp, ino);

	/* Return node */
	*nodep = node;
	DPRINTF(IFILE, ("%s: ino:%#jx vp:%p node:%p\n",
	    __func__, (uintmax_t)ino, nvp, *nodep));

	return (0);
}

/*
 * Look up (or create) the in-core node for inode 'ino' of a mount: first via
 * the vnode hash, otherwise by reading the on-disk inode from the mount's
 * ifile and constructing a node with nandfs_get_node_raw(), which is then
 * entered into the hash.
 */
int
nandfs_get_node(struct nandfsmount *nmp, uint64_t ino,
    struct nandfs_node **nodep)
{
	struct nandfs_device *nandfsdev;
	struct nandfs_inode inode, *entry;
	struct vnode *nvp, *vpp;
	struct thread *td;
	struct buf *bp;
	uint64_t ivblocknr;
	uint32_t entry_in_block;
	int error;

	/* Look up node in hash table */
	td = curthread;
	*nodep = NULL;

	/* System inodes other than the root must not come through a mount. */
	if ((ino < NANDFS_ATIME_INO) && (ino != NANDFS_ROOT_INO)) {
		printf("nandfs_get_node: system ino %"PRIu64" not in mount "
		    "point!\n", ino);
		return (ENOENT);
	}

	error = vfs_hash_get(nmp->nm_vfs_mountp, ino, LK_EXCLUSIVE, td, &nvp,
	    NULL, NULL);
	if (error)
		return (error);

	if (nvp != NULL) {
		*nodep = (struct nandfs_node *)nvp->v_data;
		return (0);
	}

	/* Look up inode structure in mountpoints ifile */
	nandfsdev = nmp->nm_nandfsdev;
	nandfs_mdt_trans(&nandfsdev->nd_ifile_mdt, ino, &ivblocknr,
	    &entry_in_block);

	VOP_LOCK(NTOV(nmp->nm_ifile_node), LK_SHARED);
	error = nandfs_bread(nmp->nm_ifile_node, ivblocknr, NOCRED, 0, &bp);
	if (error) {
		brelse(bp);
		VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0);
		return (ENOENT);
	}

	/* Get inode entry */
	entry = (struct nandfs_inode *) bp->b_data + entry_in_block;
	memcpy(&inode, entry, sizeof(struct nandfs_inode));
	brelse(bp);
	VOP_UNLOCK(NTOV(nmp->nm_ifile_node), 0);

	/* Get node */
	error = nandfs_get_node_raw(nmp->nm_nandfsdev, nmp, ino, &inode,
	    nodep);
	if (error) {
		*nodep = NULL;
		return (error);
	}

	nvp = (*nodep)->nn_vnode;

	error = vfs_hash_insert(nvp, ino, 0, td, &vpp, NULL, NULL);
	if (error) {
		*nodep = NULL;
		return (error);
	}

	return (error);
}

/*
 * Detach a node from its vnode and free it; *nodep is cleared.  NULL input
 * is tolerated.
 */
void
nandfs_dispose_node(struct nandfs_node **nodep)
{
	struct nandfs_node *node;
	struct vnode *vp;

	/* Protect against rogue values */
	node = *nodep;
	if (!node) {
		return;
	}

	DPRINTF(NODE, ("nandfs_dispose_node: %p\n", *nodep));

	vp = NTOV(node);
	vp->v_data = NULL;

	/* Free our associated memory */
	uma_zfree(nandfs_node_zone, node);

	*nodep = NULL;
}

/*
 * Linear-scan directory vnode 'dvp' for 'name' (length 'namelen').  On a
 * match *found is set, with the inode number in *ino and the directory byte
 * offset in *off.  Returns ENOTDIR for non-directories, EIO on read errors.
 */
int
nandfs_lookup_name_in_dir(struct vnode *dvp, const char *name, int namelen,
    uint64_t *ino, int *found, uint64_t *off)
{
	struct nandfs_node *dir_node = VTON(dvp);
	struct nandfs_dir_entry *ndirent;
	struct buf *bp;
	uint64_t file_size, diroffset, blkoff;
	uint64_t blocknr;
	uint32_t blocksize = dir_node->nn_nandfsdev->nd_blocksize;
	uint8_t *pos, name_len;
	int error;

	*found = 0;

	DPRINTF(VNCALL, ("%s: %s file\n", __func__, name));
	if (dvp->v_type != VDIR) {
		return (ENOTDIR);
	}

	/* Get directory filesize */
	file_size = dir_node->nn_inode.i_size;

	/* Walk the directory */
	diroffset = 0;
	blocknr = 0;
	blkoff = 0;
	error = nandfs_bread(dir_node, blocknr, NOCRED, 0, &bp);
	if (error) {
		brelse(bp);
		return (EIO);
	}

	while (diroffset < file_size) {
		/* Cross into the next directory block when needed. */
		if (blkoff >= blocksize) {
			blkoff = 0; blocknr++;
			brelse(bp);
			error = nandfs_bread(dir_node, blocknr, NOCRED, 0,
			    &bp);
			if (error) {
				brelse(bp);
				return (EIO);
			}
		}

		/* Read in one dirent */
		pos = (uint8_t *) bp->b_data + blkoff;
		ndirent = (struct nandfs_dir_entry *) pos;

		name_len = ndirent->name_len;

		if ((name_len == namelen) &&
		    (strncmp(name, ndirent->name, name_len) == 0) &&
		    (ndirent->inode != 0)) {
			*ino = ndirent->inode;
			*off = diroffset;
			DPRINTF(LOOKUP, ("found `%.*s` with ino %"PRIx64"\n",
			    name_len, ndirent->name, *ino));
			*found = 1;
			break;
		}

		/* Advance */
		diroffset += ndirent->rec_len;
		blkoff += ndirent->rec_len;
	}
	brelse(bp);

	return (error);
}

/*
 * Copy fs-data, superblock, and the mount's device name into *fsinfo for the
 * GET_FSINFO style ioctl path.
 */
int
nandfs_get_fsinfo(struct nandfsmount *nmp, struct nandfs_fsinfo *fsinfo)
{
	struct nandfs_device *fsdev;

	fsdev = nmp->nm_nandfsdev;

	memcpy(&fsinfo->fs_fsdata, &fsdev->nd_fsdata,
	    sizeof(fsdev->nd_fsdata));
	memcpy(&fsinfo->fs_super, &fsdev->nd_super, sizeof(fsdev->nd_super));
	snprintf(fsinfo->fs_dev, sizeof(fsinfo->fs_dev), "%s",
	    nmp->nm_vfs_mountp->mnt_stat.f_mntfromname);

	return (0);
}

/*
 * Initialize a fresh on-disk inode: zero sizes/blocks, stamp ctime/mtime
 * with the current vfs timestamp, set mode, and link count 1 (2 for
 * directories, counting "." ).
 */
void
nandfs_inode_init(struct nandfs_inode *inode, uint16_t mode)
{
	struct timespec ts;

	vfs_timestamp(&ts);

	inode->i_blocks = 0;
	inode->i_size = 0;
	inode->i_ctime = ts.tv_sec;
	inode->i_ctime_nsec = ts.tv_nsec;
	inode->i_mtime = ts.tv_sec;
	inode->i_mtime_nsec = ts.tv_nsec;
	inode->i_mode = mode;
	inode->i_links_count = 1;
	if (S_ISDIR(mode))
		inode->i_links_count = 2;
	inode->i_flags = 0;

	inode->i_special = 0;
	memset(inode->i_db, 0, sizeof(inode->i_db));
	memset(inode->i_ib, 0, sizeof(inode->i_ib));
}

/* Scrub an inode that must already have no blocks attached. */
void
nandfs_inode_destroy(struct nandfs_inode *inode)
{

	MPASS(inode->i_blocks == 0);
	bzero(inode, sizeof(*inode));
}

/*
 * Returns 1 when the dirty-buffer backlog plus reserved segments would
 * exceed the space of the remaining clean segments, i.e. the fs is
 * effectively full.
 */
int
nandfs_fs_full(struct nandfs_device *nffsdev)
{
	uint64_t space, bps;

	bps = nffsdev->nd_fsdata.f_blocks_per_segment;
	space = (nffsdev->nd_clean_segs - 1) * bps;

	DPRINTF(BUF, ("%s: bufs:%jx space:%jx\n", __func__,
	    (uintmax_t)nffsdev->nd_dirty_bufs, (uintmax_t)space));

	if (nffsdev->nd_dirty_bufs + (nffsdev->nd_segs_reserved * bps) >=
	    space)
		return (1);

	return (0);
}

/*
 * Common worker for nandfs_dirty_buf()/nandfs_dirty_buf_meta(): mark 'bp'
 * managed + delayed-write, bump the dirty-buffer count, optionally dirty the
 * indirect (bmap) blocks, and kick the syncer when the backlog crosses
 * nandfs_max_dirty_segs segments.  Returns ENOSPC when the fs is full and
 * 'force' is not set (system nodes are always allowed through).
 */
static int
_nandfs_dirty_buf(struct buf *bp, int dirty_meta, int force)
{
	struct nandfs_device *nffsdev;
	struct nandfs_node *node;
	uint64_t ino, bps;

	/* Already queued for the segment constructor — nothing to do. */
	if (NANDFS_ISGATHERED(bp)) {
		bqrelse(bp);
		return (0);
	}
	if ((bp->b_flags & (B_MANAGED | B_DELWRI)) ==
	    (B_MANAGED | B_DELWRI)) {
		bqrelse(bp);
		return (0);
	}

	node = VTON(bp->b_vp);
	nffsdev = node->nn_nandfsdev;
	DPRINTF(BUF, ("%s: buf:%p\n", __func__, bp));
	ino = node->nn_ino;
	if (nandfs_fs_full(nffsdev) && !NANDFS_SYS_NODE(ino) && !force) {
		brelse(bp);
		return (ENOSPC);
	}
	bp->b_flags |= B_MANAGED;
	bdwrite(bp);

	nandfs_dirty_bufs_increment(nffsdev);

	KASSERT((bp->b_vp), ("vp missing for bp"));
	KASSERT((nandfs_vblk_get(bp) || ino == NANDFS_DAT_INO),
	    ("bp vblk is 0"));

	/*
	 * To maintain consistency of FS we need to force making
	 * meta buffers dirty, even if free space is low.
	 */
	if (dirty_meta && ino != NANDFS_GC_INO)
		nandfs_bmap_dirty_blocks(VTON(bp->b_vp), bp, 1);

	bps = nffsdev->nd_fsdata.f_blocks_per_segment;

	if (nffsdev->nd_dirty_bufs >= (bps * nandfs_max_dirty_segs)) {
		mtx_lock(&nffsdev->nd_sync_mtx);
		if (nffsdev->nd_syncing == 0) {
			DPRINTF(SYNC, ("%s: wakeup gc\n", __func__));
			nffsdev->nd_syncing = 1;
			wakeup(&nffsdev->nd_syncing);
		}
		mtx_unlock(&nffsdev->nd_sync_mtx);
	}

	return (0);
}

/* Dirty a data buffer, also dirtying its bmap (meta) blocks. */
int
nandfs_dirty_buf(struct buf *bp, int force)
{

	return (_nandfs_dirty_buf(bp, 1, force));
}

/* Dirty a metadata buffer without recursing into bmap blocks. */
int
nandfs_dirty_buf_meta(struct buf *bp, int force)
{

	return (_nandfs_dirty_buf(bp, 0, force));
}

/*
 * Undo a delayed write on 'bp': clear B_DELWRI/B_MANAGED, drop the device's
 * dirty-buffer count, and release the buffer.
 */
void
nandfs_undirty_buf_fsdev(struct nandfs_device *nffsdev, struct buf *bp)
{

	BUF_ASSERT_HELD(bp);

	if (bp->b_flags & B_DELWRI) {
		bp->b_flags &= ~(B_DELWRI|B_MANAGED);
		nandfs_dirty_bufs_decrement(nffsdev);
	}
	/*
	 * Since it is now being written, we can clear its deferred write flag.
	 */
	bp->b_flags &= ~B_DEFERRED;

	brelse(bp);
}

/* Convenience wrapper resolving the device from the buffer's vnode. */
void
nandfs_undirty_buf(struct buf *bp)
{
	struct nandfs_node *node;

	node = VTON(bp->b_vp);

	nandfs_undirty_buf_fsdev(node->nn_nandfsdev, bp);
}

/* Stash a virtual block number in the buffer (b_fsprivate1). */
void
nandfs_vblk_set(struct buf *bp, nandfs_daddr_t blocknr)
{
	nandfs_daddr_t *vblk = (nandfs_daddr_t *)(&bp->b_fsprivate1);

	*vblk = blocknr;
}

/* Retrieve the virtual block number stored by nandfs_vblk_set(). */
nandfs_daddr_t
nandfs_vblk_get(struct buf *bp)
{
	nandfs_daddr_t *vblk = (nandfs_daddr_t *)(&bp->b_fsprivate1);

	return (*vblk);
}

/* Set flag bits in the buffer's private flag word (b_fsprivate3). */
void
nandfs_buf_set(struct buf *bp, uint32_t bits)
{
	uintptr_t flags;

	flags = (uintptr_t)bp->b_fsprivate3;
	flags |= (uintptr_t)bits;
	bp->b_fsprivate3 = (void *)flags;
}

/* Clear flag bits in the buffer's private flag word. */
void
nandfs_buf_clear(struct buf *bp, uint32_t bits)
{
	uintptr_t flags;

	flags = (uintptr_t)bp->b_fsprivate3;
	flags &= ~(uintptr_t)bits;
	bp->b_fsprivate3 = (void *)flags;
}

/* Test flag bits in the buffer's private flag word; returns 0/1. */
int
nandfs_buf_check(struct buf *bp, uint32_t bits)
{
	uintptr_t flags;

	flags = (uintptr_t)bp->b_fsprivate3;
	if (flags & bits)
		return (1);

	return (0);
}

int
nandfs_erase(struct nandfs_device *fsdev, off_t offset, size_t size)
{
	DPRINTF(BLOCK, ("%s: performing erase at offset %jx size %zx\n",
	    __func__,
offset, size)); MPASS(size % fsdev->nd_erasesize == 0); return (g_delete_data(fsdev->nd_gconsumer, offset, size)); } int nandfs_vop_islocked(struct vnode *vp) { int islocked; islocked = VOP_ISLOCKED(vp); return (islocked == LK_EXCLUSIVE || islocked == LK_SHARED); } nandfs_daddr_t nandfs_block_to_dblock(struct nandfs_device *fsdev, nandfs_lbn_t block) { return (btodb(block * fsdev->nd_blocksize)); } Index: head/sys/fs/nandfs/nandfs_subr.h =================================================================== --- head/sys/fs/nandfs/nandfs_subr.h (revision 326267) +++ head/sys/fs/nandfs/nandfs_subr.h (revision 326268) @@ -1,238 +1,240 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf * Copyright (c) 2008, 2009 Reinoud Zandijk * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * From: NetBSD: nilfs_subr.h,v 1.1 2009/07/18 16:31:42 reinoud * * $FreeBSD$ */ #ifndef _FS_NANDFS_NANDFS_SUBR_H_ #define _FS_NANDFS_NANDFS_SUBR_H_ struct nandfs_mdt; struct nandfs_alloc_request { uint64_t entrynum; struct buf *bp_desc; struct buf *bp_bitmap; struct buf *bp_entry; }; /* Segment creation */ void nandfs_wakeup_wait_sync(struct nandfs_device *, int); int nandfs_segment_constructor(struct nandfsmount *, int); int nandfs_sync_file(struct vnode *); /* Basic calculators */ uint64_t nandfs_get_segnum_of_block(struct nandfs_device *, nandfs_daddr_t); void nandfs_get_segment_range(struct nandfs_device *, uint64_t, uint64_t *, uint64_t *); void nandfs_calc_mdt_consts(struct nandfs_device *, struct nandfs_mdt *, int); /* Log reading / volume helpers */ int nandfs_search_super_root(struct nandfs_device *); /* Reading */ int nandfs_dev_bread(struct nandfs_device *, nandfs_daddr_t, struct ucred *, int, struct buf **); int nandfs_bread(struct nandfs_node *, nandfs_lbn_t, struct ucred *, int, struct buf **); int nandfs_bread_meta(struct nandfs_node *, nandfs_lbn_t, struct ucred *, int, struct buf **); int nandfs_bdestroy(struct nandfs_node *, nandfs_daddr_t); int nandfs_bcreate(struct nandfs_node *, nandfs_lbn_t, struct ucred *, int, struct buf **); int nandfs_bcreate_meta(struct nandfs_node *, nandfs_lbn_t, struct ucred *, int, struct buf **); int nandfs_bread_create(struct nandfs_node *, nandfs_lbn_t, struct ucred *, int, struct buf **); /* vtop operations */ int 
nandfs_vtop(struct nandfs_node *, nandfs_daddr_t, nandfs_daddr_t *); /* Node action implementators */ int nandfs_vinit(struct vnode *, uint64_t); int nandfs_get_node(struct nandfsmount *, uint64_t, struct nandfs_node **); int nandfs_get_node_raw(struct nandfs_device *, struct nandfsmount *, uint64_t, struct nandfs_inode *, struct nandfs_node **); void nandfs_dispose_node(struct nandfs_node **); void nandfs_itimes(struct vnode *); int nandfs_lookup_name_in_dir(struct vnode *, const char *, int, uint64_t *, int *, uint64_t *); int nandfs_create_node(struct vnode *, struct vnode **, struct vattr *, struct componentname *); void nandfs_delete_node(struct nandfs_node *); int nandfs_chsize(struct vnode *, u_quad_t, struct ucred *); int nandfs_dir_detach(struct nandfsmount *, struct nandfs_node *, struct nandfs_node *, struct componentname *); int nandfs_dir_attach(struct nandfsmount *, struct nandfs_node *, struct nandfs_node *, struct vattr *, struct componentname *); int nandfs_dirty_buf(struct buf *, int); int nandfs_dirty_buf_meta(struct buf *, int); int nandfs_fs_full(struct nandfs_device *); void nandfs_undirty_buf_fsdev(struct nandfs_device *, struct buf *); void nandfs_undirty_buf(struct buf *); void nandfs_clear_buf(struct buf *); void nandfs_buf_set(struct buf *, uint32_t); void nandfs_buf_clear(struct buf *, uint32_t); int nandfs_buf_check(struct buf *, uint32_t); int nandfs_find_free_entry(struct nandfs_mdt *, struct nandfs_node *, struct nandfs_alloc_request *); int nandfs_find_entry(struct nandfs_mdt *, struct nandfs_node *, struct nandfs_alloc_request *); int nandfs_alloc_entry(struct nandfs_mdt *, struct nandfs_alloc_request *); void nandfs_abort_entry(struct nandfs_alloc_request *); int nandfs_free_entry(struct nandfs_mdt *, struct nandfs_alloc_request *); int nandfs_get_entry_block(struct nandfs_mdt *, struct nandfs_node *, struct nandfs_alloc_request *, uint32_t *, int); /* Inode management. 
*/ int nandfs_node_create(struct nandfsmount *, struct nandfs_node **, uint16_t); int nandfs_node_destroy(struct nandfs_node *); int nandfs_node_update(struct nandfs_node *); int nandfs_get_node_entry(struct nandfsmount *, struct nandfs_inode **, uint64_t, struct buf **); void nandfs_mdt_trans_blk(struct nandfs_mdt *, uint64_t, uint64_t *, uint64_t *, nandfs_lbn_t *, uint32_t *); /* vblock management */ void nandfs_mdt_trans(struct nandfs_mdt *, uint64_t, nandfs_lbn_t *, uint32_t *); int nandfs_vblock_alloc(struct nandfs_device *, nandfs_daddr_t *); int nandfs_vblock_end(struct nandfs_device *, nandfs_daddr_t); int nandfs_vblock_assign(struct nandfs_device *, nandfs_daddr_t, nandfs_lbn_t); int nandfs_vblock_free(struct nandfs_device *, nandfs_daddr_t); /* Checkpoint management */ int nandfs_get_checkpoint(struct nandfs_device *, struct nandfs_node *, uint64_t); int nandfs_set_checkpoint(struct nandfs_device *, struct nandfs_node *, uint64_t, struct nandfs_inode *, uint64_t); /* Segment management */ int nandfs_alloc_segment(struct nandfs_device *, uint64_t *); int nandfs_update_segment(struct nandfs_device *, uint64_t, uint32_t); int nandfs_free_segment(struct nandfs_device *, uint64_t); int nandfs_clear_segment(struct nandfs_device *, uint64_t); int nandfs_touch_segment(struct nandfs_device *, uint64_t); int nandfs_markgc_segment(struct nandfs_device *, uint64_t); int nandfs_bmap_insert_block(struct nandfs_node *, nandfs_lbn_t, struct buf *); int nandfs_bmap_update_block(struct nandfs_node *, struct buf *, nandfs_lbn_t); int nandfs_bmap_update_dat(struct nandfs_node *, nandfs_daddr_t, struct buf *); int nandfs_bmap_dirty_blocks(struct nandfs_node *, struct buf *, int); int nandfs_bmap_truncate_mapping(struct nandfs_node *, nandfs_lbn_t, nandfs_lbn_t); int nandfs_bmap_lookup(struct nandfs_node *, nandfs_lbn_t, nandfs_daddr_t *); /* dirent */ int nandfs_add_dirent(struct vnode *, uint64_t, char *, long, uint8_t); int nandfs_remove_dirent(struct vnode *, struct 
nandfs_node *, struct componentname *); int nandfs_update_dirent(struct vnode *, struct nandfs_node *, struct nandfs_node *); int nandfs_init_dir(struct vnode *, uint64_t, uint64_t); int nandfs_update_parent_dir(struct vnode *, uint64_t); void nandfs_vblk_set(struct buf *, nandfs_daddr_t); nandfs_daddr_t nandfs_vblk_get(struct buf *); void nandfs_inode_init(struct nandfs_inode *, uint16_t); void nandfs_inode_destroy(struct nandfs_inode *); /* ioctl */ int nandfs_get_seg_stat(struct nandfs_device *, struct nandfs_seg_stat *); int nandfs_chng_cpmode(struct nandfs_node *, struct nandfs_cpmode *); int nandfs_get_cpinfo_ioctl(struct nandfs_node *, struct nandfs_argv *); int nandfs_delete_cp(struct nandfs_node *, uint64_t start, uint64_t); int nandfs_make_snap(struct nandfs_device *, uint64_t *); int nandfs_delete_snap(struct nandfs_device *, uint64_t); int nandfs_get_cpstat(struct nandfs_node *, struct nandfs_cpstat *); int nandfs_get_segment_info_ioctl(struct nandfs_device *, struct nandfs_argv *); int nandfs_get_dat_vinfo_ioctl(struct nandfs_device *, struct nandfs_argv *); int nandfs_get_dat_bdescs_ioctl(struct nandfs_device *, struct nandfs_argv *); int nandfs_get_fsinfo(struct nandfsmount *, struct nandfs_fsinfo *); int nandfs_get_cpinfo(struct nandfs_node *, uint64_t, uint16_t, struct nandfs_cpinfo *, uint32_t, uint32_t *); nandfs_lbn_t nandfs_get_maxfilesize(struct nandfs_device *); int nandfs_write_superblock(struct nandfs_device *); extern int nandfs_sync_interval; extern int nandfs_max_dirty_segs; extern int nandfs_cps_between_sblocks; struct buf *nandfs_geteblk(int, int); void nandfs_dirty_bufs_increment(struct nandfs_device *); void nandfs_dirty_bufs_decrement(struct nandfs_device *); int nandfs_start_cleaner(struct nandfs_device *); int nandfs_stop_cleaner(struct nandfs_device *); int nandfs_segsum_valid(struct nandfs_segment_summary *); int nandfs_load_segsum(struct nandfs_device *, nandfs_daddr_t, struct nandfs_segment_summary *); int 
nandfs_get_segment_info(struct nandfs_device *, struct nandfs_suinfo *, uint32_t, uint64_t); int nandfs_get_segment_info_filter(struct nandfs_device *, struct nandfs_suinfo *, uint32_t, uint64_t, uint64_t *, uint32_t, uint32_t); int nandfs_get_dat_vinfo(struct nandfs_device *, struct nandfs_vinfo *, uint32_t); int nandfs_get_dat_bdescs(struct nandfs_device *, struct nandfs_bdesc *, uint32_t); #define NANDFS_VBLK_ASSIGNED 1 #define NANDFS_IS_INDIRECT(bp) ((bp)->b_lblkno < 0) int nandfs_erase(struct nandfs_device *, off_t, size_t); #define NANDFS_VOP_ISLOCKED(vp) nandfs_vop_islocked((vp)) int nandfs_vop_islocked(struct vnode *vp); nandfs_daddr_t nandfs_block_to_dblock(struct nandfs_device *, nandfs_lbn_t); #define DEBUG_MODE #if defined(DEBUG_MODE) #define nandfs_error panic #define nandfs_warning printf #elif defined(TEST_MODE) #define nandfs_error printf #define nandfs_warning printf #else #define nandfs_error(...) #define nandfs_warning(...) #endif #endif /* !_FS_NANDFS_NANDFS_SUBR_H_ */ Index: head/sys/fs/nandfs/nandfs_sufile.c =================================================================== --- head/sys/fs/nandfs/nandfs_sufile.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_sufile.c (revision 326268) @@ -1,569 +1,571 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SU_USAGE_OFF(bp, offset) \ ((struct nandfs_segment_usage *)((bp)->b_data + offset)) static int nandfs_seg_usage_blk_offset(struct nandfs_device *fsdev, uint64_t seg, uint64_t *blk, uint64_t *offset) { uint64_t off; uint16_t seg_size; seg_size = fsdev->nd_fsdata.f_segment_usage_size; off = roundup(sizeof(struct nandfs_sufile_header), seg_size); off += (seg * seg_size); *blk = off / fsdev->nd_blocksize; *offset = off % fsdev->nd_blocksize; return (0); } /* Alloc new segment */ int nandfs_alloc_segment(struct nandfs_device *fsdev, uint64_t *seg) { struct nandfs_node *su_node; struct nandfs_sufile_header *su_header; struct nandfs_segment_usage *su_usage; struct buf *bp_header, *bp; uint64_t blk, vblk, offset, i, rest, nsegments; uint16_t seg_size; int error, found; seg_size = fsdev->nd_fsdata.f_segment_usage_size; nsegments = fsdev->nd_fsdata.f_nsegments; su_node = fsdev->nd_su_node; ASSERT_VOP_LOCKED(NTOV(su_node), __func__); /* Read header buffer */ error = nandfs_bread(su_node, 0, 
NOCRED, 0, &bp_header); if (error) { brelse(bp_header); return (error); } su_header = (struct nandfs_sufile_header *)bp_header->b_data; /* Get last allocated segment */ i = su_header->sh_last_alloc + 1; found = 0; bp = NULL; while (!found) { nandfs_seg_usage_blk_offset(fsdev, i, &blk, &offset); if(blk != 0) { error = nandfs_bmap_lookup(su_node, blk, &vblk); if (error) { nandfs_error("%s: cannot find vblk for blk " "blk:%jx\n", __func__, blk); return (error); } if (vblk) error = nandfs_bread(su_node, blk, NOCRED, 0, &bp); else error = nandfs_bcreate(su_node, blk, NOCRED, 0, &bp); if (error) { nandfs_error("%s: cannot create/read " "vblk:%jx\n", __func__, vblk); if (bp) brelse(bp); return (error); } su_usage = SU_USAGE_OFF(bp, offset); } else { su_usage = SU_USAGE_OFF(bp_header, offset); bp = bp_header; } rest = (fsdev->nd_blocksize - offset) / seg_size; /* Go through all su usage in block */ while (rest) { /* When last check start from beginning */ if (i == nsegments) break; if (!su_usage->su_flags) { su_usage->su_flags = 1; found = 1; break; } su_usage++; i++; /* If all checked return error */ if (i == su_header->sh_last_alloc) { DPRINTF(SEG, ("%s: cannot allocate segment \n", __func__)); brelse(bp_header); if (blk != 0) brelse(bp); return (1); } rest--; } if (!found) { /* Otherwise read another block */ if (blk != 0) brelse(bp); if (i == nsegments) { blk = 0; i = 0; } else blk++; offset = 0; } } if (found) { *seg = i; su_header->sh_last_alloc = i; su_header->sh_ncleansegs--; su_header->sh_ndirtysegs++; fsdev->nd_super.s_free_blocks_count = su_header->sh_ncleansegs * fsdev->nd_fsdata.f_blocks_per_segment; fsdev->nd_clean_segs--; /* * It is mostly called from syncer() so we want to force * making buf dirty. 
*/ error = nandfs_dirty_buf(bp_header, 1); if (error) { if (bp && bp != bp_header) brelse(bp); return (error); } if (bp && bp != bp_header) nandfs_dirty_buf(bp, 1); DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)i)); return (0); } DPRINTF(SEG, ("%s: failed\n", __func__)); return (1); } /* * Make buffer dirty, it will be updated soon but first it need to be * gathered by syncer. */ int nandfs_touch_segment(struct nandfs_device *fsdev, uint64_t seg) { struct nandfs_node *su_node; struct buf *bp; uint64_t blk, offset; int error; su_node = fsdev->nd_su_node; ASSERT_VOP_LOCKED(NTOV(su_node), __func__); nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset); error = nandfs_bread(su_node, blk, NOCRED, 0, &bp); if (error) { brelse(bp); nandfs_error("%s: cannot preallocate new segment\n", __func__); return (error); } else nandfs_dirty_buf(bp, 1); DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg)); return (error); } /* Update block count of segment */ int nandfs_update_segment(struct nandfs_device *fsdev, uint64_t seg, uint32_t nblks) { struct nandfs_node *su_node; struct nandfs_segment_usage *su_usage; struct buf *bp; uint64_t blk, offset; int error; su_node = fsdev->nd_su_node; ASSERT_VOP_LOCKED(NTOV(su_node), __func__); nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset); error = nandfs_bread(su_node, blk, NOCRED, 0, &bp); if (error) { nandfs_error("%s: read block:%jx to update\n", __func__, blk); brelse(bp); return (error); } su_usage = SU_USAGE_OFF(bp, offset); su_usage->su_lastmod = fsdev->nd_ts.tv_sec; su_usage->su_flags = NANDFS_SEGMENT_USAGE_DIRTY; su_usage->su_nblocks += nblks; DPRINTF(SEG, ("%s: seg:%#jx inc:%#x cur:%#x\n", __func__, (uintmax_t)seg, nblks, su_usage->su_nblocks)); nandfs_dirty_buf(bp, 1); return (0); } /* Make segment free */ int nandfs_free_segment(struct nandfs_device *fsdev, uint64_t seg) { struct nandfs_node *su_node; struct nandfs_sufile_header *su_header; struct nandfs_segment_usage *su_usage; struct buf *bp_header, *bp; 
uint64_t blk, offset;
	int error;

	su_node = fsdev->nd_su_node;
	ASSERT_VOP_LOCKED(NTOV(su_node), __func__);

	/* Read su header */
	error = nandfs_bread(su_node, 0, NOCRED, 0, &bp_header);
	if (error) {
		brelse(bp_header);
		return (error);
	}

	su_header = (struct nandfs_sufile_header *)bp_header->b_data;
	nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);

	/* Read su usage block if other than su header block */
	if (blk != 0) {
		error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
		if (error) {
			brelse(bp);
			brelse(bp_header);
			return (error);
		}
	} else
		/* The entry lives in the header block itself; alias it. */
		bp = bp_header;

	/* Reset su usage data */
	su_usage = SU_USAGE_OFF(bp, offset);
	su_usage->su_lastmod = fsdev->nd_ts.tv_sec;
	su_usage->su_nblocks = 0;
	su_usage->su_flags = 0;

	/* Update clean/dirty counter in header */
	su_header->sh_ncleansegs++;
	su_header->sh_ndirtysegs--;

	/*
	 * Make the buffers dirty.  This is called by the cleaner, so force
	 * dirtying even if not much space is left on the device.
	 */
	nandfs_dirty_buf(bp_header, 1);
	if (bp != bp_header)
		nandfs_dirty_buf(bp, 1);

	/* Update free block count */
	fsdev->nd_super.s_free_blocks_count = su_header->sh_ncleansegs *
	    fsdev->nd_fsdata.f_blocks_per_segment;
	fsdev->nd_clean_segs++;

	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));

	return (0);
}

/*
 * Mark a segment's usage entry with NANDFS_SEGMENT_USAGE_ERROR (used when
 * erasing the segment failed; see nandfs_clear_segment()).
 */
static int
nandfs_bad_segment(struct nandfs_device *fsdev, uint64_t seg)
{
	struct nandfs_node *su_node;
	struct nandfs_segment_usage *su_usage;
	struct buf *bp;
	uint64_t blk, offset;
	int error;

	su_node = fsdev->nd_su_node;
	ASSERT_VOP_LOCKED(NTOV(su_node), __func__);

	nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);
	error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
	if (error) {
		brelse(bp);
		return (error);
	}

	su_usage = SU_USAGE_OFF(bp, offset);
	su_usage->su_lastmod = fsdev->nd_ts.tv_sec;
	su_usage->su_flags = NANDFS_SEGMENT_USAGE_ERROR;

	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));

	nandfs_dirty_buf(bp, 1);

	return (0);
}

/* Flag a segment's usage entry as undergoing garbage collection. */
int
nandfs_markgc_segment(struct nandfs_device *fsdev, uint64_t seg)
{
	struct nandfs_node *su_node;
	struct
nandfs_segment_usage *su_usage;
	struct buf *bp;
	uint64_t blk, offset;
	int error;

	su_node = fsdev->nd_su_node;

	/* Unlike the helpers above, this function takes the lock itself. */
	VOP_LOCK(NTOV(su_node), LK_EXCLUSIVE);

	nandfs_seg_usage_blk_offset(fsdev, seg, &blk, &offset);
	error = nandfs_bread(su_node, blk, NOCRED, 0, &bp);
	if (error) {
		brelse(bp);
		VOP_UNLOCK(NTOV(su_node), 0);
		return (error);
	}

	su_usage = SU_USAGE_OFF(bp, offset);
	/* A segment must not already be marked for GC. */
	MPASS((su_usage->su_flags & NANDFS_SEGMENT_USAGE_GC) == 0);
	su_usage->su_flags |= NANDFS_SEGMENT_USAGE_GC;
	brelse(bp);
	VOP_UNLOCK(NTOV(su_node), 0);

	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));

	return (0);
}

/*
 * Erase a segment on the device and return it to the free pool; segments
 * whose erase fails are marked bad instead of freed.
 */
int
nandfs_clear_segment(struct nandfs_device *fsdev, uint64_t seg)
{
	uint64_t offset, segsize;
	uint32_t bps, bsize;
	int error = 0;

	bps = fsdev->nd_fsdata.f_blocks_per_segment;
	bsize = fsdev->nd_blocksize;
	segsize = bsize * bps;

	/* Convert the segment's first-block number into a byte offset. */
	nandfs_get_segment_range(fsdev, seg, &offset, NULL);
	offset *= bsize;

	DPRINTF(SEG, ("%s: seg:%#jx\n", __func__, (uintmax_t)seg));

	/* Erase it and mark it bad when fail */
	if (nandfs_erase(fsdev, offset, segsize))
		error = nandfs_bad_segment(fsdev, seg);

	if (error)
		return (error);

	/* Mark it free */
	error = nandfs_free_segment(fsdev, seg);

	return (error);
}

/* Fill in *nss with statistics taken from the SUFILE header block. */
int
nandfs_get_seg_stat(struct nandfs_device *nandfsdev,
    struct nandfs_seg_stat *nss)
{
	struct nandfs_sufile_header *suhdr;
	struct nandfs_node *su_node;
	struct buf *bp;
	int err;

	su_node = nandfsdev->nd_su_node;

	NANDFS_WRITELOCK(nandfsdev);
	VOP_LOCK(NTOV(su_node), LK_SHARED);

	err = nandfs_bread(nandfsdev->nd_su_node, 0, NOCRED, 0, &bp);
	if (err) {
		brelse(bp);
		VOP_UNLOCK(NTOV(su_node), 0);
		NANDFS_WRITEUNLOCK(nandfsdev);
		/*
		 * NOTE(review): returns -1 rather than err; verify callers
		 * only test for non-zero.
		 */
		return (-1);
	}

	suhdr = (struct nandfs_sufile_header *)bp->b_data;
	nss->nss_nsegs = nandfsdev->nd_fsdata.f_nsegments;
	nss->nss_ncleansegs = suhdr->sh_ncleansegs;
	nss->nss_ndirtysegs = suhdr->sh_ndirtysegs;
	nss->nss_ctime = 0;
	nss->nss_nongc_ctime = nandfsdev->nd_ts.tv_sec;
	nss->nss_prot_seq = nandfsdev->nd_seg_sequence;

	brelse(bp);
	VOP_UNLOCK(NTOV(su_node), 0);
	NANDFS_WRITEUNLOCK(nandfsdev);

	return
(0);
}

/*
 * ioctl backend: gather segment-usage info for up to nv_nmembs segments
 * and copy it out to userspace at nargv->nv_base.
 */
int
nandfs_get_segment_info_ioctl(struct nandfs_device *fsdev,
    struct nandfs_argv *nargv)
{
	struct nandfs_suinfo *nsi;
	int error;

	/* Bound the request before allocating the temporary array. */
	if (nargv->nv_nmembs > NANDFS_SEGMENTS_MAX)
		return (EINVAL);

	nsi = malloc(sizeof(struct nandfs_suinfo) * nargv->nv_nmembs,
	    M_NANDFSTEMP, M_WAITOK | M_ZERO);

	error = nandfs_get_segment_info(fsdev, nsi, nargv->nv_nmembs,
	    nargv->nv_index);

	if (error == 0)
		error = copyout(nsi, (void *)(uintptr_t)nargv->nv_base,
		    sizeof(struct nandfs_suinfo) * nargv->nv_nmembs);

	free(nsi, M_NANDFSTEMP);
	return (error);
}

/* Convenience wrapper: gather usage info with no flag filtering. */
int
nandfs_get_segment_info(struct nandfs_device *fsdev,
    struct nandfs_suinfo *nsi, uint32_t nmembs, uint64_t segment)
{

	return (nandfs_get_segment_info_filter(fsdev, nsi, nmembs, segment,
	    NULL, 0, 0));
}

/*
 * Gather usage info for up to nmembs segments starting at 'segment'.
 * Entries with any flag in 'nfilter' set are skipped; when 'filter' is
 * non-zero, only entries with at least one of those flags are returned.
 * When nsegs is non-NULL it receives the number of entries stored.
 */
int
nandfs_get_segment_info_filter(struct nandfs_device *fsdev,
    struct nandfs_suinfo *nsi, uint32_t nmembs, uint64_t segment,
    uint64_t *nsegs, uint32_t filter, uint32_t nfilter)
{
	struct nandfs_segment_usage *su;
	struct nandfs_node *su_node;
	struct buf *bp;
	uint64_t curr, blocknr, blockoff, i;
	uint32_t flags;
	int err = 0;

	curr = ~(0);

	lockmgr(&fsdev->nd_seg_const, LK_EXCLUSIVE, NULL);
	su_node = fsdev->nd_su_node;

	VOP_LOCK(NTOV(su_node), LK_SHARED);

	bp = NULL;
	if (nsegs != NULL)
		*nsegs = 0;
	for (i = 0; i < nmembs; segment++) {
		if (segment == fsdev->nd_fsdata.f_nsegments)
			break;

		nandfs_seg_usage_blk_offset(fsdev, segment, &blocknr,
		    &blockoff);

		/* Only re-read the SUFILE block when the block changes. */
		if (i == 0 || curr != blocknr) {
			if (bp != NULL)
				brelse(bp);
			err = nandfs_bread(su_node, blocknr, NOCRED,
			    0, &bp);
			if (err) {
				goto out;
			}
			curr = blocknr;
		}

		su = SU_USAGE_OFF(bp, blockoff);
		flags = su->su_flags;
		/* The current and the next segment report as active. */
		if (segment == fsdev->nd_seg_num ||
		    segment == fsdev->nd_next_seg_num)
			flags |= NANDFS_SEGMENT_USAGE_ACTIVE;

		if (nfilter != 0 && (flags & nfilter) != 0)
			continue;
		if (filter != 0 && (flags & filter) == 0)
			continue;

		nsi->nsi_num = segment;
		nsi->nsi_lastmod = su->su_lastmod;
		nsi->nsi_blocks = su->su_nblocks;
		nsi->nsi_flags = flags;
		nsi++;
		i++;
		if (nsegs != NULL)
			(*nsegs)++;
	}

out:
	if (bp !=
NULL) brelse(bp); VOP_UNLOCK(NTOV(su_node), 0); lockmgr(&fsdev->nd_seg_const, LK_RELEASE, NULL); return (err); } Index: head/sys/fs/nandfs/nandfs_vfsops.c =================================================================== --- head/sys/fs/nandfs/nandfs_vfsops.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_vfsops.c (revision 326268) @@ -1,1598 +1,1600 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf * Copyright (c) 2008, 2009 Reinoud Zandijk * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * From: NetBSD: nilfs_vfsops.c,v 1.1 2009/07/18 16:31:42 reinoud Exp */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_NANDFSMNT, "nandfs_mount", "NANDFS mount structure"); #define NANDFS_SET_SYSTEMFILE(vp) { \ (vp)->v_vflag |= VV_SYSTEM; \ vref(vp); \ vput(vp); } #define NANDFS_UNSET_SYSTEMFILE(vp) { \ VOP_LOCK(vp, LK_EXCLUSIVE); \ MPASS(vp->v_bufobj.bo_dirty.bv_cnt == 0); \ (vp)->v_vflag &= ~VV_SYSTEM; \ vgone(vp); \ vput(vp); } /* Globals */ struct _nandfs_devices nandfs_devices; /* Parameters */ int nandfs_verbose = 0; static void nandfs_tunable_init(void *arg) { TUNABLE_INT_FETCH("vfs.nandfs.verbose", &nandfs_verbose); } SYSINIT(nandfs_tunables, SI_SUB_VFS, SI_ORDER_ANY, nandfs_tunable_init, NULL); static SYSCTL_NODE(_vfs, OID_AUTO, nandfs, CTLFLAG_RD, 0, "NAND filesystem"); static SYSCTL_NODE(_vfs_nandfs, OID_AUTO, mount, CTLFLAG_RD, 0, "NANDFS mountpoints"); SYSCTL_INT(_vfs_nandfs, OID_AUTO, verbose, CTLFLAG_RW, &nandfs_verbose, 0, ""); #define NANDFS_CONSTR_INTERVAL 5 int nandfs_sync_interval = NANDFS_CONSTR_INTERVAL; /* sync every 5 seconds */ SYSCTL_UINT(_vfs_nandfs, OID_AUTO, sync_interval, CTLFLAG_RW, &nandfs_sync_interval, 0, ""); #define NANDFS_MAX_DIRTY_SEGS 5 int nandfs_max_dirty_segs = NANDFS_MAX_DIRTY_SEGS; /* sync when 5 dirty seg */ SYSCTL_UINT(_vfs_nandfs, OID_AUTO, max_dirty_segs, CTLFLAG_RW, &nandfs_max_dirty_segs, 0, ""); #define NANDFS_CPS_BETWEEN_SBLOCKS 5 int nandfs_cps_between_sblocks = NANDFS_CPS_BETWEEN_SBLOCKS; /* write superblock every 5 checkpoints */ SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cps_between_sblocks, CTLFLAG_RW, &nandfs_cps_between_sblocks, 0, ""); #define NANDFS_CLEANER_ENABLE 1 int nandfs_cleaner_enable = NANDFS_CLEANER_ENABLE; SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_enable, CTLFLAG_RW, &nandfs_cleaner_enable, 0, ""); #define 
NANDFS_CLEANER_INTERVAL 5 int nandfs_cleaner_interval = NANDFS_CLEANER_INTERVAL; SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_interval, CTLFLAG_RW, &nandfs_cleaner_interval, 0, ""); #define NANDFS_CLEANER_SEGMENTS 5 int nandfs_cleaner_segments = NANDFS_CLEANER_SEGMENTS; SYSCTL_UINT(_vfs_nandfs, OID_AUTO, cleaner_segments, CTLFLAG_RW, &nandfs_cleaner_segments, 0, ""); static int nandfs_mountfs(struct vnode *devvp, struct mount *mp); static vfs_mount_t nandfs_mount; static vfs_root_t nandfs_root; static vfs_statfs_t nandfs_statfs; static vfs_unmount_t nandfs_unmount; static vfs_vget_t nandfs_vget; static vfs_sync_t nandfs_sync; static const char *nandfs_opts[] = { "snap", "from", "noatime", NULL }; /* System nodes */ static int nandfs_create_system_nodes(struct nandfs_device *nandfsdev) { int error; error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_DAT_INO, &nandfsdev->nd_super_root.sr_dat, &nandfsdev->nd_dat_node); if (error) goto errorout; error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_CPFILE_INO, &nandfsdev->nd_super_root.sr_cpfile, &nandfsdev->nd_cp_node); if (error) goto errorout; error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_SUFILE_INO, &nandfsdev->nd_super_root.sr_sufile, &nandfsdev->nd_su_node); if (error) goto errorout; error = nandfs_get_node_raw(nandfsdev, NULL, NANDFS_GC_INO, NULL, &nandfsdev->nd_gc_node); if (error) goto errorout; NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_dat_node)); NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_cp_node)); NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_su_node)); NANDFS_SET_SYSTEMFILE(NTOV(nandfsdev->nd_gc_node)); DPRINTF(VOLUMES, ("System vnodes: dat: %p cp: %p su: %p\n", NTOV(nandfsdev->nd_dat_node), NTOV(nandfsdev->nd_cp_node), NTOV(nandfsdev->nd_su_node))); return (0); errorout: nandfs_dispose_node(&nandfsdev->nd_gc_node); nandfs_dispose_node(&nandfsdev->nd_dat_node); nandfs_dispose_node(&nandfsdev->nd_cp_node); nandfs_dispose_node(&nandfsdev->nd_su_node); return (error); } static void 
nandfs_release_system_nodes(struct nandfs_device *nandfsdev)
{

	if (!nandfsdev)
		return;
	/* Keep the nodes while other mounts still reference the device. */
	if (nandfsdev->nd_refcnt > 0)
		return;

	if (nandfsdev->nd_gc_node)
		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_gc_node));
	if (nandfsdev->nd_dat_node)
		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_dat_node));
	if (nandfsdev->nd_cp_node)
		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_cp_node));
	if (nandfsdev->nd_su_node)
		NANDFS_UNSET_SYSTEMFILE(NTOV(nandfsdev->nd_su_node));
}

/*
 * Validate the checksum of an fsdata copy.  Returns non-zero when the
 * stored f_sum matches a CRC computed over f_bytes bytes (with f_sum
 * itself zeroed during the computation).
 */
static int
nandfs_check_fsdata_crc(struct nandfs_fsdata *fsdata)
{
	uint32_t fsdata_crc, comp_crc;

	if (fsdata->f_magic != NANDFS_FSDATA_MAGIC)
		return (0);

	/* Preserve CRC */
	fsdata_crc = fsdata->f_sum;

	/* Calculate */
	fsdata->f_sum = (0);
	comp_crc = crc32((uint8_t *)fsdata, fsdata->f_bytes);

	/* Restore */
	fsdata->f_sum = fsdata_crc;

	/* Check CRC */
	return (fsdata_crc == comp_crc);
}

/*
 * Validate the checksum of a super block; fsdata->f_sbbytes gives the
 * number of checksummed bytes.  Returns non-zero on a match.
 */
static int
nandfs_check_superblock_crc(struct nandfs_fsdata *fsdata,
    struct nandfs_super_block *super)
{
	uint32_t super_crc, comp_crc;

	/* Check super block magic */
	if (super->s_magic != NANDFS_SUPER_MAGIC)
		return (0);

	/* Preserve CRC */
	super_crc = super->s_sum;

	/* Calculate */
	super->s_sum = (0);
	comp_crc = crc32((uint8_t *)super, fsdata->f_sbbytes);

	/* Restore */
	super->s_sum = super_crc;

	/* Check CRC */
	return (super_crc == comp_crc);
}

/* Recompute the super block checksum and store it in s_sum. */
static void
nandfs_calc_superblock_crc(struct nandfs_fsdata *fsdata,
    struct nandfs_super_block *super)
{
	uint32_t comp_crc;

	/* Calculate */
	super->s_sum = 0;
	comp_crc = crc32((uint8_t *)super, fsdata->f_sbbytes);

	/* Restore */
	super->s_sum = comp_crc;
}

/* Return non-zero when the area is fully erased (all bytes 0xff). */
static int
nandfs_is_empty(u_char *area, int size)
{
	int i;

	for (i = 0; i < size; i++)
		if (area[i] != 0xff)
			return (0);

	return (1);
}

/* Super block slots that fit in one erase block, after the fixed offset. */
static __inline int
nandfs_sblocks_in_esize(struct nandfs_device *fsdev)
{

	return ((fsdev->nd_erasesize - NANDFS_SBLOCK_OFFSET_BYTES) /
	    sizeof(struct nandfs_super_block));
}

/* Upper bound on super block slots across all flash areas. */
static __inline int
nandfs_max_sblocks(struct nandfs_device *fsdev)
{

	return (NANDFS_NFSAREAS * nandfs_sblocks_in_esize(fsdev));
}

static __inline int
nandfs_sblocks_in_block(struct nandfs_device *fsdev) { return (fsdev->nd_devblocksize / sizeof(struct nandfs_super_block)); } #if 0 static __inline int nandfs_sblocks_in_first_block(struct nandfs_device *fsdev) { int n; n = nandfs_sblocks_in_block(fsdev) - NANDFS_SBLOCK_OFFSET_BYTES / sizeof(struct nandfs_super_block); if (n < 0) n = 0; return (n); } #endif static int nandfs_write_superblock_at(struct nandfs_device *fsdev, struct nandfs_fsarea *fstp) { struct nandfs_super_block *super, *supert; struct buf *bp; int sb_per_sector, sbs_in_fsd, read_block; int index, pos, error; off_t offset; DPRINTF(SYNC, ("%s: last_used %d nandfs_sblocks_in_esize %d\n", __func__, fstp->last_used, nandfs_sblocks_in_esize(fsdev))); if (fstp->last_used == nandfs_sblocks_in_esize(fsdev) - 1) index = 0; else index = fstp->last_used + 1; super = &fsdev->nd_super; supert = NULL; sb_per_sector = nandfs_sblocks_in_block(fsdev); sbs_in_fsd = sizeof(struct nandfs_fsdata) / sizeof(struct nandfs_super_block); index += sbs_in_fsd; offset = fstp->offset; DPRINTF(SYNC, ("%s: offset %#jx s_last_pseg %#jx s_last_cno %#jx " "s_last_seq %#jx wtime %jd index %d\n", __func__, offset, super->s_last_pseg, super->s_last_cno, super->s_last_seq, super->s_wtime, index)); read_block = btodb(offset + rounddown(index, sb_per_sector) * sizeof(struct nandfs_super_block)); DPRINTF(SYNC, ("%s: read_block %#x\n", __func__, read_block)); if (index == sbs_in_fsd) { error = nandfs_erase(fsdev, offset, fsdev->nd_erasesize); if (error) return (error); error = bread(fsdev->nd_devvp, btodb(offset), fsdev->nd_devblocksize, NOCRED, &bp); if (error) { printf("NANDFS: couldn't read initial data: %d\n", error); brelse(bp); return (error); } memcpy(bp->b_data, &fsdev->nd_fsdata, sizeof(fsdev->nd_fsdata)); /* * 0xff-out the rest. This bp could be cached, so potentially * b_data contains stale super blocks. 
* * We don't mind cached bp since most of the time we just add * super blocks to already 0xff-out b_data and don't need to * perform actual read. */ if (fsdev->nd_devblocksize > sizeof(fsdev->nd_fsdata)) memset(bp->b_data + sizeof(fsdev->nd_fsdata), 0xff, fsdev->nd_devblocksize - sizeof(fsdev->nd_fsdata)); error = bwrite(bp); if (error) { printf("NANDFS: cannot rewrite initial data at %jx\n", offset); return (error); } } error = bread(fsdev->nd_devvp, read_block, fsdev->nd_devblocksize, NOCRED, &bp); if (error) { brelse(bp); return (error); } supert = (struct nandfs_super_block *)(bp->b_data); pos = index % sb_per_sector; DPRINTF(SYNC, ("%s: storing at %d\n", __func__, pos)); memcpy(&supert[pos], super, sizeof(struct nandfs_super_block)); /* * See comment above in code that performs erase. */ if (pos == 0) memset(&supert[1], 0xff, (sb_per_sector - 1) * sizeof(struct nandfs_super_block)); error = bwrite(bp); if (error) { printf("NANDFS: cannot update superblock at %jx\n", offset); return (error); } DPRINTF(SYNC, ("%s: fstp->last_used %d -> %d\n", __func__, fstp->last_used, index - sbs_in_fsd)); fstp->last_used = index - sbs_in_fsd; return (0); } int nandfs_write_superblock(struct nandfs_device *fsdev) { struct nandfs_super_block *super; struct timespec ts; int error; int i, j; vfs_timestamp(&ts); super = &fsdev->nd_super; super->s_last_pseg = fsdev->nd_last_pseg; super->s_last_cno = fsdev->nd_last_cno; super->s_last_seq = fsdev->nd_seg_sequence; super->s_wtime = ts.tv_sec; nandfs_calc_superblock_crc(&fsdev->nd_fsdata, super); error = 0; for (i = 0, j = fsdev->nd_last_fsarea; i < NANDFS_NFSAREAS; i++, j = (j + 1 % NANDFS_NFSAREAS)) { if (fsdev->nd_fsarea[j].flags & NANDFS_FSSTOR_FAILED) { DPRINTF(SYNC, ("%s: skipping %d\n", __func__, j)); continue; } error = nandfs_write_superblock_at(fsdev, &fsdev->nd_fsarea[j]); if (error) { printf("NANDFS: writing superblock at offset %d failed:" "%d\n", j * fsdev->nd_erasesize, error); fsdev->nd_fsarea[j].flags |= 
NANDFS_FSSTOR_FAILED;
		} else
			break;
	}

	if (i == NANDFS_NFSAREAS) {
		printf("NANDFS: superblock was not written\n");
		/*
		 * TODO: switch to read-only?
		 */
		return (error);
	} else
		/* Remember the area following the one that succeeded. */
		fsdev->nd_last_fsarea = (j + 1) % NANDFS_NFSAREAS;

	return (0);
}

/*
 * Pick the first fsdata copy with a valid CRC out of 'fsdatat'.
 * Returns 0 and sets *fsdata on success, EINVAL when none is valid.
 */
static int
nandfs_select_fsdata(struct nandfs_device *fsdev,
    struct nandfs_fsdata *fsdatat, struct nandfs_fsdata **fsdata, int nfsds)
{
	int i;

	*fsdata = NULL;
	for (i = 0; i < nfsds; i++) {
		DPRINTF(VOLUMES, ("%s: i %d f_magic %x f_crc %x\n", __func__,
		    i, fsdatat[i].f_magic, fsdatat[i].f_sum));
		if (!nandfs_check_fsdata_crc(&fsdatat[i]))
			continue;
		*fsdata = &fsdatat[i];
		break;
	}

	return (*fsdata != NULL ? 0 : EINVAL);
}

/*
 * Pick the newest valid super block: the highest s_last_cno among those
 * with a good CRC.  Returns 0 and sets *super, EINVAL when none is valid.
 */
static int
nandfs_select_sb(struct nandfs_device *fsdev,
    struct nandfs_super_block *supert, struct nandfs_super_block **super,
    int nsbs)
{
	int i;

	*super = NULL;
	for (i = 0; i < nsbs; i++) {
		if (!nandfs_check_superblock_crc(&fsdev->nd_fsdata,
		    &supert[i]))
			continue;
		DPRINTF(SYNC, ("%s: i %d s_last_cno %jx s_magic %x "
		    "s_wtime %jd\n", __func__, i, supert[i].s_last_cno,
		    supert[i].s_magic, supert[i].s_wtime));
		if (*super == NULL ||
		    supert[i].s_last_cno > (*super)->s_last_cno)
			*super = &supert[i];
	}

	return (*super != NULL ?
0 : EINVAL); } static int nandfs_read_structures_at(struct nandfs_device *fsdev, struct nandfs_fsarea *fstp, struct nandfs_fsdata *fsdata, struct nandfs_super_block *super) { struct nandfs_super_block *tsuper, *tsuperd; struct buf *bp; int error, read_size; int i; int offset; offset = fstp->offset; if (fsdev->nd_erasesize > MAXBSIZE) read_size = MAXBSIZE; else read_size = fsdev->nd_erasesize; error = bread(fsdev->nd_devvp, btodb(offset), read_size, NOCRED, &bp); if (error) { printf("couldn't read: %d\n", error); brelse(bp); fstp->flags |= NANDFS_FSSTOR_FAILED; return (error); } tsuper = super; memcpy(fsdata, bp->b_data, sizeof(struct nandfs_fsdata)); memcpy(tsuper, (bp->b_data + sizeof(struct nandfs_fsdata)), read_size - sizeof(struct nandfs_fsdata)); brelse(bp); tsuper += (read_size - sizeof(struct nandfs_fsdata)) / sizeof(struct nandfs_super_block); for (i = 1; i < fsdev->nd_erasesize / read_size; i++) { error = bread(fsdev->nd_devvp, btodb(offset + i * read_size), read_size, NOCRED, &bp); if (error) { printf("couldn't read: %d\n", error); brelse(bp); fstp->flags |= NANDFS_FSSTOR_FAILED; return (error); } memcpy(tsuper, bp->b_data, read_size); tsuper += read_size / sizeof(struct nandfs_super_block); brelse(bp); } tsuper -= 1; fstp->last_used = nandfs_sblocks_in_esize(fsdev) - 1; for (tsuperd = super - 1; (tsuper != tsuperd); tsuper -= 1) { if (nandfs_is_empty((u_char *)tsuper, sizeof(*tsuper))) fstp->last_used--; else break; } DPRINTF(VOLUMES, ("%s: last_used %d\n", __func__, fstp->last_used)); return (0); } static int nandfs_read_structures(struct nandfs_device *fsdev) { struct nandfs_fsdata *fsdata, *fsdatat; struct nandfs_super_block *sblocks, *ssblock; int nsbs, nfsds, i; int error = 0; int nrsbs; nfsds = NANDFS_NFSAREAS; nsbs = nandfs_max_sblocks(fsdev); fsdatat = malloc(sizeof(struct nandfs_fsdata) * nfsds, M_NANDFSTEMP, M_WAITOK | M_ZERO); sblocks = malloc(sizeof(struct nandfs_super_block) * nsbs, M_NANDFSTEMP, M_WAITOK | M_ZERO); nrsbs = 0; for (i = 0; i 
< NANDFS_NFSAREAS; i++) { fsdev->nd_fsarea[i].offset = i * fsdev->nd_erasesize; error = nandfs_read_structures_at(fsdev, &fsdev->nd_fsarea[i], &fsdatat[i], sblocks + nrsbs); if (error) continue; nrsbs += (fsdev->nd_fsarea[i].last_used + 1); if (fsdev->nd_fsarea[fsdev->nd_last_fsarea].last_used > fsdev->nd_fsarea[i].last_used) fsdev->nd_last_fsarea = i; } if (nrsbs == 0) { printf("nandfs: no valid superblocks found\n"); error = EINVAL; goto out; } error = nandfs_select_fsdata(fsdev, fsdatat, &fsdata, nfsds); if (error) goto out; memcpy(&fsdev->nd_fsdata, fsdata, sizeof(struct nandfs_fsdata)); error = nandfs_select_sb(fsdev, sblocks, &ssblock, nsbs); if (error) goto out; memcpy(&fsdev->nd_super, ssblock, sizeof(struct nandfs_super_block)); out: free(fsdatat, M_NANDFSTEMP); free(sblocks, M_NANDFSTEMP); if (error == 0) DPRINTF(VOLUMES, ("%s: selected sb with w_time %jd " "last_pseg %#jx\n", __func__, fsdev->nd_super.s_wtime, fsdev->nd_super.s_last_pseg)); return (error); } static void nandfs_unmount_base(struct nandfs_device *nandfsdev) { int error; if (!nandfsdev) return; /* Remove all our information */ error = vinvalbuf(nandfsdev->nd_devvp, V_SAVE, 0, 0); if (error) { /* * Flushing buffers failed when fs was umounting, can't do * much now, just printf error and continue with umount. */ nandfs_error("%s(): error:%d when umounting FS\n", __func__, error); } /* Release the device's system nodes */ nandfs_release_system_nodes(nandfsdev); } static void nandfs_get_ncleanseg(struct nandfs_device *nandfsdev) { struct nandfs_seg_stat nss; nandfs_get_seg_stat(nandfsdev, &nss); nandfsdev->nd_clean_segs = nss.nss_ncleansegs; DPRINTF(VOLUMES, ("nandfs_mount: clean segs: %jx\n", (uintmax_t)nandfsdev->nd_clean_segs)); } static int nandfs_mount_base(struct nandfs_device *nandfsdev, struct mount *mp, struct nandfs_args *args) { uint32_t log_blocksize; int error; /* Flush out any old buffers remaining from a previous use. 
*/ if ((error = vinvalbuf(nandfsdev->nd_devvp, V_SAVE, 0, 0))) return (error); error = nandfs_read_structures(nandfsdev); if (error) { printf("nandfs: could not get valid filesystem structures\n"); return (error); } if (nandfsdev->nd_fsdata.f_rev_level != NANDFS_CURRENT_REV) { printf("nandfs: unsupported file system revision: %d " "(supported is %d).\n", nandfsdev->nd_fsdata.f_rev_level, NANDFS_CURRENT_REV); return (EINVAL); } if (nandfsdev->nd_fsdata.f_erasesize != nandfsdev->nd_erasesize) { printf("nandfs: erasesize mismatch (device %#x, fs %#x)\n", nandfsdev->nd_erasesize, nandfsdev->nd_fsdata.f_erasesize); return (EINVAL); } /* Get our blocksize */ log_blocksize = nandfsdev->nd_fsdata.f_log_block_size; nandfsdev->nd_blocksize = (uint64_t) 1 << (log_blocksize + 10); DPRINTF(VOLUMES, ("%s: blocksize:%x\n", __func__, nandfsdev->nd_blocksize)); DPRINTF(VOLUMES, ("%s: accepted super block with cp %#jx\n", __func__, (uintmax_t)nandfsdev->nd_super.s_last_cno)); /* Calculate dat structure parameters */ nandfs_calc_mdt_consts(nandfsdev, &nandfsdev->nd_dat_mdt, nandfsdev->nd_fsdata.f_dat_entry_size); nandfs_calc_mdt_consts(nandfsdev, &nandfsdev->nd_ifile_mdt, nandfsdev->nd_fsdata.f_inode_size); /* Search for the super root and roll forward when needed */ if (nandfs_search_super_root(nandfsdev)) { printf("Cannot find valid SuperRoot\n"); return (EINVAL); } nandfsdev->nd_mount_state = nandfsdev->nd_super.s_state; if (nandfsdev->nd_mount_state != NANDFS_VALID_FS) { printf("FS is seriously damaged, needs repairing\n"); printf("aborting mount\n"); return (EINVAL); } /* * FS should be ok now. The superblock and the last segsum could be * updated from the repair so extract running values again. 
*/ nandfsdev->nd_last_pseg = nandfsdev->nd_super.s_last_pseg; nandfsdev->nd_seg_sequence = nandfsdev->nd_super.s_last_seq; nandfsdev->nd_seg_num = nandfs_get_segnum_of_block(nandfsdev, nandfsdev->nd_last_pseg); nandfsdev->nd_next_seg_num = nandfs_get_segnum_of_block(nandfsdev, nandfsdev->nd_last_segsum.ss_next); nandfsdev->nd_ts.tv_sec = nandfsdev->nd_last_segsum.ss_create; nandfsdev->nd_last_cno = nandfsdev->nd_super.s_last_cno; nandfsdev->nd_fakevblk = 1; /* * FIXME: bogus calculation. Should use actual number of usable segments * instead of total amount. */ nandfsdev->nd_segs_reserved = nandfsdev->nd_fsdata.f_nsegments * nandfsdev->nd_fsdata.f_r_segments_percentage / 100; nandfsdev->nd_last_ino = NANDFS_USER_INO; DPRINTF(VOLUMES, ("%s: last_pseg %#jx last_cno %#jx last_seq %#jx\n" "fsdev: last_seg: seq %#jx num %#jx, next_seg_num %#jx " "segs_reserved %#jx\n", __func__, (uintmax_t)nandfsdev->nd_last_pseg, (uintmax_t)nandfsdev->nd_last_cno, (uintmax_t)nandfsdev->nd_seg_sequence, (uintmax_t)nandfsdev->nd_seg_sequence, (uintmax_t)nandfsdev->nd_seg_num, (uintmax_t)nandfsdev->nd_next_seg_num, (uintmax_t)nandfsdev->nd_segs_reserved)); DPRINTF(VOLUMES, ("nandfs_mount: accepted super root\n")); /* Create system vnodes for DAT, CP and SEGSUM */ error = nandfs_create_system_nodes(nandfsdev); if (error) nandfs_unmount_base(nandfsdev); nandfs_get_ncleanseg(nandfsdev); return (error); } static void nandfs_unmount_device(struct nandfs_device *nandfsdev) { /* Is there anything? 
*/ if (nandfsdev == NULL) return; /* Remove the device only if we're the last reference */ nandfsdev->nd_refcnt--; if (nandfsdev->nd_refcnt >= 1) return; MPASS(nandfsdev->nd_syncer == NULL); MPASS(nandfsdev->nd_cleaner == NULL); MPASS(nandfsdev->nd_free_base == NULL); /* Unmount our base */ nandfs_unmount_base(nandfsdev); /* Remove from our device list */ SLIST_REMOVE(&nandfs_devices, nandfsdev, nandfs_device, nd_next_device); DROP_GIANT(); g_topology_lock(); g_vfs_close(nandfsdev->nd_gconsumer); g_topology_unlock(); PICKUP_GIANT(); DPRINTF(VOLUMES, ("closing device\n")); /* Clear our mount reference and release device node */ vrele(nandfsdev->nd_devvp); dev_rel(nandfsdev->nd_devvp->v_rdev); /* Free our device info */ cv_destroy(&nandfsdev->nd_sync_cv); mtx_destroy(&nandfsdev->nd_sync_mtx); cv_destroy(&nandfsdev->nd_clean_cv); mtx_destroy(&nandfsdev->nd_clean_mtx); mtx_destroy(&nandfsdev->nd_mutex); lockdestroy(&nandfsdev->nd_seg_const); free(nandfsdev, M_NANDFSMNT); } static int nandfs_check_mounts(struct nandfs_device *nandfsdev, struct mount *mp, struct nandfs_args *args) { struct nandfsmount *nmp; uint64_t last_cno; /* no double-mounting of the same checkpoint */ STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) { if (nmp->nm_mount_args.cpno == args->cpno) return (EBUSY); } /* Allow readonly mounts without questioning here */ if (mp->mnt_flag & MNT_RDONLY) return (0); /* Read/write mount */ STAILQ_FOREACH(nmp, &nandfsdev->nd_mounts, nm_next_mount) { /* Only one RW mount on this device! 
*/ if ((nmp->nm_vfs_mountp->mnt_flag & MNT_RDONLY)==0) return (EROFS); /* RDONLY on last mountpoint is device busy */ last_cno = nmp->nm_nandfsdev->nd_super.s_last_cno; if (nmp->nm_mount_args.cpno == last_cno) return (EBUSY); } /* OK for now */ return (0); } static int nandfs_mount_device(struct vnode *devvp, struct mount *mp, struct nandfs_args *args, struct nandfs_device **nandfsdev_p) { struct nandfs_device *nandfsdev; struct g_provider *pp; struct g_consumer *cp; struct cdev *dev; uint32_t erasesize; int error, size; int ronly; DPRINTF(VOLUMES, ("Mounting NANDFS device\n")); ronly = (mp->mnt_flag & MNT_RDONLY) != 0; /* Look up device in our nandfs_mountpoints */ *nandfsdev_p = NULL; SLIST_FOREACH(nandfsdev, &nandfs_devices, nd_next_device) if (nandfsdev->nd_devvp == devvp) break; if (nandfsdev) { DPRINTF(VOLUMES, ("device already mounted\n")); error = nandfs_check_mounts(nandfsdev, mp, args); if (error) return error; nandfsdev->nd_refcnt++; *nandfsdev_p = nandfsdev; if (!ronly) { DROP_GIANT(); g_topology_lock(); error = g_access(nandfsdev->nd_gconsumer, 0, 1, 0); g_topology_unlock(); PICKUP_GIANT(); } return (error); } vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); dev = devvp->v_rdev; dev_ref(dev); DROP_GIANT(); g_topology_lock(); error = g_vfs_open(devvp, &cp, "nandfs", ronly ? 
0 : 1); pp = g_dev_getprovider(dev); g_topology_unlock(); PICKUP_GIANT(); VOP_UNLOCK(devvp, 0); if (error) { dev_rel(dev); return (error); } nandfsdev = malloc(sizeof(struct nandfs_device), M_NANDFSMNT, M_WAITOK | M_ZERO); /* Initialise */ nandfsdev->nd_refcnt = 1; nandfsdev->nd_devvp = devvp; nandfsdev->nd_syncing = 0; nandfsdev->nd_cleaning = 0; nandfsdev->nd_gconsumer = cp; cv_init(&nandfsdev->nd_sync_cv, "nandfssync"); mtx_init(&nandfsdev->nd_sync_mtx, "nffssyncmtx", NULL, MTX_DEF); cv_init(&nandfsdev->nd_clean_cv, "nandfsclean"); mtx_init(&nandfsdev->nd_clean_mtx, "nffscleanmtx", NULL, MTX_DEF); mtx_init(&nandfsdev->nd_mutex, "nandfsdev lock", NULL, MTX_DEF); lockinit(&nandfsdev->nd_seg_const, PVFS, "nffssegcon", VLKTIMEOUT, LK_CANRECURSE); STAILQ_INIT(&nandfsdev->nd_mounts); nandfsdev->nd_devsize = pp->mediasize; nandfsdev->nd_devblocksize = pp->sectorsize; size = sizeof(erasesize); error = g_io_getattr("NAND::blocksize", nandfsdev->nd_gconsumer, &size, &erasesize); if (error) { DPRINTF(VOLUMES, ("couldn't get erasesize: %d\n", error)); if (error == ENOIOCTL || error == EOPNOTSUPP) { /* * We conclude that this is not NAND storage */ erasesize = NANDFS_DEF_ERASESIZE; } else { DROP_GIANT(); g_topology_lock(); g_vfs_close(nandfsdev->nd_gconsumer); g_topology_unlock(); PICKUP_GIANT(); dev_rel(dev); free(nandfsdev, M_NANDFSMNT); return (error); } } nandfsdev->nd_erasesize = erasesize; DPRINTF(VOLUMES, ("%s: erasesize %x\n", __func__, nandfsdev->nd_erasesize)); /* Register nandfs_device in list */ SLIST_INSERT_HEAD(&nandfs_devices, nandfsdev, nd_next_device); error = nandfs_mount_base(nandfsdev, mp, args); if (error) { /* Remove all our information */ nandfs_unmount_device(nandfsdev); return (EINVAL); } nandfsdev->nd_maxfilesize = nandfs_get_maxfilesize(nandfsdev); *nandfsdev_p = nandfsdev; DPRINTF(VOLUMES, ("NANDFS device mounted ok\n")); return (0); } static int nandfs_mount_checkpoint(struct nandfsmount *nmp) { struct nandfs_cpfile_header *cphdr; struct 
nandfs_checkpoint *cp; struct nandfs_inode ifile_inode; struct nandfs_node *cp_node; struct buf *bp; uint64_t ncp, nsn, cpno, fcpno, blocknr, last_cno; uint32_t off, dlen; int cp_per_block, error; cpno = nmp->nm_mount_args.cpno; if (cpno == 0) cpno = nmp->nm_nandfsdev->nd_super.s_last_cno; DPRINTF(VOLUMES, ("%s: trying to mount checkpoint number %"PRIu64"\n", __func__, cpno)); cp_node = nmp->nm_nandfsdev->nd_cp_node; VOP_LOCK(NTOV(cp_node), LK_SHARED); /* Get cpfile header from 1st block of cp file */ error = nandfs_bread(cp_node, 0, NOCRED, 0, &bp); if (error) { brelse(bp); VOP_UNLOCK(NTOV(cp_node), 0); return (error); } cphdr = (struct nandfs_cpfile_header *) bp->b_data; ncp = cphdr->ch_ncheckpoints; nsn = cphdr->ch_nsnapshots; brelse(bp); DPRINTF(VOLUMES, ("mount_nandfs: checkpoint header read in\n")); DPRINTF(VOLUMES, ("\tNumber of checkpoints %"PRIu64"\n", ncp)); DPRINTF(VOLUMES, ("\tNumber of snapshots %"PRIu64"\n", nsn)); /* Read in our specified checkpoint */ dlen = nmp->nm_nandfsdev->nd_fsdata.f_checkpoint_size; cp_per_block = nmp->nm_nandfsdev->nd_blocksize / dlen; fcpno = cpno + NANDFS_CPFILE_FIRST_CHECKPOINT_OFFSET - 1; blocknr = fcpno / cp_per_block; off = (fcpno % cp_per_block) * dlen; error = nandfs_bread(cp_node, blocknr, NOCRED, 0, &bp); if (error) { brelse(bp); VOP_UNLOCK(NTOV(cp_node), 0); printf("mount_nandfs: couldn't read cp block %"PRIu64"\n", fcpno); return (EINVAL); } /* Needs to be a valid checkpoint */ cp = (struct nandfs_checkpoint *) ((uint8_t *) bp->b_data + off); if (cp->cp_flags & NANDFS_CHECKPOINT_INVALID) { printf("mount_nandfs: checkpoint marked invalid\n"); brelse(bp); VOP_UNLOCK(NTOV(cp_node), 0); return (EINVAL); } /* Is this really the checkpoint we want? */ if (cp->cp_cno != cpno) { printf("mount_nandfs: checkpoint file corrupt? " "expected cpno %"PRIu64", found cpno %"PRIu64"\n", cpno, cp->cp_cno); brelse(bp); VOP_UNLOCK(NTOV(cp_node), 0); return (EINVAL); } /* Check if it's a snapshot ! 
 */
	last_cno = nmp->nm_nandfsdev->nd_super.s_last_cno;
	if (cpno != last_cno) {
		/* Only allow snapshots if not mounting on the last cp */
		if ((cp->cp_flags & NANDFS_CHECKPOINT_SNAPSHOT) == 0) {
			printf(
			    "mount_nandfs: checkpoint %"PRIu64" is not a "
			    "snapshot\n", cpno);
			brelse(bp);
			VOP_UNLOCK(NTOV(cp_node), 0);
			return (EINVAL);
		}
	}

	/* Copy the ifile inode out before releasing the buffer. */
	ifile_inode = cp->cp_ifile_inode;
	brelse(bp);

	/* Get ifile inode */
	error = nandfs_get_node_raw(nmp->nm_nandfsdev, NULL, NANDFS_IFILE_INO,
	    &ifile_inode, &nmp->nm_ifile_node);
	if (error) {
		printf("mount_nandfs: can't read ifile node\n");
		VOP_UNLOCK(NTOV(cp_node), 0);
		return (EINVAL);
	}

	NANDFS_SET_SYSTEMFILE(NTOV(nmp->nm_ifile_node));
	VOP_UNLOCK(NTOV(cp_node), 0);

	/* Get root node? */

	return (0);
}

/*
 * Free the per-mount nandfsmount structure, if any.  Safe to call with a
 * mount whose mnt_data was never set.
 */
static void
free_nandfs_mountinfo(struct mount *mp)
{
	struct nandfsmount *nmp = VFSTONANDFS(mp);

	if (nmp == NULL)
		return;

	free(nmp, M_NANDFSMNT);
}

/*
 * Wake the syncer thread and block until it completes one pass.  If a
 * sync is already in flight, first wait for it to finish.  For
 * SYNCER_UMOUNT, additionally tell the syncer thread to exit after its
 * final pass.  The whole handshake runs under nd_sync_mtx.
 *
 * NOTE(review): "reason" indexes reasons[] without a bounds check --
 * callers must pass a valid SYNCER_* value.
 */
void
nandfs_wakeup_wait_sync(struct nandfs_device *nffsdev, int reason)
{
	char *reasons[] = {
	    "umount", "vfssync", "bdflush", "fforce", "fsync", "ro_upd"
	};

	DPRINTF(SYNC, ("%s: %s\n", __func__, reasons[reason]));
	mtx_lock(&nffsdev->nd_sync_mtx);
	/* Wait for any sync pass already in progress. */
	if (nffsdev->nd_syncing)
		cv_wait(&nffsdev->nd_sync_cv, &nffsdev->nd_sync_mtx);
	if (reason == SYNCER_UMOUNT)
		nffsdev->nd_syncer_exit = 1;
	nffsdev->nd_syncing = 1;
	/* Kick the syncer thread, then wait for it to signal completion. */
	wakeup(&nffsdev->nd_syncing);
	cv_wait(&nffsdev->nd_sync_cv, &nffsdev->nd_sync_mtx);
	mtx_unlock(&nffsdev->nd_sync_mtx);
}

/*
 * Called by the syncer at the end of a pass: clear the busy flag, wake any
 * waiters in nandfs_wakeup_wait_sync(), and -- unless the thread is
 * exiting -- sleep until the next interval or an explicit wakeup.
 */
static void
nandfs_gc_finished(struct nandfs_device *nffsdev, int exit)
{
	int error;

	mtx_lock(&nffsdev->nd_sync_mtx);
	nffsdev->nd_syncing = 0;
	DPRINTF(SYNC, ("%s: cleaner finish\n", __func__));
	cv_broadcast(&nffsdev->nd_sync_cv);
	mtx_unlock(&nffsdev->nd_sync_mtx);
	if (!exit) {
		error = tsleep(&nffsdev->nd_syncing, PRIBIO, "-",
		    hz * nandfs_sync_interval);
		DPRINTF(SYNC, ("%s: cleaner waked up: %d\n",
		    __func__, error));
	}
}

/*
 * Syncer kernel thread body: periodically runs the segment constructor
 * until nd_syncer_exit is set (see stop_syncer()).
 *
 * NOTE(review): "mp" is assigned below but appears unused in the rest of
 * this function.
 */
static void
nandfs_syncer(struct nandfsmount *nmp)
{
	struct nandfs_device *nffsdev;
	struct mount *mp;
	int flags, error;

	mp = nmp->nm_vfs_mountp;
	nffsdev =
nmp->nm_nandfsdev;

	/* Initial delay before the first segment-construction pass. */
	tsleep(&nffsdev->nd_syncing, PRIBIO, "-", hz * nandfs_sync_interval);
	while (!nffsdev->nd_syncer_exit) {
		DPRINTF(SYNC, ("%s: syncer run\n", __func__));
		nffsdev->nd_syncing = 1;

		/* Pick up any force/umount request latched in nm_flags. */
		flags = (nmp->nm_flags & (NANDFS_FORCE_SYNCER | NANDFS_UMOUNT));

		error = nandfs_segment_constructor(nmp, flags);
		if (error)
			nandfs_error("%s: error:%d when creating segments\n",
			    __func__, error);

		nmp->nm_flags &= ~flags;

		/* Wake waiters; sleep until the next interval or wakeup. */
		nandfs_gc_finished(nffsdev, 0);
	}

	MPASS(nffsdev->nd_cleaner == NULL);

	/* Final forced pass before the thread exits (umount path). */
	error = nandfs_segment_constructor(nmp,
	    NANDFS_FORCE_SYNCER | NANDFS_UMOUNT);
	if (error)
		nandfs_error("%s: error:%d when creating segments\n",
		    __func__, error);

	nandfs_gc_finished(nffsdev, 1);
	nffsdev->nd_syncer = NULL;

	MPASS(nffsdev->nd_free_base == NULL);

	DPRINTF(SYNC, ("%s: exiting\n", __func__));
	kthread_exit();
}

/*
 * Create the per-device syncer kernel thread.  Must only be called when no
 * syncer is running (asserted).  Returns the kthread_add(9) error, if any.
 */
static int
start_syncer(struct nandfsmount *nmp)
{
	int error;

	MPASS(nmp->nm_nandfsdev->nd_syncer == NULL);

	DPRINTF(SYNC, ("%s: start syncer\n", __func__));

	nmp->nm_nandfsdev->nd_syncer_exit = 0;

	error = kthread_add((void(*)(void *))nandfs_syncer, nmp, NULL,
	    &nmp->nm_nandfsdev->nd_syncer, 0, 0, "nandfs_syncer");

	if (error)
		printf("nandfs: could not start syncer: %d\n", error);

	return (error);
}

/*
 * Tell the syncer thread to exit and wait for its final pass to complete.
 */
static int
stop_syncer(struct nandfsmount *nmp)
{

	MPASS(nmp->nm_nandfsdev->nd_syncer != NULL);
	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_UMOUNT);
	DPRINTF(SYNC, ("%s: stop syncer\n", __func__));
	return (0);
}

/*
 * VFS_MOUNT(9) entry point for nandfs.  (The previous comment here,
 * "Mount null layer", was a leftover from nullfs.)
 */
static int
nandfs_mount(struct mount *mp)
{
	struct nandfsmount *nmp;
	struct vnode *devvp;
	struct nameidata nd;
	struct vfsoptlist *opts;
	struct thread *td;
	char *from;
	int error = 0, flags;

	DPRINTF(VOLUMES, ("%s: mp = %p\n", __func__, (void *)mp));

	td = curthread;
	opts = mp->mnt_optnew;

	if (vfs_filteropt(opts, nandfs_opts))
		return (EINVAL);

	/*
	 * Update is a no-op
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		nmp = VFSTONANDFS(mp);
		/* "export" updates need no further work; error is still 0. */
		if (vfs_flagopt(mp->mnt_optnew, "export", NULL, 0)) {
			return (error);
		}
		/* Read-write -> read-only downgrade handled below. */
		if (!(nmp->nm_ronly) && vfs_flagopt(opts, "ro", NULL, 0))
{ vn_start_write(NULL, &mp, V_WAIT); error = VFS_SYNC(mp, MNT_WAIT); if (error) return (error); vn_finished_write(mp); flags = WRITECLOSE; if (mp->mnt_flag & MNT_FORCE) flags |= FORCECLOSE; nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_ROUPD); error = vflush(mp, 0, flags, td); if (error) return (error); nandfs_stop_cleaner(nmp->nm_nandfsdev); stop_syncer(nmp); DROP_GIANT(); g_topology_lock(); g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, -1, 0); g_topology_unlock(); PICKUP_GIANT(); MNT_ILOCK(mp); mp->mnt_flag |= MNT_RDONLY; MNT_IUNLOCK(mp); nmp->nm_ronly = 1; } else if ((nmp->nm_ronly) && !vfs_flagopt(opts, "ro", NULL, 0)) { /* * Don't allow read-write snapshots. */ if (nmp->nm_mount_args.cpno != 0) return (EROFS); /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ devvp = nmp->nm_nandfsdev->nd_devvp; vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); error = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td); if (error) { error = priv_check(td, PRIV_VFS_MOUNT_PERM); if (error) { VOP_UNLOCK(devvp, 0); return (error); } } VOP_UNLOCK(devvp, 0); DROP_GIANT(); g_topology_lock(); error = g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, 1, 0); g_topology_unlock(); PICKUP_GIANT(); if (error) return (error); MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_RDONLY; MNT_IUNLOCK(mp); error = start_syncer(nmp); if (error == 0) error = nandfs_start_cleaner(nmp->nm_nandfsdev); if (error) { DROP_GIANT(); g_topology_lock(); g_access(nmp->nm_nandfsdev->nd_gconsumer, 0, -1, 0); g_topology_unlock(); PICKUP_GIANT(); return (error); } nmp->nm_ronly = 0; } return (0); } from = vfs_getopts(opts, "from", &error); if (error) return (error); /* * Find device node */ NDINIT(&nd, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, from, curthread); error = namei(&nd); if (error) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); devvp = nd.ni_vp; if (!vn_isdisk(devvp, &error)) { vput(devvp); return (error); } /* Check the access rights on the mount device */ error = 
VOP_ACCESS(devvp, VREAD, curthread->td_ucred, curthread); if (error) error = priv_check(curthread, PRIV_VFS_MOUNT_PERM); if (error) { vput(devvp); return (error); } vfs_getnewfsid(mp); error = nandfs_mountfs(devvp, mp); if (error) return (error); vfs_mountedfrom(mp, from); return (0); } static int nandfs_mountfs(struct vnode *devvp, struct mount *mp) { struct nandfsmount *nmp = NULL; struct nandfs_args *args = NULL; struct nandfs_device *nandfsdev; char *from; int error, ronly; char *cpno; ronly = (mp->mnt_flag & MNT_RDONLY) != 0; if (devvp->v_rdev->si_iosize_max != 0) mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; VOP_UNLOCK(devvp, 0); if (mp->mnt_iosize_max > MAXPHYS) mp->mnt_iosize_max = MAXPHYS; from = vfs_getopts(mp->mnt_optnew, "from", &error); if (error) goto error; error = vfs_getopt(mp->mnt_optnew, "snap", (void **)&cpno, NULL); if (error == ENOENT) cpno = NULL; else if (error) goto error; args = (struct nandfs_args *)malloc(sizeof(struct nandfs_args), M_NANDFSMNT, M_WAITOK | M_ZERO); if (cpno != NULL) args->cpno = strtoul(cpno, (char **)NULL, 10); else args->cpno = 0; args->fspec = from; if (args->cpno != 0 && !ronly) { error = EROFS; goto error; } printf("WARNING: NANDFS is considered to be a highly experimental " "feature in FreeBSD.\n"); error = nandfs_mount_device(devvp, mp, args, &nandfsdev); if (error) goto error; nmp = (struct nandfsmount *) malloc(sizeof(struct nandfsmount), M_NANDFSMNT, M_WAITOK | M_ZERO); mp->mnt_data = nmp; nmp->nm_vfs_mountp = mp; nmp->nm_ronly = ronly; MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; mp->mnt_kern_flag |= MNTK_USES_BCACHE; MNT_IUNLOCK(mp); nmp->nm_nandfsdev = nandfsdev; /* Add our mountpoint */ STAILQ_INSERT_TAIL(&nandfsdev->nd_mounts, nmp, nm_next_mount); if (args->cpno > nandfsdev->nd_last_cno) { printf("WARNING: supplied checkpoint number (%jd) is greater " "than last known checkpoint on filesystem (%jd). 
Mounting" " checkpoint %jd\n", (uintmax_t)args->cpno, (uintmax_t)nandfsdev->nd_last_cno, (uintmax_t)nandfsdev->nd_last_cno); args->cpno = nandfsdev->nd_last_cno; } /* Setting up other parameters */ nmp->nm_mount_args = *args; free(args, M_NANDFSMNT); error = nandfs_mount_checkpoint(nmp); if (error) { nandfs_unmount(mp, MNT_FORCE); goto unmounted; } if (!ronly) { error = start_syncer(nmp); if (error == 0) error = nandfs_start_cleaner(nmp->nm_nandfsdev); if (error) nandfs_unmount(mp, MNT_FORCE); } return (0); error: if (args != NULL) free(args, M_NANDFSMNT); if (nmp != NULL) { free(nmp, M_NANDFSMNT); mp->mnt_data = NULL; } unmounted: return (error); } static int nandfs_unmount(struct mount *mp, int mntflags) { struct nandfs_device *nandfsdev; struct nandfsmount *nmp; int error; int flags = 0; DPRINTF(VOLUMES, ("%s: mp = %p\n", __func__, (void *)mp)); if (mntflags & MNT_FORCE) flags |= FORCECLOSE; nmp = mp->mnt_data; nandfsdev = nmp->nm_nandfsdev; error = vflush(mp, 0, flags | SKIPSYSTEM, curthread); if (error) return (error); if (!(nmp->nm_ronly)) { nandfs_stop_cleaner(nandfsdev); stop_syncer(nmp); } if (nmp->nm_ifile_node) NANDFS_UNSET_SYSTEMFILE(NTOV(nmp->nm_ifile_node)); /* Remove our mount point */ STAILQ_REMOVE(&nandfsdev->nd_mounts, nmp, nandfsmount, nm_next_mount); /* Unmount the device itself when we're the last one */ nandfs_unmount_device(nandfsdev); free_nandfs_mountinfo(mp); /* * Finally, throw away the null_mount structure */ mp->mnt_data = 0; MNT_ILOCK(mp); mp->mnt_flag &= ~MNT_LOCAL; MNT_IUNLOCK(mp); return (0); } static int nandfs_statfs(struct mount *mp, struct statfs *sbp) { struct nandfsmount *nmp; struct nandfs_device *nandfsdev; struct nandfs_fsdata *fsdata; struct nandfs_super_block *sb; struct nandfs_block_group_desc *groups; struct nandfs_node *ifile; struct nandfs_mdt *mdt; struct buf *bp; int i, error; uint32_t entries_per_group; uint64_t files = 0; nmp = mp->mnt_data; nandfsdev = nmp->nm_nandfsdev; fsdata = &nandfsdev->nd_fsdata; sb = 
&nandfsdev->nd_super; ifile = nmp->nm_ifile_node; mdt = &nandfsdev->nd_ifile_mdt; entries_per_group = mdt->entries_per_group; VOP_LOCK(NTOV(ifile), LK_SHARED); error = nandfs_bread(ifile, 0, NOCRED, 0, &bp); if (error) { brelse(bp); VOP_UNLOCK(NTOV(ifile), 0); return (error); } groups = (struct nandfs_block_group_desc *)bp->b_data; for (i = 0; i < mdt->groups_per_desc_block; i++) files += (entries_per_group - groups[i].bg_nfrees); brelse(bp); VOP_UNLOCK(NTOV(ifile), 0); sbp->f_bsize = nandfsdev->nd_blocksize; sbp->f_iosize = sbp->f_bsize; sbp->f_blocks = fsdata->f_blocks_per_segment * fsdata->f_nsegments; sbp->f_bfree = sb->s_free_blocks_count; sbp->f_bavail = sbp->f_bfree; sbp->f_files = files; sbp->f_ffree = 0; return (0); } static int nandfs_root(struct mount *mp, int flags, struct vnode **vpp) { struct nandfsmount *nmp = VFSTONANDFS(mp); struct nandfs_node *node; int error; error = nandfs_get_node(nmp, NANDFS_ROOT_INO, &node); if (error) return (error); KASSERT(NTOV(node)->v_vflag & VV_ROOT, ("root_vp->v_vflag & VV_ROOT")); *vpp = NTOV(node); return (error); } static int nandfs_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) { struct nandfsmount *nmp = VFSTONANDFS(mp); struct nandfs_node *node; int error; error = nandfs_get_node(nmp, ino, &node); if (node) *vpp = NTOV(node); return (error); } static int nandfs_sync(struct mount *mp, int waitfor) { struct nandfsmount *nmp = VFSTONANDFS(mp); DPRINTF(SYNC, ("%s: mp %p waitfor %d\n", __func__, mp, waitfor)); /* * XXX: A hack to be removed soon */ if (waitfor == MNT_LAZY) return (0); if (waitfor == MNT_SUSPEND) return (0); nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_VFS_SYNC); return (0); } static struct vfsops nandfs_vfsops = { .vfs_init = nandfs_init, .vfs_mount = nandfs_mount, .vfs_root = nandfs_root, .vfs_statfs = nandfs_statfs, .vfs_uninit = nandfs_uninit, .vfs_unmount = nandfs_unmount, .vfs_vget = nandfs_vget, .vfs_sync = nandfs_sync, }; VFS_SET(nandfs_vfsops, nandfs, VFCF_LOOPBACK); 
Index: head/sys/fs/nandfs/nandfs_vnops.c =================================================================== --- head/sys/fs/nandfs/nandfs_vnops.c (revision 326267) +++ head/sys/fs/nandfs/nandfs_vnops.c (revision 326268) @@ -1,2437 +1,2439 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2010-2012 Semihalf * Copyright (c) 2008, 2009 Reinoud Zandijk * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * From: NetBSD: nilfs_vnops.c,v 1.2 2009/08/26 03:40:48 elad */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern uma_zone_t nandfs_node_zone; static void nandfs_read_filebuf(struct nandfs_node *, struct buf *); static void nandfs_itimes_locked(struct vnode *); static int nandfs_truncate(struct vnode *, uint64_t); static vop_pathconf_t nandfs_pathconf; #define UPDATE_CLOSE 0 #define UPDATE_WAIT 0 static int nandfs_inactive(struct vop_inactive_args *ap) { struct vnode *vp = ap->a_vp; struct nandfs_node *node = VTON(vp); int error = 0; DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, node)); if (node == NULL) { DPRINTF(NODE, ("%s: inactive NULL node\n", __func__)); return (0); } if (node->nn_inode.i_mode != 0 && !(node->nn_inode.i_links_count)) { nandfs_truncate(vp, 0); error = nandfs_node_destroy(node); if (error) nandfs_error("%s: destroy node: %p\n", __func__, node); node->nn_flags = 0; vrecycle(vp); } return (error); } static int nandfs_reclaim(struct vop_reclaim_args *ap) { struct vnode *vp = ap->a_vp; struct nandfs_node *nandfs_node = VTON(vp); struct nandfs_device *fsdev = nandfs_node->nn_nandfsdev; uint64_t ino = nandfs_node->nn_ino; DPRINTF(VNCALL, ("%s: vp:%p node:%p\n", __func__, vp, nandfs_node)); /* Invalidate all entries to a particular vnode. */ cache_purge(vp); /* Destroy the vm object and flush associated pages. 
 */
	vnode_destroy_vobject(vp);

	/* Remove from vfs hash if not system vnode */
	if (!NANDFS_SYS_NODE(nandfs_node->nn_ino))
		vfs_hash_remove(vp);

	/* Dispose all node knowledge */
	nandfs_dispose_node(&nandfs_node);

	if (!NANDFS_SYS_NODE(ino))
		NANDFS_WRITEUNLOCK(fsdev);

	return (0);
}

/*
 * VOP_READ(9): copy data out one filesystem block at a time through the
 * buffer cache into the caller's uio.
 */
static int
nandfs_read(struct vop_read_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nandfs_node *node = VTON(vp);
	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
	struct uio *uio = ap->a_uio;
	struct buf *bp;
	uint64_t size;
	uint32_t blocksize;
	off_t bytesinfile;
	ssize_t toread, off;
	daddr_t lbn;
	ssize_t resid;
	int error = 0;

	if (uio->uio_resid == 0)
		return (0);

	size = node->nn_inode.i_size;
	/* Reads at or beyond EOF succeed with nothing copied. */
	if (uio->uio_offset >= size)
		return (0);

	blocksize = nandfsdev->nd_blocksize;
	bytesinfile = size - uio->uio_offset;

	resid = omin(uio->uio_resid, bytesinfile);

	while (resid) {
		lbn = uio->uio_offset / blocksize;
		/* Mask arithmetic assumes blocksize is a power of two. */
		off = uio->uio_offset & (blocksize - 1);

		toread = omin(resid, blocksize - off);

		DPRINTF(READ, ("nandfs_read bn: 0x%jx toread: 0x%zx (0x%x)\n",
		    (uintmax_t)lbn, toread, blocksize));

		error = nandfs_bread(node, lbn, NOCRED, 0, &bp);
		if (error) {
			/* Buffer is released even on error, as elsewhere. */
			brelse(bp);
			break;
		}

		error = uiomove(bp->b_data + off, toread, uio);
		if (error) {
			brelse(bp);
			break;
		}

		brelse(bp);
		resid -= toread;
	}

	return (error);
}

/*
 * VOP_WRITE(9): write through the buffer cache; missing blocks are
 * created on first write and dirtied for the segment constructor.
 */
static int
nandfs_write(struct vop_write_args *ap)
{
	struct nandfs_device *fsdev;
	struct nandfs_node *node;
	struct vnode *vp;
	struct uio *uio;
	struct buf *bp;
	uint64_t file_size, vblk;
	uint32_t blocksize;
	ssize_t towrite, off;
	daddr_t lbn;
	ssize_t resid;
	int error, ioflag, modified;

	vp = ap->a_vp;
	uio = ap->a_uio;
	ioflag = ap->a_ioflag;
	node = VTON(vp);
	fsdev = node->nn_nandfsdev;

	/* Refuse writes outright when the device is out of free segments. */
	if (nandfs_fs_full(fsdev))
		return (ENOSPC);

	DPRINTF(WRITE, ("nandfs_write called %#zx at %#jx\n",
	    uio->uio_resid, (uintmax_t)uio->uio_offset));

	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (0);

	blocksize = fsdev->nd_blocksize;
	file_size = node->nn_inode.i_size;

	switch (vp->v_type) {
	case VREG:
		if
(ioflag & IO_APPEND) uio->uio_offset = file_size; break; case VDIR: return (EISDIR); case VLNK: break; default: panic("%s: bad file type vp: %p", __func__, vp); } /* If explicitly asked to append, uio_offset can be wrong? */ if (ioflag & IO_APPEND) uio->uio_offset = file_size; resid = uio->uio_resid; modified = error = 0; while (uio->uio_resid) { lbn = uio->uio_offset / blocksize; off = uio->uio_offset & (blocksize - 1); towrite = omin(uio->uio_resid, blocksize - off); DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x)\n", __func__, (uintmax_t)lbn, towrite, blocksize)); error = nandfs_bmap_lookup(node, lbn, &vblk); if (error) break; DPRINTF(WRITE, ("%s: lbn: 0x%jd toread: 0x%zx (0x%x) " "vblk=%jx\n", __func__, (uintmax_t)lbn, towrite, blocksize, vblk)); if (vblk != 0) error = nandfs_bread(node, lbn, NOCRED, 0, &bp); else error = nandfs_bcreate(node, lbn, NOCRED, 0, &bp); DPRINTF(WRITE, ("%s: vp %p bread bp %p lbn %#jx\n", __func__, vp, bp, (uintmax_t)lbn)); if (error) { if (bp) brelse(bp); break; } error = uiomove((char *)bp->b_data + off, (int)towrite, uio); if (error) break; error = nandfs_dirty_buf(bp, 0); if (error) break; modified++; } /* XXX proper handling when only part of file was properly written */ if (modified) { if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) node->nn_inode.i_mode &= ~(ISUID | ISGID); if (file_size < uio->uio_offset + uio->uio_resid) { node->nn_inode.i_size = uio->uio_offset + uio->uio_resid; node->nn_flags |= IN_CHANGE | IN_UPDATE; vnode_pager_setsize(vp, uio->uio_offset + uio->uio_resid); nandfs_itimes(vp); } } DPRINTF(WRITE, ("%s: return:%d\n", __func__, error)); return (error); } static int nandfs_lookup(struct vop_cachedlookup_args *ap) { struct vnode *dvp, **vpp; struct componentname *cnp; struct ucred *cred; struct thread *td; struct nandfs_node *dir_node, *node; struct nandfsmount *nmp; uint64_t ino, off; const char *name; int namelen, nameiop, islastcn, mounted_ro; int error, found; DPRINTF(VNCALL, 
("%s\n", __func__)); dvp = ap->a_dvp; vpp = ap->a_vpp; *vpp = NULL; cnp = ap->a_cnp; cred = cnp->cn_cred; td = cnp->cn_thread; dir_node = VTON(dvp); nmp = dir_node->nn_nmp; /* Simplify/clarification flags */ nameiop = cnp->cn_nameiop; islastcn = cnp->cn_flags & ISLASTCN; mounted_ro = dvp->v_mount->mnt_flag & MNT_RDONLY; /* * If requesting a modify on the last path element on a read-only * filingsystem, reject lookup; */ if (islastcn && mounted_ro && (nameiop == DELETE || nameiop == RENAME)) return (EROFS); if (dir_node->nn_inode.i_links_count == 0) return (ENOENT); /* * Obviously, the file is not (anymore) in the namecache, we have to * search for it. There are three basic cases: '.', '..' and others. * * Following the guidelines of VOP_LOOKUP manpage and tmpfs. */ error = 0; if ((cnp->cn_namelen == 1) && (cnp->cn_nameptr[0] == '.')) { DPRINTF(LOOKUP, ("\tlookup '.'\n")); /* Special case 1 '.' */ VREF(dvp); *vpp = dvp; /* Done */ } else if (cnp->cn_flags & ISDOTDOT) { /* Special case 2 '..' */ DPRINTF(LOOKUP, ("\tlookup '..'\n")); /* Get our node */ name = ".."; namelen = 2; error = nandfs_lookup_name_in_dir(dvp, name, namelen, &ino, &found, &off); if (error) goto out; if (!found) error = ENOENT; /* First unlock parent */ VOP_UNLOCK(dvp, 0); if (error == 0) { DPRINTF(LOOKUP, ("\tfound '..'\n")); /* Try to create/reuse the node */ error = nandfs_get_node(nmp, ino, &node); if (!error) { DPRINTF(LOOKUP, ("\tnode retrieved/created OK\n")); *vpp = NTOV(node); } } /* Try to relock parent */ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); } else { DPRINTF(LOOKUP, ("\tlookup file\n")); /* All other files */ /* Look up filename in the directory returning its inode */ name = cnp->cn_nameptr; namelen = cnp->cn_namelen; error = nandfs_lookup_name_in_dir(dvp, name, namelen, &ino, &found, &off); if (error) goto out; if (!found) { DPRINTF(LOOKUP, ("\tNOT found\n")); /* * UGH, didn't find name. 
If we're creating or * renaming on the last name this is OK and we ought * to return EJUSTRETURN if its allowed to be created. */ error = ENOENT; if ((nameiop == CREATE || nameiop == RENAME) && islastcn) { error = VOP_ACCESS(dvp, VWRITE, cred, td); if (!error) { /* keep the component name */ cnp->cn_flags |= SAVENAME; error = EJUSTRETURN; } } /* Done */ } else { if (ino == NANDFS_WHT_INO) cnp->cn_flags |= ISWHITEOUT; if ((cnp->cn_flags & ISWHITEOUT) && (nameiop == LOOKUP)) return (ENOENT); if ((nameiop == DELETE) && islastcn) { if ((cnp->cn_flags & ISWHITEOUT) && (cnp->cn_flags & DOWHITEOUT)) { cnp->cn_flags |= SAVENAME; dir_node->nn_diroff = off; return (EJUSTRETURN); } error = VOP_ACCESS(dvp, VWRITE, cred, cnp->cn_thread); if (error) return (error); /* Try to create/reuse the node */ error = nandfs_get_node(nmp, ino, &node); if (!error) { *vpp = NTOV(node); node->nn_diroff = off; } if ((dir_node->nn_inode.i_mode & ISVTX) && cred->cr_uid != 0 && cred->cr_uid != dir_node->nn_inode.i_uid && node->nn_inode.i_uid != cred->cr_uid) { vput(*vpp); *vpp = NULL; return (EPERM); } } else if ((nameiop == RENAME) && islastcn) { error = VOP_ACCESS(dvp, VWRITE, cred, cnp->cn_thread); if (error) return (error); /* Try to create/reuse the node */ error = nandfs_get_node(nmp, ino, &node); if (!error) { *vpp = NTOV(node); node->nn_diroff = off; } } else { /* Try to create/reuse the node */ error = nandfs_get_node(nmp, ino, &node); if (!error) { *vpp = NTOV(node); node->nn_diroff = off; } } } } out: /* * Store result in the cache if requested. If we are creating a file, * the file might not be found and thus putting it into the namecache * might be seen as negative caching. 
 */
	if ((cnp->cn_flags & MAKEENTRY) != 0)
		cache_enter(dvp, *vpp, cnp);

	return (error);
}

/*
 * VOP_GETATTR(9): fill in *vap from the in-core nandfs inode.
 */
static int
nandfs_getattr(struct vop_getattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vattr *vap = ap->a_vap;
	struct nandfs_node *node = VTON(vp);
	struct nandfs_inode *inode = &node->nn_inode;

	DPRINTF(VNCALL, ("%s: vp: %p\n", __func__, vp));
	nandfs_itimes(vp);

	/* Basic info */
	VATTR_NULL(vap);
	/*
	 * va_atime is filled from i_mtime; the inode apparently carries no
	 * separate access time.  NOTE(review): confirm this is intended.
	 */
	vap->va_atime.tv_sec = inode->i_mtime;
	vap->va_atime.tv_nsec = inode->i_mtime_nsec;
	vap->va_mtime.tv_sec = inode->i_mtime;
	vap->va_mtime.tv_nsec = inode->i_mtime_nsec;
	vap->va_ctime.tv_sec = inode->i_ctime;
	vap->va_ctime.tv_nsec = inode->i_ctime_nsec;
	vap->va_type = IFTOVT(inode->i_mode);
	vap->va_mode = inode->i_mode & ~S_IFMT;
	vap->va_nlink = inode->i_links_count;
	vap->va_uid = inode->i_uid;
	vap->va_gid = inode->i_gid;
	vap->va_rdev = inode->i_special;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
	vap->va_fileid = node->nn_ino;
	vap->va_size = inode->i_size;
	vap->va_blocksize = node->nn_nandfsdev->nd_blocksize;
	vap->va_gen = 0;
	vap->va_flags = inode->i_flags;
	vap->va_bytes = inode->i_blocks * vap->va_blocksize;
	vap->va_filerev = 0;
	vap->va_vaflags = 0;
	return (0);
}

/*
 * Invalidate all buffers of vp at logical block nblks and beyond.  The
 * vnode lock must be held (asserted below).
 */
static int
nandfs_vtruncbuf(struct vnode *vp, uint64_t nblks)
{
	struct nandfs_device *nffsdev;
	struct bufobj *bo;
	struct buf *bp, *nbp;

	bo = &vp->v_bufobj;
	nffsdev = VTON(vp)->nn_nandfsdev;

	ASSERT_VOP_LOCKED(vp, "nandfs_truncate");
restart:
	BO_LOCK(bo);
restart_locked:
	/* First pass: drop clean buffers past the new end of file. */
	TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
		if (bp->b_lblkno < nblks)
			continue;
		/* Could not get the buf lock without sleeping: rescan. */
		if (BUF_LOCK(bp, LK_EXCLUSIVE | LK_NOWAIT, NULL))
			goto restart_locked;

		bremfree(bp);
		bp->b_flags |= (B_INVAL | B_RELBUF);
		bp->b_flags &= ~(B_ASYNC | B_MANAGED);
		BO_UNLOCK(bo);
		brelse(bp);
		BO_LOCK(bo);
	}

	/* Second pass: drop dirty buffers; may sleep for the buf lock. */
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		if (bp->b_lblkno < nblks)
			continue;
		if (BUF_LOCK(bp,
		    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
		    BO_LOCKPTR(bo)) == ENOLCK)
			goto restart;
		bp->b_flags |= (B_INVAL | B_RELBUF);
		bp->b_flags &=
~(B_ASYNC | B_MANAGED); brelse(bp); nandfs_dirty_bufs_decrement(nffsdev); BO_LOCK(bo); } BO_UNLOCK(bo); return (0); } static int nandfs_truncate(struct vnode *vp, uint64_t newsize) { struct nandfs_device *nffsdev; struct nandfs_node *node; struct nandfs_inode *inode; struct buf *bp = NULL; uint64_t oblks, nblks, vblk, size, rest; int error; node = VTON(vp); nffsdev = node->nn_nandfsdev; inode = &node->nn_inode; /* Calculate end of file */ size = inode->i_size; if (newsize == size) { node->nn_flags |= IN_CHANGE | IN_UPDATE; nandfs_itimes(vp); return (0); } if (newsize > size) { inode->i_size = newsize; vnode_pager_setsize(vp, newsize); node->nn_flags |= IN_CHANGE | IN_UPDATE; nandfs_itimes(vp); return (0); } nblks = howmany(newsize, nffsdev->nd_blocksize); oblks = howmany(size, nffsdev->nd_blocksize); rest = newsize % nffsdev->nd_blocksize; if (rest) { error = nandfs_bmap_lookup(node, nblks - 1, &vblk); if (error) return (error); if (vblk != 0) error = nandfs_bread(node, nblks - 1, NOCRED, 0, &bp); else error = nandfs_bcreate(node, nblks - 1, NOCRED, 0, &bp); if (error) { if (bp) brelse(bp); return (error); } bzero((char *)bp->b_data + rest, (u_int)(nffsdev->nd_blocksize - rest)); error = nandfs_dirty_buf(bp, 0); if (error) return (error); } DPRINTF(VNCALL, ("%s: vp %p oblks %jx nblks %jx\n", __func__, vp, oblks, nblks)); error = nandfs_bmap_truncate_mapping(node, oblks - 1, nblks - 1); if (error) { if (bp) nandfs_undirty_buf(bp); return (error); } error = nandfs_vtruncbuf(vp, nblks); if (error) { if (bp) nandfs_undirty_buf(bp); return (error); } inode->i_size = newsize; vnode_pager_setsize(vp, newsize); node->nn_flags |= IN_CHANGE | IN_UPDATE; nandfs_itimes(vp); return (error); } static void nandfs_itimes_locked(struct vnode *vp) { struct nandfs_node *node; struct nandfs_inode *inode; struct timespec ts; ASSERT_VI_LOCKED(vp, __func__); node = VTON(vp); inode = &node->nn_inode; if ((node->nn_flags & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) return; if 
(((vp->v_mount->mnt_kern_flag & (MNTK_SUSPENDED | MNTK_SUSPEND)) == 0) || (node->nn_flags & (IN_CHANGE | IN_UPDATE))) node->nn_flags |= IN_MODIFIED; vfs_timestamp(&ts); if (node->nn_flags & IN_UPDATE) { inode->i_mtime = ts.tv_sec; inode->i_mtime_nsec = ts.tv_nsec; } if (node->nn_flags & IN_CHANGE) { inode->i_ctime = ts.tv_sec; inode->i_ctime_nsec = ts.tv_nsec; } node->nn_flags &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); } void nandfs_itimes(struct vnode *vp) { VI_LOCK(vp); nandfs_itimes_locked(vp); VI_UNLOCK(vp); } static int nandfs_chmod(struct vnode *vp, int mode, struct ucred *cred, struct thread *td) { struct nandfs_node *node = VTON(vp); struct nandfs_inode *inode = &node->nn_inode; uint16_t nmode; int error = 0; DPRINTF(VNCALL, ("%s: vp %p, mode %x, cred %p, td %p\n", __func__, vp, mode, cred, td)); /* * To modify the permissions on a file, must possess VADMIN * for that file. */ if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) return (error); /* * Privileged processes may set the sticky bit on non-directories, * as well as set the setgid bit on a file with a group that the * process is not a member of. Both of these are allowed in * jail(8). */ if (vp->v_type != VDIR && (mode & S_ISTXT)) { if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0)) return (EFTYPE); } if (!groupmember(inode->i_gid, cred) && (mode & ISGID)) { error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); if (error) return (error); } /* * Deny setting setuid if we are not the file owner. 
*/
	/* Setting the set-uid bit on a file we don't own needs privilege. */
	if ((mode & ISUID) && inode->i_uid != cred->cr_uid) {
		error = priv_check_cred(cred, PRIV_VFS_ADMIN, 0);
		if (error)
			return (error);
	}
	/* Replace only the permission bits; keep the file type bits. */
	nmode = inode->i_mode;
	nmode &= ~ALLPERMS;
	nmode |= (mode & ALLPERMS);
	inode->i_mode = nmode;
	node->nn_flags |= IN_CHANGE;

	DPRINTF(VNCALL, ("%s: to mode %x\n", __func__, nmode));

	return (error);
}

/*
 * Change the owner/group of the file at vp to uid/gid, enforcing the
 * usual VADMIN/privilege rules.  Clears set-id bits on an ownership
 * change unless the caller holds PRIV_VFS_RETAINSUGID.
 */
static int
nandfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
    struct thread *td)
{
	struct nandfs_node *node = VTON(vp);
	struct nandfs_inode *inode = &node->nn_inode;
	uid_t ouid;
	gid_t ogid;
	int error = 0;

	/* VNOVAL means "leave unchanged". */
	if (uid == (uid_t)VNOVAL)
		uid = inode->i_uid;
	if (gid == (gid_t)VNOVAL)
		gid = inode->i_gid;
	/*
	 * To modify the ownership of a file, must possess VADMIN for that
	 * file.
	 */
	if ((error = VOP_ACCESSX(vp, VWRITE_OWNER, cred, td)))
		return (error);
	/*
	 * To change the owner of a file, or change the group of a file to a
	 * group of which we are not a member, the caller must have
	 * privilege.
	 */
	if (((uid != inode->i_uid && uid != cred->cr_uid) ||
	    (gid != inode->i_gid && !groupmember(gid, cred))) &&
	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
		return (error);

	ogid = inode->i_gid;
	ouid = inode->i_uid;

	inode->i_gid = gid;
	inode->i_uid = uid;

	node->nn_flags |= IN_CHANGE;
	/* Drop set-id bits on ownership change unless privileged. */
	if ((inode->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) {
		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0))
			inode->i_mode &= ~(ISUID | ISGID);
	}
	DPRINTF(VNCALL, ("%s: vp %p, cred %p, td %p - ret OK\n", __func__, vp,
	    cred, td));
	return (0);
}

/*
 * VOP_SETATTR: apply the settable fields of *vap (flags, size, owner,
 * mode, times) to the node, rejecting attributes that can never be set.
 */
static int
nandfs_setattr(struct vop_setattr_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nandfs_node *node = VTON(vp);
	struct nandfs_inode *inode = &node->nn_inode;
	struct vattr *vap = ap->a_vap;
	struct ucred *cred = ap->a_cred;
	struct thread *td = curthread;
	uint32_t flags;
	int error = 0;

	/* Attributes no caller may set. */
	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
		DPRINTF(VNCALL, ("%s: unsettable attribute\n", __func__));
		return (EINVAL);
	}

	if (vap->va_flags != VNOVAL) {
		DPRINTF(VNCALL, ("%s: vp:%p td:%p flags:%lx\n", __func__, vp,
		    td, vap->va_flags));

		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		/*
		 * Callers may only modify the file flags on objects they
		 * have VADMIN rights for.
		 */
		if ((error = VOP_ACCESS(vp, VADMIN, cred, td)))
			return (error);
		/*
		 * Unprivileged processes are not permitted to unset system
		 * flags, or modify flags if any system flags are set.
		 * Privileged non-jail processes may not modify system flags
		 * if securelevel > 0 and any existing system flags are set.
		 * Privileged jail processes behave like privileged non-jail
		 * processes if the security.jail.chflags_allowed sysctl is
		 * non-zero; otherwise, they behave like unprivileged
		 * processes.
		 */
		flags = inode->i_flags;
		if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
			/* Privileged: may set system flags, securelevel
			 * permitting. */
			if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
				error = securelevel_gt(cred, 0);
				if (error)
					return (error);
			}
			/* Snapshot flag cannot be set or cleared */
			if (((vap->va_flags & SF_SNAPSHOT) != 0 &&
			    (flags & SF_SNAPSHOT) == 0) ||
			    ((vap->va_flags & SF_SNAPSHOT) == 0 &&
			    (flags & SF_SNAPSHOT) != 0))
				return (EPERM);
			inode->i_flags = vap->va_flags;
		} else {
			/* Unprivileged: user flags only, and only if no
			 * system flags are set. */
			if (flags & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
			    (vap->va_flags & UF_SETTABLE) != vap->va_flags)
				return (EPERM);
			flags &= SF_SETTABLE;
			flags |= (vap->va_flags & UF_SETTABLE);
			inode->i_flags = flags;
		}
		node->nn_flags |= IN_CHANGE;
		if (vap->va_flags & (IMMUTABLE | APPEND))
			return (0);
	}
	if (inode->i_flags & (IMMUTABLE | APPEND))
		return (EPERM);

	if (vap->va_size != (u_quad_t)VNOVAL) {
		DPRINTF(VNCALL, ("%s: vp:%p td:%p size:%jx\n", __func__, vp,
		    td, (uintmax_t)vap->va_size));

		switch (vp->v_type) {
		case VDIR:
			return (EISDIR);
		case VLNK:
		case VREG:
			if (vp->v_mount->mnt_flag & MNT_RDONLY)
				return (EROFS);
			if ((inode->i_flags & SF_SNAPSHOT) != 0)
				return (EPERM);
			break;
		default:
			/* Size changes on other node types are ignored. */
			return (0);
		}

		if (vap->va_size > node->nn_nandfsdev->nd_maxfilesize)
			return (EFBIG);

		KASSERT((vp->v_type == VREG), ("Set size %d", vp->v_type));
		/* NOTE(review): nandfs_truncate() return value is ignored
		 * here — TODO confirm this is intentional. */
		nandfs_truncate(vp, vap->va_size);
		node->nn_flags |= IN_CHANGE;

		return (0);
	}

	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		DPRINTF(VNCALL, ("%s: vp:%p td:%p uid/gid %x/%x\n", __func__,
		    vp, td, vap->va_uid, vap->va_gid));
		error = nandfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);
		if (error)
			return (error);
	}

	if (vap->va_mode != (mode_t)VNOVAL) {
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return (EROFS);
		DPRINTF(VNCALL, ("%s: vp:%p td:%p mode %x\n", __func__, vp, td,
		    vap->va_mode));
		error = nandfs_chmod(vp, (int)vap->va_mode, cred, td);
		if (error)
			return (error);
	}

	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL ||
	    vap->va_birthtime.tv_sec != VNOVAL) {
		DPRINTF(VNCALL, ("%s: vp:%p td:%p time a/m/b %jx/%jx/%jx\n",
		    __func__, vp, td, (uintmax_t)vap->va_atime.tv_sec,
		    (uintmax_t)vap->va_mtime.tv_sec,
		    (uintmax_t)vap->va_birthtime.tv_sec));
		/* Only flags are set here; nandfs_itimes() stamps the
		 * inode from the flags. */
		if (vap->va_atime.tv_sec != VNOVAL)
			node->nn_flags |= IN_ACCESS;
		if (vap->va_mtime.tv_sec != VNOVAL)
			node->nn_flags |= IN_CHANGE | IN_UPDATE;
		if (vap->va_birthtime.tv_sec != VNOVAL)
			node->nn_flags |= IN_MODIFIED;
		nandfs_itimes(vp);
		return (0);
	}

	return (0);
}

/*
 * VOP_OPEN: refuse device nodes, enforce append-only, and create the
 * backing VM object.
 */
static int
nandfs_open(struct vop_open_args *ap)
{
	struct nandfs_node *node = VTON(ap->a_vp);
	uint64_t filesize;

	DPRINTF(VNCALL, ("nandfs_open called ap->a_mode %x\n", ap->a_mode));

	if (ap->a_vp->v_type == VCHR || ap->a_vp->v_type == VBLK)
		return (EOPNOTSUPP);

	/* An append-only file may only be opened for append-mode writes. */
	if ((node->nn_inode.i_flags & APPEND) &&
	    (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE)
		return (EPERM);

	filesize = node->nn_inode.i_size;
	vnode_create_vobject(ap->a_vp, filesize, ap->a_td);

	return (0);
}

/*
 * VOP_CLOSE: update timestamps if other users still hold the vnode.
 */
static int
nandfs_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nandfs_node *node = VTON(vp);

	DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node));

	mtx_lock(&vp->v_interlock);
	if (vp->v_usecount > 1)
		nandfs_itimes_locked(vp);
	mtx_unlock(&vp->v_interlock);

	return (0);
}

/*
 * Check whether the requested access is possible at all for this node
 * type and mount (read-only mount, immutable flag), independent of the
 * caller's credentials.
 */
static int
nandfs_check_possible(struct vnode *vp, struct vattr *vap, mode_t mode)
{

	/* Check if we are allowed to write */
	switch (vap->va_type) {
	case VDIR:
	case VLNK:
	case VREG:
		/*
		 * Normal nodes: check if we're on a read-only mounted
		 * filesystem and bomb out if we're trying to write.
		 */
		if ((mode & VMODIFY_PERMS) &&
		    (vp->v_mount->mnt_flag & MNT_RDONLY))
			return (EROFS);
		break;
	case VBLK:
	case VCHR:
	case VSOCK:
	case VFIFO:
		/*
		 * Special nodes: even on read-only mounted filesystems
		 * these are allowed to be written to if permissions allow.
		 */
		break;
	default:
		/* No idea what this is */
		return (EINVAL);
	}

	/* No one may write immutable files */
	if ((mode & VWRITE) && (VTON(vp)->nn_inode.i_flags & IMMUTABLE))
		return (EPERM);

	return (0);
}

/*
 * Check whether the caller's credentials permit the requested access,
 * using the generic vaccess() rules.
 */
static int
nandfs_check_permitted(struct vnode *vp, struct vattr *vap, mode_t mode,
    struct ucred *cred)
{

	return (vaccess(vp->v_type, vap->va_mode, vap->va_uid, vap->va_gid,
	    mode, cred, NULL));
}

/*
 * VOP_ADVLOCK: advisory record locking, delegated to lf_advlock().
 */
static int
nandfs_advlock(struct vop_advlock_args *ap)
{
	struct nandfs_node *nvp;
	quad_t size;

	nvp = VTON(ap->a_vp);
	size = nvp->nn_inode.i_size;
	return (lf_advlock(ap, &(nvp->nn_lockf), size));
}

/*
 * VOP_ACCESS: combine the "possible" (mount/flags) and "permitted"
 * (credentials) checks.
 */
static int
nandfs_access(struct vop_access_args *ap)
{
	struct vnode *vp = ap->a_vp;
	accmode_t accmode = ap->a_accmode;
	struct ucred *cred = ap->a_cred;
	struct vattr vap;
	int error;

	DPRINTF(VNCALL, ("%s: vp:%p mode: %x\n", __func__, vp, accmode));

	error = VOP_GETATTR(vp, &vap, NULL);
	if (error)
		return (error);

	error = nandfs_check_possible(vp, &vap, accmode);
	if (error)
		return (error);

	error = nandfs_check_permitted(vp, &vap, accmode, cred);

	return (error);
}

/*
 * VOP_PRINT: dump node identity and flags for debugging.
 */
static int
nandfs_print(struct vop_print_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nandfs_node *nvp = VTON(vp);

	printf("\tvp=%p, nandfs_node=%p\n", vp, nvp);
	printf("nandfs inode %#jx\n", (uintmax_t)nvp->nn_ino);
	printf("flags = 0x%b\n", (u_int)nvp->nn_flags, PRINT_NODE_FLAGS);

	return (0);
}

/*
 * Map a file-relative read buffer to the device and issue it.  Errors
 * and holes (virtual block 0) are completed here via bufdone().
 */
static void
nandfs_read_filebuf(struct nandfs_node *node, struct buf *bp)
{
	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
	struct buf *nbp;
	nandfs_daddr_t vblk, pblk;
	nandfs_lbn_t from;
	uint32_t blocksize;
	int error = 0;
	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;

	/*
	 * Translate all the block sectors into a series of buffers to read
	 * asynchronously from the nandfs device. Note that this lookup may
	 * induce readin's too.
	 */
	blocksize = nandfsdev->nd_blocksize;
	/* Only single-block buffers are supported here. */
	if (bp->b_bcount / blocksize != 1)
		panic("invalid b_count in bp %p\n", bp);

	from = bp->b_blkno;

	DPRINTF(READ, ("\tread in from inode %#jx blkno %#jx"
	    " count %#lx\n", (uintmax_t)node->nn_ino, from,
	    bp->b_bcount));

	/* Get virtual block numbers for the vnode's buffer span */
	error = nandfs_bmap_lookup(node, from, &vblk);
	if (error) {
		bp->b_error = EINVAL;
		bp->b_ioflags |= BIO_ERROR;
		bufdone(bp);
		return;
	}

	/* Translate virtual block numbers to physical block numbers */
	error = nandfs_vtop(node, vblk, &pblk);
	if (error) {
		bp->b_error = EINVAL;
		bp->b_ioflags |= BIO_ERROR;
		bufdone(bp);
		return;
	}

	/* Issue translated blocks */
	bp->b_resid = bp->b_bcount;

	/* Note virtual block 0 marks not mapped */
	if (vblk == 0) {
		/* Hole: satisfy the read with zeros, no device I/O. */
		vfs_bio_clrbuf(bp);
		bufdone(bp);
		return;
	}

	nbp = bp;
	nbp->b_blkno = pblk * blk2dev;
	bp->b_iooffset = dbtob(nbp->b_blkno);
	MPASS(bp->b_iooffset >= 0);

	BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, nbp);
	nandfs_vblk_set(bp, vblk);
	DPRINTF(READ, ("read_filebuf : ino %#jx blk %#jx -> "
	    "%#jx -> %#jx [bp %p]\n", (uintmax_t)node->nn_ino,
	    (uintmax_t)(from), (uintmax_t)vblk, (uintmax_t)pblk, nbp));
}

/*
 * Pass a write buffer straight through to the backing device.
 */
static void
nandfs_write_filebuf(struct nandfs_node *node, struct buf *bp)
{
	struct nandfs_device *nandfsdev = node->nn_nandfsdev;

	bp->b_iooffset = dbtob(bp->b_blkno);
	MPASS(bp->b_iooffset >= 0);
	BO_STRATEGY(&nandfsdev->nd_devvp->v_bufobj, bp);
}

/*
 * VOP_STRATEGY: dispatch a buffer to the read or write path.
 */
static int
nandfs_strategy(struct vop_strategy_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct buf *bp = ap->a_bp;
	struct nandfs_node *node = VTON(vp);

	/* check if we ought to be here */
	KASSERT((vp->v_type != VBLK && vp->v_type != VCHR),
	    ("nandfs_strategy on type %d", vp->v_type));

	/* Translate if needed and pass on */
	if (bp->b_iocmd == BIO_READ) {
		nandfs_read_filebuf(node, bp);
		return (0);
	}

	/* Send to segment collector */
	nandfs_write_filebuf(node, bp);
	return (0);
}

/*
 * VOP_READDIR: walk the on-disk directory entries from uio_offset and
 * copy them out as struct dirent records until the uio is full or the
 * directory ends.
 */
static int
nandfs_readdir(struct vop_readdir_args *ap)
{
	struct uio *uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct nandfs_node *node = VTON(vp);
	struct nandfs_dir_entry *ndirent;
	struct dirent dirent;
	struct buf *bp;
	uint64_t file_size, diroffset, transoffset, blkoff;
	uint64_t blocknr;
	uint32_t blocksize = node->nn_nandfsdev->nd_blocksize;
	uint8_t *pos, name_len;
	int error;

	DPRINTF(READDIR, ("nandfs_readdir called\n"));

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	file_size = node->nn_inode.i_size;
	DPRINTF(READDIR, ("nandfs_readdir filesize %jd resid %zd\n",
	    (uintmax_t)file_size, uio->uio_resid ));

	/* We are called just as long as we keep on pushing data in */
	error = 0;
	if ((uio->uio_offset < file_size) &&
	    (uio->uio_resid >= sizeof(struct dirent))) {
		diroffset = uio->uio_offset;
		transoffset = diroffset;

		blocknr = diroffset / blocksize;
		blkoff = diroffset % blocksize;
		error = nandfs_bread(node, blocknr, NOCRED, 0, &bp);
		if (error) {
			brelse(bp);
			return (EIO);
		}
		while (diroffset < file_size) {
			DPRINTF(READDIR, ("readdir : offset = %"PRIu64"\n",
			    diroffset));
			/* Crossed a block boundary: fetch the next block. */
			if (blkoff >= blocksize) {
				blkoff = 0;
				blocknr++;
				brelse(bp);
				error = nandfs_bread(node, blocknr, NOCRED, 0,
				    &bp);
				if (error) {
					brelse(bp);
					return (EIO);
				}
			}

			/* Read in one dirent */
			pos = (uint8_t *)bp->b_data + blkoff;
			ndirent = (struct nandfs_dir_entry *)pos;

			name_len = ndirent->name_len;
			memset(&dirent, 0, sizeof(struct dirent));
			dirent.d_fileno = ndirent->inode;
			/* inode 0 marks an unused (deleted) entry. */
			if (dirent.d_fileno) {
				dirent.d_type = ndirent->file_type;
				dirent.d_namlen = name_len;
				strncpy(dirent.d_name, ndirent->name,
				    name_len);
				dirent.d_reclen = GENERIC_DIRSIZ(&dirent);
				DPRINTF(READDIR,
("copying `%*.*s`\n", name_len, name_len, dirent.d_name)); } /* * If there isn't enough space in the uio to return a * whole dirent, break off read */ if (uio->uio_resid < GENERIC_DIRSIZ(&dirent)) break; /* Transfer */ if (dirent.d_fileno) uiomove(&dirent, GENERIC_DIRSIZ(&dirent), uio); /* Advance */ diroffset += ndirent->rec_len; blkoff += ndirent->rec_len; /* Remember the last entry we transferred */ transoffset = diroffset; } brelse(bp); /* Pass on last transferred offset */ uio->uio_offset = transoffset; } if (ap->a_eofflag) *ap->a_eofflag = (uio->uio_offset >= file_size); return (error); } static int nandfs_dirempty(struct vnode *dvp, uint64_t parentino, struct ucred *cred) { struct nandfs_node *dnode = VTON(dvp); struct nandfs_dir_entry *dirent; uint64_t file_size = dnode->nn_inode.i_size; uint64_t blockcount = dnode->nn_inode.i_blocks; uint64_t blocknr; uint32_t blocksize = dnode->nn_nandfsdev->nd_blocksize; uint32_t limit; uint32_t off; uint8_t *pos; struct buf *bp; int error; DPRINTF(LOOKUP, ("%s: dvp %p parentino %#jx cred %p\n", __func__, dvp, (uintmax_t)parentino, cred)); KASSERT((file_size != 0), ("nandfs_dirempty for NULL dir %p", dvp)); blocknr = 0; while (blocknr < blockcount) { error = nandfs_bread(dnode, blocknr, NOCRED, 0, &bp); if (error) { brelse(bp); return (0); } pos = (uint8_t *)bp->b_data; off = 0; if (blocknr == (blockcount - 1)) limit = file_size % blocksize; else limit = blocksize; while (off < limit) { dirent = (struct nandfs_dir_entry *)(pos + off); off += dirent->rec_len; if (dirent->inode == 0) continue; switch (dirent->name_len) { case 0: break; case 1: if (dirent->name[0] != '.') goto notempty; KASSERT(dirent->inode == dnode->nn_ino, (".'s inode does not match dir")); break; case 2: if (dirent->name[0] != '.' 
&& dirent->name[1] != '.') goto notempty; KASSERT(dirent->inode == parentino, ("..'s inode does not match parent")); break; default: goto notempty; } } brelse(bp); blocknr++; } return (1); notempty: brelse(bp); return (0); } static int nandfs_link(struct vop_link_args *ap) { struct vnode *tdvp = ap->a_tdvp; struct vnode *vp = ap->a_vp; struct componentname *cnp = ap->a_cnp; struct nandfs_node *node = VTON(vp); struct nandfs_inode *inode = &node->nn_inode; int error; if (inode->i_links_count >= LINK_MAX) return (EMLINK); if (inode->i_flags & (IMMUTABLE | APPEND)) return (EPERM); /* Update link count */ inode->i_links_count++; /* Add dir entry */ error = nandfs_add_dirent(tdvp, node->nn_ino, cnp->cn_nameptr, cnp->cn_namelen, IFTODT(inode->i_mode)); if (error) { inode->i_links_count--; } node->nn_flags |= IN_CHANGE; nandfs_itimes(vp); DPRINTF(VNCALL, ("%s: tdvp %p vp %p cnp %p\n", __func__, tdvp, vp, cnp)); return (0); } static int nandfs_create(struct vop_create_args *ap) { struct vnode *dvp = ap->a_dvp; struct vnode **vpp = ap->a_vpp; struct componentname *cnp = ap->a_cnp; uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode); struct nandfs_node *dir_node = VTON(dvp); struct nandfsmount *nmp = dir_node->nn_nmp; struct nandfs_node *node; int error; DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp)); if (nandfs_fs_full(dir_node->nn_nandfsdev)) return (ENOSPC); /* Create new vnode/inode */ error = nandfs_node_create(nmp, &node, mode); if (error) return (error); node->nn_inode.i_gid = dir_node->nn_inode.i_gid; node->nn_inode.i_uid = cnp->cn_cred->cr_uid; /* Add new dir entry */ error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr, cnp->cn_namelen, IFTODT(mode)); if (error) { if (nandfs_node_destroy(node)) { nandfs_error("%s: error destroying node %p\n", __func__, node); } return (error); } *vpp = NTOV(node); if ((cnp->cn_flags & MAKEENTRY) != 0) cache_enter(dvp, *vpp, cnp); DPRINTF(VNCALL, ("created file vp %p nandnode %p ino %jx\n", *vpp, node, 
(uintmax_t)node->nn_ino));
	return (0);
}

/*
 * VOP_REMOVE: unlink a non-directory entry from dvp.  The actual inode
 * reclamation happens later, driven by the link count.
 */
static int
nandfs_remove(struct vop_remove_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct nandfs_node *node = VTON(vp);
	struct nandfs_node *dnode = VTON(dvp);
	struct componentname *cnp = ap->a_cnp;

	DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx link %d\n",
	    __func__, dvp, vp, node, (uintmax_t)node->nn_ino,
	    node->nn_inode.i_links_count));

	if (vp->v_type == VDIR)
		return (EISDIR);

	/* Files marked as immutable or append-only cannot be deleted. */
	if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
	    (dnode->nn_inode.i_flags & APPEND))
		return (EPERM);

	/* NOTE(review): nandfs_remove_dirent() return value is ignored —
	 * TODO confirm this is intentional. */
	nandfs_remove_dirent(dvp, node, cnp);
	node->nn_inode.i_links_count--;
	node->nn_flags |= IN_CHANGE;

	return (0);
}

/*
 * Check if source directory is in the path of the target directory.
 * Target is supplied locked, source is unlocked.
 * The target is always vput before returning.
 */
static int
nandfs_checkpath(struct nandfs_node *src, struct nandfs_node *dest,
    struct ucred *cred)
{
	struct vnode *vp;
	int error, rootino;
	struct nandfs_dir_entry dirent;

	vp = NTOV(dest);
	if (src->nn_ino == dest->nn_ino) {
		error = EEXIST;
		goto out;
	}
	rootino = NANDFS_ROOT_INO;
	error = 0;
	if (dest->nn_ino == rootino)
		goto out;

	/* Walk ".." chains from dest towards the root, looking for src. */
	for (;;) {
		if (vp->v_type != VDIR) {
			error = ENOTDIR;
			break;
		}

		/* Read the first entry of the directory; it must be "..". */
		error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirent,
		    NANDFS_DIR_REC_LEN(2), (off_t)0, UIO_SYSSPACE,
		    IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED,
		    NULL, NULL);
		if (error != 0)
			break;
		if (dirent.name_len != 2 ||
		    dirent.name[0] != '.' ||
		    dirent.name[1] != '.') {
			error = ENOTDIR;
			break;
		}
		if (dirent.inode == src->nn_ino) {
			error = EINVAL;
			break;
		}
		if (dirent.inode == rootino)
			break;
		vput(vp);
		if ((error = VFS_VGET(vp->v_mount, dirent.inode,
		    LK_EXCLUSIVE, &vp)) != 0) {
			vp = NULL;
			break;
		}
	}

out:
	if (error == ENOTDIR)
		printf("checkpath: .. not a directory\n");
	if (vp != NULL)
		vput(vp);
	return (error);
}

/*
 * VOP_RENAME: classic BSD rename dance (modelled on ufs_rename).  The
 * source link count is bumped while entries are shuffled so that a
 * crash mid-way leaves a correctable state; IN_RENAME guards a
 * directory being moved against a concurrent rename.
 */
static int
nandfs_rename(struct vop_rename_args *ap)
{
	struct vnode *tvp = ap->a_tvp;
	struct vnode *tdvp = ap->a_tdvp;
	struct vnode *fvp = ap->a_fvp;
	struct vnode *fdvp = ap->a_fdvp;
	struct componentname *tcnp = ap->a_tcnp;
	struct componentname *fcnp = ap->a_fcnp;
	int doingdirectory = 0, oldparent = 0, newparent = 0;
	int error = 0;
	struct nandfs_node *fdnode, *fnode, *fnode1;
	struct nandfs_node *tdnode = VTON(tdvp);
	struct nandfs_node *tnode;
	uint32_t tdflags, fflags, fdflags;
	uint16_t mode;

	DPRINTF(VNCALL, ("%s: fdvp:%p fvp:%p tdvp:%p tdp:%p\n", __func__, fdvp,
	    fvp, tdvp, tvp));

	/*
	 * Check for cross-device rename.
	 */
	if ((fvp->v_mount != tdvp->v_mount) ||
	    (tvp && (fvp->v_mount != tvp->v_mount))) {
		error = EXDEV;
abortit:
		/* Common abort path: release everything we were handed. */
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fdvp);
		vrele(fvp);
		return (error);
	}

	tdflags = tdnode->nn_inode.i_flags;
	if (tvp &&
	    ((VTON(tvp)->nn_inode.i_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
	    (tdflags & APPEND))) {
		error = EPERM;
		goto abortit;
	}

	/*
	 * Renaming a file to itself has no effect.  The upper layers should
	 * not call us in that case.  Temporarily just warn if they do.
	 */
	if (fvp == tvp) {
		printf("nandfs_rename: fvp == tvp (can't happen)\n");
		error = 0;
		goto abortit;
	}

	if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0)
		goto abortit;

	fdnode = VTON(fdvp);
	fnode = VTON(fvp);

	if (fnode->nn_inode.i_links_count >= LINK_MAX) {
		VOP_UNLOCK(fvp, 0);
		error = EMLINK;
		goto abortit;
	}

	fflags = fnode->nn_inode.i_flags;
	fdflags = fdnode->nn_inode.i_flags;

	if ((fflags & (NOUNLINK | IMMUTABLE | APPEND)) ||
	    (fdflags & APPEND)) {
		VOP_UNLOCK(fvp, 0);
		error = EPERM;
		goto abortit;
	}

	mode = fnode->nn_inode.i_mode;
	if ((mode & S_IFMT) == S_IFDIR) {
		/*
		 * Avoid ".", "..", and aliases of "." for obvious reasons.
		 */
		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
		    (fdvp == fvp) ||
		    ((fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT) ||
		    (fnode->nn_flags & IN_RENAME)) {
			VOP_UNLOCK(fvp, 0);
			error = EINVAL;
			goto abortit;
		}
		fnode->nn_flags |= IN_RENAME;
		doingdirectory = 1;
		DPRINTF(VNCALL, ("%s: doingdirectory dvp %p\n", __func__,
		    tdvp));
		oldparent = fdnode->nn_ino;
	}

	vrele(fdvp);

	tnode = NULL;
	if (tvp)
		tnode = VTON(tvp);

	/*
	 * Bump link count on fvp while we are moving stuff around. If we
	 * crash before completing the work, the link count may be wrong
	 * but correctable.
	 */
	fnode->nn_inode.i_links_count++;

	/* Check for in path moving XXX */
	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread);
	VOP_UNLOCK(fvp, 0);
	if (oldparent != tdnode->nn_ino)
		newparent = tdnode->nn_ino;
	if (doingdirectory && newparent) {
		if (error)	/* write access check above */
			goto bad;
		if (tnode != NULL)
			vput(tvp);

		/* The target must not be below the source in the tree. */
		error = nandfs_checkpath(fnode, tdnode, tcnp->cn_cred);
		if (error)
			goto out;

		/* checkpath dropped tdvp's lock; look the target up again. */
		VREF(tdvp);
		error = relookup(tdvp, &tvp, tcnp);
		if (error)
			goto out;
		vrele(tdvp);
		tdnode = VTON(tdvp);
		tnode = NULL;
		if (tvp)
			tnode = VTON(tvp);
	}

	/*
	 * If the target doesn't exist, link the target to the source and
	 * unlink the source. Otherwise, rewrite the target directory to
	 * reference the source and remove the original entry.
	 */
	if (tvp == NULL) {
		/*
		 * Account for ".." in new directory.
		 */
		if (doingdirectory && fdvp != tdvp)
			tdnode->nn_inode.i_links_count++;

		DPRINTF(VNCALL, ("%s: new entry in dvp:%p\n", __func__, tdvp));
		/*
		 * Add name in new directory.
		 */
		error = nandfs_add_dirent(tdvp, fnode->nn_ino,
		    tcnp->cn_nameptr, tcnp->cn_namelen,
		    IFTODT(fnode->nn_inode.i_mode));
		if (error) {
			if (doingdirectory && fdvp != tdvp)
				tdnode->nn_inode.i_links_count--;
			goto bad;
		}

		vput(tdvp);
	} else {
		/*
		 * If the parent directory is "sticky", then the user must
		 * own the parent directory, or the destination of the rename,
		 * otherwise the destination may not be changed (except by
		 * root). This implements append-only directories.
		 */
		if ((tdnode->nn_inode.i_mode & S_ISTXT) &&
		    tcnp->cn_cred->cr_uid != 0 &&
		    tcnp->cn_cred->cr_uid != tdnode->nn_inode.i_uid &&
		    tnode->nn_inode.i_uid != tcnp->cn_cred->cr_uid) {
			error = EPERM;
			goto bad;
		}
		/*
		 * Target must be empty if a directory and have no links
		 * to it. Also, ensure source and target are compatible
		 * (both directories, or both not directories).
		 */
		mode = tnode->nn_inode.i_mode;
		if ((mode & S_IFMT) == S_IFDIR) {
			if (!nandfs_dirempty(tvp, tdnode->nn_ino,
			    tcnp->cn_cred)) {
				error = ENOTEMPTY;
				goto bad;
			}
			if (!doingdirectory) {
				error = ENOTDIR;
				goto bad;
			}
			/*
			 * Update name cache since directory is going away.
			 */
			cache_purge(tdvp);
		} else if (doingdirectory) {
			error = EISDIR;
			goto bad;
		}

		DPRINTF(VNCALL, ("%s: update entry dvp:%p\n", __func__, tdvp));
		/*
		 * Change name tcnp in tdvp to point at fvp.
		 */
		error = nandfs_update_dirent(tdvp, fnode, tnode);
		if (error)
			goto bad;

		if (doingdirectory && !newparent)
			tdnode->nn_inode.i_links_count--;

		vput(tdvp);

		tnode->nn_inode.i_links_count--;
		vput(tvp);
		tnode = NULL;
	}

	/*
	 * Unlink the source.
	 */
	fcnp->cn_flags &= ~MODMASK;
	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
	VREF(fdvp);
	error = relookup(fdvp, &fvp, fcnp);
	if (error == 0)
		vrele(fdvp);
	if (fvp != NULL) {
		fnode1 = VTON(fvp);
		fdnode = VTON(fdvp);
	} else {
		/*
		 * From name has disappeared.
		 */
		if (doingdirectory)
			panic("nandfs_rename: lost dir entry");
		vrele(ap->a_fvp);
		return (0);
	}

	DPRINTF(VNCALL, ("%s: unlink source fnode:%p\n", __func__, fnode));

	/*
	 * Ensure that the directory entry still exists and has not
	 * changed while the new name has been entered. If the source is
	 * a file then the entry may have been unlinked or renamed. In
	 * either case there is no further work to be done. If the source
	 * is a directory then it cannot have been rmdir'ed; its link
	 * count of three would cause a rmdir to fail with ENOTEMPTY.
	 * The IN_RENAME flag ensures that it cannot be moved by another
	 * rename.
	 */
	if (fnode != fnode1) {
		if (doingdirectory)
			panic("nandfs: lost dir entry");
	} else {
		/*
		 * If the source is a directory with a
		 * new parent, the link count of the old
		 * parent directory must be decremented
		 * and ".." set to point to the new parent.
		 */
		if (doingdirectory && newparent) {
			DPRINTF(VNCALL, ("%s: new parent %#jx -> %#jx\n",
			    __func__, (uintmax_t) oldparent,
			    (uintmax_t) newparent));
			error = nandfs_update_parent_dir(fvp, newparent);
			if (!error) {
				fdnode->nn_inode.i_links_count--;
				fdnode->nn_flags |= IN_CHANGE;
			}
		}

		error = nandfs_remove_dirent(fdvp, fnode, fcnp);
		if (!error) {
			fnode->nn_inode.i_links_count--;
			fnode->nn_flags |= IN_CHANGE;
		}
		fnode->nn_flags &= ~IN_RENAME;
	}

	if (fdnode)
		vput(fdvp);
	if (fnode)
		vput(fvp);
	vrele(ap->a_fvp);
	return (error);

bad:
	DPRINTF(VNCALL, ("%s: error:%d\n", __func__, error));
	if (tnode)
		vput(NTOV(tnode));
	vput(NTOV(tdnode));

out:
	if (doingdirectory)
		fnode->nn_flags &= ~IN_RENAME;

	/* Undo the early link count bump on the source. */
	if (vn_lock(fvp, LK_EXCLUSIVE) == 0) {
		fnode->nn_inode.i_links_count--;
		fnode->nn_flags |= IN_CHANGE;
		fnode->nn_flags &= ~IN_RENAME;
		vput(fvp);
	} else
		vrele(fvp);

	return (error);
}

/*
 * VOP_MKDIR: create a new directory, bump the parent's link count for
 * the child's "..", and initialize "." and "..".
 */
static int
nandfs_mkdir(struct vop_mkdir_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct nandfs_node *dir_node = VTON(dvp);
	struct nandfs_inode *dir_inode = &dir_node->nn_inode;
	struct nandfs_node *node;
	struct nandfsmount *nmp = dir_node->nn_nmp;
	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
	int error;

	DPRINTF(VNCALL, ("%s: dvp %p\n", __func__, dvp));

	if (nandfs_fs_full(dir_node->nn_nandfsdev))
		return (ENOSPC);

	if (dir_inode->i_links_count >= LINK_MAX)
		return (EMLINK);

	error = nandfs_node_create(nmp, &node, mode);
	if (error)
		return (error);

	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;

	*vpp = NTOV(node);

	error = nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
	    cnp->cn_namelen, IFTODT(mode));
	if (error) {
		vput(*vpp);
		return
(error);
	}

	/* Account for the new child's ".." entry. */
	dir_node->nn_inode.i_links_count++;
	dir_node->nn_flags |= IN_CHANGE;

	error = nandfs_init_dir(NTOV(node), node->nn_ino, dir_node->nn_ino);
	if (error) {
		vput(NTOV(node));
		return (error);
	}

	DPRINTF(VNCALL, ("created dir vp %p nandnode %p ino %jx\n", *vpp, node,
	    (uintmax_t)node->nn_ino));
	return (0);
}

/*
 * VOP_MKNOD: create a device/special node and record its rdev in the
 * inode's i_special field.
 */
static int
nandfs_mknod(struct vop_mknod_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct vattr *vap = ap->a_vap;
	uint16_t mode = MAKEIMODE(vap->va_type, vap->va_mode);
	struct componentname *cnp = ap->a_cnp;
	struct nandfs_node *dir_node = VTON(dvp);
	struct nandfsmount *nmp = dir_node->nn_nmp;
	struct nandfs_node *node;
	int error;

	if (nandfs_fs_full(dir_node->nn_nandfsdev))
		return (ENOSPC);

	error = nandfs_node_create(nmp, &node, mode);
	if (error)
		return (error);
	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;
	if (vap->va_rdev != VNOVAL)
		node->nn_inode.i_special = vap->va_rdev;

	*vpp = NTOV(node);
	/* NOTE(review): add_dirent failure is mapped to ENOTDIR here,
	 * unlike nandfs_create which propagates the real error — TODO
	 * confirm this is intentional. */
	if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
	    cnp->cn_namelen, IFTODT(mode))) {
		vput(*vpp);
		return (ENOTDIR);
	}

	node->nn_flags |= IN_ACCESS | IN_CHANGE | IN_UPDATE;

	return (0);
}

/*
 * VOP_SYMLINK: create a symlink inode and write the target path as its
 * file contents.
 */
static int
nandfs_symlink(struct vop_symlink_args *ap)
{
	struct vnode **vpp = ap->a_vpp;
	struct vnode *dvp = ap->a_dvp;
	uint16_t mode = MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode);
	struct componentname *cnp = ap->a_cnp;
	struct nandfs_node *dir_node = VTON(dvp);
	struct nandfsmount *nmp = dir_node->nn_nmp;
	struct nandfs_node *node;
	int len, error;

	if (nandfs_fs_full(dir_node->nn_nandfsdev))
		return (ENOSPC);

	error = nandfs_node_create(nmp, &node, S_IFLNK | mode);
	if (error)
		return (error);
	node->nn_inode.i_gid = dir_node->nn_inode.i_gid;
	node->nn_inode.i_uid = cnp->cn_cred->cr_uid;

	*vpp = NTOV(node);
	if (nandfs_add_dirent(dvp, node->nn_ino, cnp->cn_nameptr,
	    cnp->cn_namelen, IFTODT(mode))) {
		vput(*vpp);
		return (ENOTDIR);
	}

	/* Store the link target as the file's data. */
	len = strlen(ap->a_target);
	error = vn_rdwr(UIO_WRITE, *vpp, ap->a_target, len, (off_t)0,
	    UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cnp->cn_cred,
	    NOCRED, NULL, NULL);
	if (error)
		vput(*vpp);
	return (error);
}

/*
 * VOP_READLINK: a symlink's target is simply its file contents.
 */
static int
nandfs_readlink(struct vop_readlink_args *ap)
{
	struct vnode *vp = ap->a_vp;

	return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred));
}

/*
 * VOP_RMDIR: remove an empty directory; drops the parent's link for the
 * child's ".." and the child's own "." and name links.
 */
static int
nandfs_rmdir(struct vop_rmdir_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct nandfs_node *node, *dnode;
	uint32_t dflag, flag;
	int error = 0;

	node = VTON(vp);
	dnode = VTON(dvp);

	/* Files marked as immutable or append-only cannot be deleted. */
	if ((node->nn_inode.i_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
	    (dnode->nn_inode.i_flags & APPEND))
		return (EPERM);

	DPRINTF(VNCALL, ("%s: dvp %p vp %p nandnode %p ino %#jx\n", __func__,
	    dvp, vp, node, (uintmax_t)node->nn_ino));

	if (node->nn_inode.i_links_count < 2)
		return (EINVAL);

	if (!nandfs_dirempty(vp, dnode->nn_ino, cnp->cn_cred))
		return (ENOTEMPTY);

	/*
	 * Files marked as immutable or append-only cannot be deleted.
	 * (NOTE(review): this re-checks the same flags tested at entry;
	 * looks redundant.)
	 */
	dflag = dnode->nn_inode.i_flags;
	flag = node->nn_inode.i_flags;
	if ((dflag & APPEND) ||
	    (flag & (NOUNLINK | IMMUTABLE | APPEND))) {
		return (EPERM);
	}

	if (vp->v_mountedhere != 0)
		return (EINVAL);

	nandfs_remove_dirent(dvp, node, cnp);
	dnode->nn_inode.i_links_count -= 1;
	dnode->nn_flags |= IN_CHANGE;

	cache_purge(dvp);

	error = nandfs_truncate(vp, (uint64_t)0);
	if (error)
		return (error);

	/* Drop the "." link and the name link. */
	node->nn_inode.i_links_count -= 2;
	node->nn_flags |= IN_CHANGE;

	cache_purge(vp);

	return (error);
}

/*
 * VOP_FSYNC: kick the nandfs syncer if the inode or its buffers are
 * dirty.  The vnode lock is dropped around the wait and reacquired.
 */
static int
nandfs_fsync(struct vop_fsync_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nandfs_node *node = VTON(vp);
	int locked;

	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp,
	    node, (uintmax_t)node->nn_ino));

	/*
	 * Start syncing vnode only if inode was modified or
	 * there are some dirty buffers
	 */
	if (VTON(vp)->nn_flags & IN_MODIFIED ||
	    vp->v_bufobj.bo_dirty.bv_cnt) {
		locked = VOP_ISLOCKED(vp);
		VOP_UNLOCK(vp, 0);
		nandfs_wakeup_wait_sync(node->nn_nandfsdev, SYNCER_FSYNC);
		VOP_LOCK(vp, locked | LK_RETRY);
	}

	return (0);
}

/*
 * VOP_BMAP: map a logical file block to a device block number
 * (in DEV_BSIZE units); -1 marks an unmapped (hole) block.
 */
static int
nandfs_bmap(struct vop_bmap_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nandfs_node *nnode = VTON(vp);
	struct nandfs_device *nandfsdev = nnode->nn_nandfsdev;
	nandfs_daddr_t l2vmap, v2pmap;
	int error;
	int blk2dev = nandfsdev->nd_blocksize / DEV_BSIZE;

	DPRINTF(VNCALL, ("%s: vp %p nandnode %p ino %#jx\n", __func__, vp,
	    nnode, (uintmax_t)nnode->nn_ino));

	if (ap->a_bop != NULL)
		*ap->a_bop = &nandfsdev->nd_devvp->v_bufobj;
	if (ap->a_bnp == NULL)
		return (0);
	/* No read-ahead/read-behind clustering is offered. */
	if (ap->a_runp != NULL)
		*ap->a_runp = 0;
	if (ap->a_runb != NULL)
		*ap->a_runb = 0;

	/*
	 * Translate all the block sectors into a series of buffers to read
	 * asynchronously from the nandfs device. Note that this lookup may
	 * induce readin's too.
	 */

	/* Get virtual block numbers for the vnode's buffer span */
	error = nandfs_bmap_lookup(nnode, ap->a_bn, &l2vmap);
	if (error)
		return (-1);

	/* Translate virtual block numbers to physical block numbers */
	error = nandfs_vtop(nnode, l2vmap, &v2pmap);
	if (error)
		return (-1);

	/* Note virtual block 0 marks not mapped */
	if (l2vmap == 0)
		*ap->a_bnp = -1;
	else
		*ap->a_bnp = v2pmap * blk2dev;	/* in DEV_BSIZE */

	DPRINTF(VNCALL,
	    ("%s: vp %p nandnode %p ino %#jx lblk %jx -> blk %jx\n",
	    __func__, vp, nnode, (uintmax_t)nnode->nn_ino,
	    (uintmax_t)ap->a_bn, (uintmax_t)*ap->a_bnp ));

	return (0);
}

/*
 * Request a forced sync of the mount and wait for it to run.
 */
static void
nandfs_force_syncer(struct nandfsmount *nmp)
{

	nmp->nm_flags |= NANDFS_FORCE_SYNCER;
	nandfs_wakeup_wait_sync(nmp->nm_nandfsdev, SYNCER_FFORCE);
}

/*
 * VOP_IOCTL: nandfs management interface (checkpoints, snapshots,
 * segment/usage queries).  Requires PRIV_VFS_MOUNT; on a read-only
 * mount only the GET_* queries are allowed.
 */
static int
nandfs_ioctl(struct vop_ioctl_args *ap)
{
	struct vnode *vp = ap->a_vp;
	u_long command = ap->a_command;
	caddr_t data = ap->a_data;
	struct nandfs_node *node = VTON(vp);
	struct nandfs_device *nandfsdev = node->nn_nandfsdev;
	struct nandfsmount *nmp = node->nn_nmp;
	uint64_t *tab, *cno;
	struct nandfs_seg_stat *nss;
	struct nandfs_cpmode *ncpm;
	struct nandfs_argv *nargv;
	struct nandfs_cpstat *ncp;
	int error;

	DPRINTF(VNCALL, ("%s: %x\n", __func__, (uint32_t)command));

	error = priv_check(ap->a_td, PRIV_VFS_MOUNT);
	if (error)
		return (error);

	if (nmp->nm_ronly) {
		/* Read-only mount: allow only the query ioctls. */
		switch (command) {
		case NANDFS_IOCTL_GET_FSINFO:
		case NANDFS_IOCTL_GET_SUSTAT:
		case NANDFS_IOCTL_GET_CPINFO:
		case NANDFS_IOCTL_GET_CPSTAT:
		case NANDFS_IOCTL_GET_SUINFO:
		case NANDFS_IOCTL_GET_VINFO:
		case NANDFS_IOCTL_GET_BDESCS:
			break;
		default:
			return (EROFS);
		}
	}

	switch (command) {
	case NANDFS_IOCTL_GET_FSINFO:
		error = nandfs_get_fsinfo(nmp, (struct nandfs_fsinfo *)data);
		break;
	case NANDFS_IOCTL_GET_SUSTAT:
		nss = (struct nandfs_seg_stat *)data;
		error = nandfs_get_seg_stat(nandfsdev, nss);
		break;
	case NANDFS_IOCTL_CHANGE_CPMODE:
		ncpm = (struct nandfs_cpmode *)data;
		error = nandfs_chng_cpmode(nandfsdev->nd_cp_node, ncpm);
		nandfs_force_syncer(nmp);
		break;
	case NANDFS_IOCTL_GET_CPINFO:
		nargv = (struct nandfs_argv *)data;
		error = nandfs_get_cpinfo_ioctl(nandfsdev->nd_cp_node, nargv);
		break;
	case NANDFS_IOCTL_DELETE_CP:
		/* data is a pair: start and end checkpoint numbers. */
		tab = (uint64_t *)data;
		error = nandfs_delete_cp(nandfsdev->nd_cp_node, tab[0],
		    tab[1]);
		nandfs_force_syncer(nmp);
		break;
	case NANDFS_IOCTL_GET_CPSTAT:
		ncp = (struct nandfs_cpstat *)data;
		error = nandfs_get_cpstat(nandfsdev->nd_cp_node, ncp);
		break;
	case NANDFS_IOCTL_GET_SUINFO:
		nargv = (struct nandfs_argv *)data;
		error = nandfs_get_segment_info_ioctl(nandfsdev, nargv);
		break;
	case NANDFS_IOCTL_GET_VINFO:
		nargv = (struct nandfs_argv *)data;
		error = nandfs_get_dat_vinfo_ioctl(nandfsdev, nargv);
		break;
	case NANDFS_IOCTL_GET_BDESCS:
		nargv = (struct nandfs_argv *)data;
		error = nandfs_get_dat_bdescs_ioctl(nandfsdev, nargv);
		break;
	case NANDFS_IOCTL_SYNC:
		/* Returns the last checkpoint number after the sync. */
		cno = (uint64_t *)data;
		nandfs_force_syncer(nmp);
		*cno = nandfsdev->nd_last_cno;
		error = 0;
		break;
	case NANDFS_IOCTL_MAKE_SNAP:
		cno = (uint64_t *)data;
		error = nandfs_make_snap(nandfsdev, cno);
		nandfs_force_syncer(nmp);
		break;
	case NANDFS_IOCTL_DELETE_SNAP:
		cno = (uint64_t *)data;
		error = nandfs_delete_snap(nandfsdev, *cno);
		nandfs_force_syncer(nmp);
		break;
	default:
		error = ENOTTY;
		break;
	}

	return (error);
}

/*
 * Whiteout vnode call
 */
static int
nandfs_whiteout(struct vop_whiteout_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	int error = 0;

	switch (ap->a_flags) {
	case LOOKUP:
		return (0);
	case CREATE:
		/* Create a new directory whiteout */
#ifdef INVARIANTS
		if ((cnp->cn_flags & SAVENAME) == 0)
			panic("nandfs_whiteout: missing name");
#endif
		error = nandfs_add_dirent(dvp, NANDFS_WHT_INO,
		    cnp->cn_nameptr, cnp->cn_namelen, DT_WHT);
		break;

	case DELETE:
		/* Remove an existing directory whiteout */
		cnp->cn_flags &= ~DOWHITEOUT;
		error = nandfs_remove_dirent(dvp, NULL, cnp);
		break;
	default:
		panic("nandf_whiteout: unknown op: %d", ap->a_flags);
	}

	return (error);
}

/*
 * VOP_PATHCONF: report filesystem limits; unknown names fall back to
 * vop_stdpathconf().
 */
static int
nandfs_pathconf(struct vop_pathconf_args *ap)
{
	int error;

	error = 0;
	switch (ap->a_name) {
	case _PC_NO_TRUNC:
		*ap->a_retval = 1;
		break;
	case _PC_ACL_EXTENDED:
		*ap->a_retval = 0;
		break;
	case _PC_ALLOC_SIZE_MIN:
		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_bsize;
		break;
	case _PC_FILESIZEBITS:
		*ap->a_retval = 64;
		break;
	case _PC_REC_INCR_XFER_SIZE:
		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
		break;
	case _PC_REC_MAX_XFER_SIZE:
		*ap->a_retval = -1;	/* means ``unlimited'' */
		break;
	case _PC_REC_MIN_XFER_SIZE:
		*ap->a_retval = ap->a_vp->v_mount->mnt_stat.f_iosize;
		break;
	default:
		error = vop_stdpathconf(ap);
		break;
	}
	return (error);
}

/*
 * VOP_LOCK1: take the device-wide nandfs write lock before the standard
 * vnode lock, so vnode operations are serialized against the segment
 * syncer.
 */
static int
nandfs_vnlock1(struct vop_lock1_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nandfs_node *node = VTON(vp);
	int error, vi_locked;

	/*
	 * XXX can vnode go away while we are sleeping?
	 */
	vi_locked = mtx_owned(&vp->v_interlock);
	if (vi_locked)
		VI_UNLOCK(vp);
	error = NANDFS_WRITELOCKFLAGS(node->nn_nandfsdev,
	    ap->a_flags & LK_NOWAIT);
	if (vi_locked && !error)
		VI_LOCK(vp);
	if (error)
		return (error);

	error = vop_stdlock(ap);
	if (error) {
		NANDFS_WRITEUNLOCK(node->nn_nandfsdev);
		return (error);
	}

	return (0);
}

/*
 * VOP_UNLOCK: release the standard vnode lock, then the device-wide
 * nandfs write lock (reverse of nandfs_vnlock1).
 */
static int
nandfs_vnunlock(struct vop_unlock_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nandfs_node *node = VTON(vp);
	int error;

	error = vop_stdunlock(ap);
	if (error)
		return (error);

	NANDFS_WRITEUNLOCK(node->nn_nandfsdev);

	return (0);
}

/*
 * Global vfs data structures
 */
struct vop_vector nandfs_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_access =		nandfs_access,
	.vop_advlock =		nandfs_advlock,
	.vop_bmap =		nandfs_bmap,
	.vop_close =		nandfs_close,
	.vop_create =		nandfs_create,
	.vop_fsync =		nandfs_fsync,
	.vop_getattr =		nandfs_getattr,
	.vop_inactive =		nandfs_inactive,
	.vop_cachedlookup =	nandfs_lookup,
	.vop_ioctl =		nandfs_ioctl,
	.vop_link =		nandfs_link,
	.vop_lookup =		vfs_cache_lookup,
	.vop_mkdir =		nandfs_mkdir,
	.vop_mknod =		nandfs_mknod,
	.vop_open =		nandfs_open,
	.vop_pathconf =		nandfs_pathconf,
	.vop_print =		nandfs_print,
	.vop_read =		nandfs_read,
	.vop_readdir =		nandfs_readdir,
	.vop_readlink =		nandfs_readlink,
	.vop_reclaim =		nandfs_reclaim,
	.vop_remove =		nandfs_remove,
	.vop_rename =		nandfs_rename,
	.vop_rmdir =		nandfs_rmdir,
	.vop_whiteout =		nandfs_whiteout,
	.vop_write =		nandfs_write,
	.vop_setattr =		nandfs_setattr,
	.vop_strategy =		nandfs_strategy,
	.vop_symlink =		nandfs_symlink,
	.vop_lock1 =		nandfs_vnlock1,
	.vop_unlock =		nandfs_vnunlock,
};

/*
 * Operations for the internal (system) metadata vnodes: only I/O and
 * lifecycle ops are valid; everything else panics.
 */
struct vop_vector nandfs_system_vnodeops = {
	.vop_default =		&default_vnodeops,
	.vop_close =		nandfs_close,
	.vop_inactive =		nandfs_inactive,
	.vop_reclaim =		nandfs_reclaim,
	.vop_strategy =		nandfs_strategy,
	.vop_fsync =		nandfs_fsync,
	.vop_bmap =		nandfs_bmap,
	.vop_access =		VOP_PANIC,
	.vop_advlock =		VOP_PANIC,
	.vop_create =		VOP_PANIC,
	.vop_getattr =		VOP_PANIC,
	.vop_cachedlookup =	VOP_PANIC,
	.vop_ioctl =		VOP_PANIC,
	.vop_link =		VOP_PANIC,
	.vop_lookup =		VOP_PANIC,
	.vop_mkdir =		VOP_PANIC,
	.vop_mknod =		VOP_PANIC,
	.vop_open =		VOP_PANIC,
	.vop_pathconf =		VOP_PANIC,
	.vop_print =		VOP_PANIC,
	.vop_read =		VOP_PANIC,
	.vop_readdir =		VOP_PANIC,
	.vop_readlink =		VOP_PANIC,
	.vop_remove =		VOP_PANIC,
	.vop_rename =		VOP_PANIC,
	.vop_rmdir =		VOP_PANIC,
	.vop_whiteout =		VOP_PANIC,
	.vop_write =		VOP_PANIC,
	.vop_setattr =		VOP_PANIC,
	.vop_symlink =		VOP_PANIC,
};

/*
 * VOP_CLOSE for FIFOs: update timestamps like nandfs_close(), then
 * chain to the generic fifo close.
 */
static int
nandfsfifo_close(struct vop_close_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct nandfs_node *node = VTON(vp);

	DPRINTF(VNCALL, ("%s: vp %p node %p\n", __func__, vp, node));

	mtx_lock(&vp->v_interlock);
	if (vp->v_usecount > 1)
		nandfs_itimes_locked(vp);
	mtx_unlock(&vp->v_interlock);

	return (fifo_specops.vop_close(ap));
}

/*
 * Operations for FIFO vnodes: defaults come from fifo_specops, with
 * nandfs-specific attribute/lifecycle handling layered on top.
 */
struct vop_vector nandfs_fifoops = {
	.vop_default =		&fifo_specops,
	.vop_fsync =		VOP_PANIC,
	.vop_access =		nandfs_access,
	.vop_close =		nandfsfifo_close,
	.vop_getattr =		nandfs_getattr,
	.vop_inactive =		nandfs_inactive,
	.vop_print =		nandfs_print,
	.vop_read =		VOP_PANIC,
	.vop_reclaim =		nandfs_reclaim,
	.vop_setattr =		nandfs_setattr,
	.vop_write =		VOP_PANIC,
	.vop_lock1 =		nandfs_vnlock1,
	.vop_unlock =		nandfs_vnunlock,
};

int
nandfs_vinit(struct vnode *vp, uint64_t ino)
{ struct nandfs_node *node; ASSERT_VOP_LOCKED(vp, __func__); node = VTON(vp); /* Check if we're fetching the root */ if (ino == NANDFS_ROOT_INO) vp->v_vflag |= VV_ROOT; if (ino != NANDFS_GC_INO) vp->v_type = IFTOVT(node->nn_inode.i_mode); else vp->v_type = VREG; if (vp->v_type == VFIFO) vp->v_op = &nandfs_fifoops; return (0); } Index: head/sys/fs/nfs/nfs_commonacl.c =================================================================== --- head/sys/fs/nfs/nfs_commonacl.c (revision 326267) +++ head/sys/fs/nfs/nfs_commonacl.c (revision 326268) @@ -1,513 +1,515 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include extern int nfsrv_useacl; #endif static int nfsrv_acemasktoperm(u_int32_t acetype, u_int32_t mask, int owner, enum vtype type, acl_perm_t *permp); /* * Handle xdr for an ace. */ APPLESTATIC int nfsrv_dissectace(struct nfsrv_descript *nd, struct acl_entry *acep, int *aceerrp, int *acesizep, NFSPROC_T *p) { u_int32_t *tl; int len, gotid = 0, owner = 0, error = 0, aceerr = 0; u_char *name, namestr[NFSV4_SMALLSTR + 1]; u_int32_t flag, mask, acetype; gid_t gid; uid_t uid; *aceerrp = 0; acep->ae_flags = 0; NFSM_DISSECT(tl, u_int32_t *, 4 * NFSX_UNSIGNED); acetype = fxdr_unsigned(u_int32_t, *tl++); flag = fxdr_unsigned(u_int32_t, *tl++); mask = fxdr_unsigned(u_int32_t, *tl++); len = fxdr_unsigned(int, *tl); if (len < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (len == 0) { /* Netapp filers return a 0 length who for nil users */ acep->ae_tag = ACL_UNDEFINED_TAG; acep->ae_id = ACL_UNDEFINED_ID; acep->ae_perm = (acl_perm_t)0; acep->ae_entry_type = ACL_ENTRY_TYPE_DENY; if (acesizep) *acesizep = 4 * NFSX_UNSIGNED; error = 0; goto nfsmout; } if (len > NFSV4_SMALLSTR) name = malloc(len + 1, M_NFSSTRING, M_WAITOK); else name = namestr; error = nfsrv_mtostr(nd, name, len); if (error) { if (len > NFSV4_SMALLSTR) free(name, M_NFSSTRING); goto nfsmout; } if (len == 6) { if (!NFSBCMP(name, "OWNER@", 6)) { acep->ae_tag = ACL_USER_OBJ; acep->ae_id = ACL_UNDEFINED_ID; owner = 1; gotid = 1; } else if (!NFSBCMP(name, "GROUP@", 6)) { acep->ae_tag = ACL_GROUP_OBJ; acep->ae_id = ACL_UNDEFINED_ID; gotid = 1; } } else if (len == 9 && !NFSBCMP(name, "EVERYONE@", 9)) { acep->ae_tag = ACL_EVERYONE; acep->ae_id = ACL_UNDEFINED_ID; gotid = 1; } if (gotid == 0) { if (flag & NFSV4ACE_IDENTIFIERGROUP) { acep->ae_tag = ACL_GROUP; aceerr = nfsv4_strtogid(nd, name, len, &gid, p); if (aceerr == 0) acep->ae_id = (uid_t)gid; } else { acep->ae_tag = ACL_USER; aceerr = nfsv4_strtouid(nd, name, len, &uid, p); if (aceerr == 0) acep->ae_id = 
uid; } } if (len > NFSV4_SMALLSTR) free(name, M_NFSSTRING); if (aceerr == 0) { /* * Handle the flags. */ flag &= ~NFSV4ACE_IDENTIFIERGROUP; if (flag & NFSV4ACE_FILEINHERIT) { flag &= ~NFSV4ACE_FILEINHERIT; acep->ae_flags |= ACL_ENTRY_FILE_INHERIT; } if (flag & NFSV4ACE_DIRECTORYINHERIT) { flag &= ~NFSV4ACE_DIRECTORYINHERIT; acep->ae_flags |= ACL_ENTRY_DIRECTORY_INHERIT; } if (flag & NFSV4ACE_NOPROPAGATEINHERIT) { flag &= ~NFSV4ACE_NOPROPAGATEINHERIT; acep->ae_flags |= ACL_ENTRY_NO_PROPAGATE_INHERIT; } if (flag & NFSV4ACE_INHERITONLY) { flag &= ~NFSV4ACE_INHERITONLY; acep->ae_flags |= ACL_ENTRY_INHERIT_ONLY; } if (flag & NFSV4ACE_SUCCESSFULACCESS) { flag &= ~NFSV4ACE_SUCCESSFULACCESS; acep->ae_flags |= ACL_ENTRY_SUCCESSFUL_ACCESS; } if (flag & NFSV4ACE_FAILEDACCESS) { flag &= ~NFSV4ACE_FAILEDACCESS; acep->ae_flags |= ACL_ENTRY_FAILED_ACCESS; } /* * Set ae_entry_type. */ if (acetype == NFSV4ACE_ALLOWEDTYPE) acep->ae_entry_type = ACL_ENTRY_TYPE_ALLOW; else if (acetype == NFSV4ACE_DENIEDTYPE) acep->ae_entry_type = ACL_ENTRY_TYPE_DENY; else if (acetype == NFSV4ACE_AUDITTYPE) acep->ae_entry_type = ACL_ENTRY_TYPE_AUDIT; else if (acetype == NFSV4ACE_ALARMTYPE) acep->ae_entry_type = ACL_ENTRY_TYPE_ALARM; else aceerr = NFSERR_ATTRNOTSUPP; } /* * Now, check for unsupported flag bits. */ if (aceerr == 0 && flag != 0) aceerr = NFSERR_ATTRNOTSUPP; /* * And turn the mask into perm bits. */ if (aceerr == 0) aceerr = nfsrv_acemasktoperm(acetype, mask, owner, VREG, &acep->ae_perm); *aceerrp = aceerr; if (acesizep) *acesizep = NFSM_RNDUP(len) + (4 * NFSX_UNSIGNED); error = 0; nfsmout: NFSEXITCODE(error); return (error); } /* * Turn an NFSv4 ace mask into R/W/X flag bits. 
*/ static int nfsrv_acemasktoperm(u_int32_t acetype, u_int32_t mask, int owner, enum vtype type, acl_perm_t *permp) { acl_perm_t perm = 0x0; int error = 0; if (mask & NFSV4ACE_READDATA) { mask &= ~NFSV4ACE_READDATA; perm |= ACL_READ_DATA; } if (mask & NFSV4ACE_LISTDIRECTORY) { mask &= ~NFSV4ACE_LISTDIRECTORY; perm |= ACL_LIST_DIRECTORY; } if (mask & NFSV4ACE_WRITEDATA) { mask &= ~NFSV4ACE_WRITEDATA; perm |= ACL_WRITE_DATA; } if (mask & NFSV4ACE_ADDFILE) { mask &= ~NFSV4ACE_ADDFILE; perm |= ACL_ADD_FILE; } if (mask & NFSV4ACE_APPENDDATA) { mask &= ~NFSV4ACE_APPENDDATA; perm |= ACL_APPEND_DATA; } if (mask & NFSV4ACE_ADDSUBDIRECTORY) { mask &= ~NFSV4ACE_ADDSUBDIRECTORY; perm |= ACL_ADD_SUBDIRECTORY; } if (mask & NFSV4ACE_READNAMEDATTR) { mask &= ~NFSV4ACE_READNAMEDATTR; perm |= ACL_READ_NAMED_ATTRS; } if (mask & NFSV4ACE_WRITENAMEDATTR) { mask &= ~NFSV4ACE_WRITENAMEDATTR; perm |= ACL_WRITE_NAMED_ATTRS; } if (mask & NFSV4ACE_EXECUTE) { mask &= ~NFSV4ACE_EXECUTE; perm |= ACL_EXECUTE; } if (mask & NFSV4ACE_SEARCH) { mask &= ~NFSV4ACE_SEARCH; perm |= ACL_EXECUTE; } if (mask & NFSV4ACE_DELETECHILD) { mask &= ~NFSV4ACE_DELETECHILD; perm |= ACL_DELETE_CHILD; } if (mask & NFSV4ACE_READATTRIBUTES) { mask &= ~NFSV4ACE_READATTRIBUTES; perm |= ACL_READ_ATTRIBUTES; } if (mask & NFSV4ACE_WRITEATTRIBUTES) { mask &= ~NFSV4ACE_WRITEATTRIBUTES; perm |= ACL_WRITE_ATTRIBUTES; } if (mask & NFSV4ACE_DELETE) { mask &= ~NFSV4ACE_DELETE; perm |= ACL_DELETE; } if (mask & NFSV4ACE_READACL) { mask &= ~NFSV4ACE_READACL; perm |= ACL_READ_ACL; } if (mask & NFSV4ACE_WRITEACL) { mask &= ~NFSV4ACE_WRITEACL; perm |= ACL_WRITE_ACL; } if (mask & NFSV4ACE_WRITEOWNER) { mask &= ~NFSV4ACE_WRITEOWNER; perm |= ACL_WRITE_OWNER; } if (mask & NFSV4ACE_SYNCHRONIZE) { mask &= ~NFSV4ACE_SYNCHRONIZE; perm |= ACL_SYNCHRONIZE; } if (mask != 0) { error = NFSERR_ATTRNOTSUPP; goto out; } *permp = perm; out: NFSEXITCODE(error); return (error); } /* local functions */ static int nfsrv_buildace(struct nfsrv_descript *, 
u_char *, int, enum vtype, int, int, struct acl_entry *); /* * This function builds an NFS ace. */ static int nfsrv_buildace(struct nfsrv_descript *nd, u_char *name, int namelen, enum vtype type, int group, int owner, struct acl_entry *ace) { u_int32_t *tl, aceflag = 0x0, acemask = 0x0, acetype; int full_len; full_len = NFSM_RNDUP(namelen); NFSM_BUILD(tl, u_int32_t *, 4 * NFSX_UNSIGNED + full_len); /* * Fill in the ace type. */ if (ace->ae_entry_type & ACL_ENTRY_TYPE_ALLOW) acetype = NFSV4ACE_ALLOWEDTYPE; else if (ace->ae_entry_type & ACL_ENTRY_TYPE_DENY) acetype = NFSV4ACE_DENIEDTYPE; else if (ace->ae_entry_type & ACL_ENTRY_TYPE_AUDIT) acetype = NFSV4ACE_AUDITTYPE; else acetype = NFSV4ACE_ALARMTYPE; *tl++ = txdr_unsigned(acetype); /* * Set the flag bits from the ACL. */ if (ace->ae_flags & ACL_ENTRY_FILE_INHERIT) aceflag |= NFSV4ACE_FILEINHERIT; if (ace->ae_flags & ACL_ENTRY_DIRECTORY_INHERIT) aceflag |= NFSV4ACE_DIRECTORYINHERIT; if (ace->ae_flags & ACL_ENTRY_NO_PROPAGATE_INHERIT) aceflag |= NFSV4ACE_NOPROPAGATEINHERIT; if (ace->ae_flags & ACL_ENTRY_INHERIT_ONLY) aceflag |= NFSV4ACE_INHERITONLY; if (ace->ae_flags & ACL_ENTRY_SUCCESSFUL_ACCESS) aceflag |= NFSV4ACE_SUCCESSFULACCESS; if (ace->ae_flags & ACL_ENTRY_FAILED_ACCESS) aceflag |= NFSV4ACE_FAILEDACCESS; if (group) aceflag |= NFSV4ACE_IDENTIFIERGROUP; *tl++ = txdr_unsigned(aceflag); if (type == VDIR) { if (ace->ae_perm & ACL_LIST_DIRECTORY) acemask |= NFSV4ACE_LISTDIRECTORY; if (ace->ae_perm & ACL_ADD_FILE) acemask |= NFSV4ACE_ADDFILE; if (ace->ae_perm & ACL_ADD_SUBDIRECTORY) acemask |= NFSV4ACE_ADDSUBDIRECTORY; if (ace->ae_perm & ACL_READ_NAMED_ATTRS) acemask |= NFSV4ACE_READNAMEDATTR; if (ace->ae_perm & ACL_WRITE_NAMED_ATTRS) acemask |= NFSV4ACE_WRITENAMEDATTR; if (ace->ae_perm & ACL_EXECUTE) acemask |= NFSV4ACE_SEARCH; if (ace->ae_perm & ACL_DELETE_CHILD) acemask |= NFSV4ACE_DELETECHILD; if (ace->ae_perm & ACL_READ_ATTRIBUTES) acemask |= NFSV4ACE_READATTRIBUTES; if (ace->ae_perm & ACL_WRITE_ATTRIBUTES) 
acemask |= NFSV4ACE_WRITEATTRIBUTES; if (ace->ae_perm & ACL_DELETE) acemask |= NFSV4ACE_DELETE; if (ace->ae_perm & ACL_READ_ACL) acemask |= NFSV4ACE_READACL; if (ace->ae_perm & ACL_WRITE_ACL) acemask |= NFSV4ACE_WRITEACL; if (ace->ae_perm & ACL_WRITE_OWNER) acemask |= NFSV4ACE_WRITEOWNER; if (ace->ae_perm & ACL_SYNCHRONIZE) acemask |= NFSV4ACE_SYNCHRONIZE; } else { if (ace->ae_perm & ACL_READ_DATA) acemask |= NFSV4ACE_READDATA; if (ace->ae_perm & ACL_WRITE_DATA) acemask |= NFSV4ACE_WRITEDATA; if (ace->ae_perm & ACL_APPEND_DATA) acemask |= NFSV4ACE_APPENDDATA; if (ace->ae_perm & ACL_READ_NAMED_ATTRS) acemask |= NFSV4ACE_READNAMEDATTR; if (ace->ae_perm & ACL_WRITE_NAMED_ATTRS) acemask |= NFSV4ACE_WRITENAMEDATTR; if (ace->ae_perm & ACL_EXECUTE) acemask |= NFSV4ACE_EXECUTE; if (ace->ae_perm & ACL_READ_ATTRIBUTES) acemask |= NFSV4ACE_READATTRIBUTES; if (ace->ae_perm & ACL_WRITE_ATTRIBUTES) acemask |= NFSV4ACE_WRITEATTRIBUTES; if (ace->ae_perm & ACL_DELETE) acemask |= NFSV4ACE_DELETE; if (ace->ae_perm & ACL_READ_ACL) acemask |= NFSV4ACE_READACL; if (ace->ae_perm & ACL_WRITE_ACL) acemask |= NFSV4ACE_WRITEACL; if (ace->ae_perm & ACL_WRITE_OWNER) acemask |= NFSV4ACE_WRITEOWNER; if (ace->ae_perm & ACL_SYNCHRONIZE) acemask |= NFSV4ACE_SYNCHRONIZE; } *tl++ = txdr_unsigned(acemask); *tl++ = txdr_unsigned(namelen); if (full_len - namelen) *(tl + (namelen / NFSX_UNSIGNED)) = 0x0; NFSBCOPY(name, (caddr_t)tl, namelen); return (full_len + 4 * NFSX_UNSIGNED); } /* * Build an NFSv4 ACL. */ APPLESTATIC int nfsrv_buildacl(struct nfsrv_descript *nd, NFSACL_T *aclp, enum vtype type, NFSPROC_T *p) { int i, entrycnt = 0, retlen; u_int32_t *entrycntp; int isowner, isgroup, namelen, malloced; u_char *name, namestr[NFSV4_SMALLSTR]; NFSM_BUILD(entrycntp, u_int32_t *, NFSX_UNSIGNED); retlen = NFSX_UNSIGNED; /* * Loop through the acl entries, building each one. 
*/ for (i = 0; i < aclp->acl_cnt; i++) { isowner = isgroup = malloced = 0; switch (aclp->acl_entry[i].ae_tag) { case ACL_USER_OBJ: isowner = 1; name = "OWNER@"; namelen = 6; break; case ACL_GROUP_OBJ: isgroup = 1; name = "GROUP@"; namelen = 6; break; case ACL_EVERYONE: name = "EVERYONE@"; namelen = 9; break; case ACL_USER: name = namestr; nfsv4_uidtostr(aclp->acl_entry[i].ae_id, &name, &namelen, p); if (name != namestr) malloced = 1; break; case ACL_GROUP: isgroup = 1; name = namestr; nfsv4_gidtostr((gid_t)aclp->acl_entry[i].ae_id, &name, &namelen, p); if (name != namestr) malloced = 1; break; default: continue; } retlen += nfsrv_buildace(nd, name, namelen, type, isgroup, isowner, &aclp->acl_entry[i]); entrycnt++; if (malloced) free(name, M_NFSSTRING); } *entrycntp = txdr_unsigned(entrycnt); return (retlen); } /* * Set an NFSv4 acl. */ APPLESTATIC int nfsrv_setacl(vnode_t vp, NFSACL_T *aclp, struct ucred *cred, NFSPROC_T *p) { int error; if (nfsrv_useacl == 0 || nfs_supportsnfsv4acls(vp) == 0) { error = NFSERR_ATTRNOTSUPP; goto out; } /* * With NFSv4 ACLs, chmod(2) may need to add additional entries. * Make sure it has enough room for that - splitting every entry * into two and appending "canonical six" entries at the end. * Cribbed out of kern/vfs_acl.c - Rick M. */ if (aclp->acl_cnt > (ACL_MAX_ENTRIES - 6) / 2) { error = NFSERR_ATTRNOTSUPP; goto out; } error = VOP_SETACL(vp, ACL_TYPE_NFS4, aclp, cred, p); out: NFSEXITCODE(error); return (error); } /* * Compare two NFSv4 acls. * Return 0 if they are the same, 1 if not the same. 
*/ APPLESTATIC int nfsrv_compareacl(NFSACL_T *aclp1, NFSACL_T *aclp2) { int i; struct acl_entry *acep1, *acep2; if (aclp1->acl_cnt != aclp2->acl_cnt) return (1); acep1 = aclp1->acl_entry; acep2 = aclp2->acl_entry; for (i = 0; i < aclp1->acl_cnt; i++) { if (acep1->ae_tag != acep2->ae_tag) return (1); switch (acep1->ae_tag) { case ACL_GROUP: case ACL_USER: if (acep1->ae_id != acep2->ae_id) return (1); /* fall through */ case ACL_USER_OBJ: case ACL_GROUP_OBJ: case ACL_OTHER: if (acep1->ae_perm != acep2->ae_perm) return (1); } acep1++; acep2++; } return (0); } Index: head/sys/fs/nfs/nfscl.h =================================================================== --- head/sys/fs/nfs/nfscl.h (revision 326267) +++ head/sys/fs/nfs/nfscl.h (revision 326268) @@ -1,82 +1,84 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSCL_H #define _NFS_NFSCL_H /* * Extra stuff for a NFSv4 nfsnode. * MALLOC'd to the correct length for the name and file handle. * n4_data has the file handle, followed by the file name. * The macro NFS4NODENAME() returns a pointer to the start of the * name. */ struct nfsv4node { u_int16_t n4_fhlen; u_int16_t n4_namelen; u_int8_t n4_data[1]; }; #define NFS4NODENAME(n) (&((n)->n4_data[(n)->n4_fhlen])) /* * Just a macro to convert the nfscl_reqstart arguments. */ #define NFSCL_REQSTART(n, p, v) \ nfscl_reqstart((n), (p), VFSTONFS((v)->v_mount), \ VTONFS(v)->n_fhp->nfh_fh, VTONFS(v)->n_fhp->nfh_len, NULL, \ NULL, 0, 0) /* * These two macros convert between a lease duration and renew interval. * For now, just make the renew interval 1/2 the lease duration. * (They should be inverse operators.) */ #define NFSCL_RENEW(l) (((l) < 2) ? 1 : ((l) / 2)) #define NFSCL_LEASE(r) ((r) * 2) /* This macro checks to see if a forced dismount is about to occur. */ #define NFSCL_FORCEDISM(m) (((m)->mnt_kern_flag & MNTK_UNMOUNTF) != 0 || \ (VFSTONFS(m)->nm_privflag & NFSMNTP_FORCEDISM) != 0) /* * These flag bits are used for the argument to nfscl_fillsattr() to * indicate special handling of the attributes. */ #define NFSSATTR_FULL 0x1 #define NFSSATTR_SIZE0 0x2 #define NFSSATTR_SIZENEG1 0x4 #define NFSSATTR_SIZERDEV 0x8 /* Use this macro for debug printfs. */ #define NFSCL_DEBUG(level, ...) 
do { \ if (nfscl_debuglevel >= (level)) \ printf(__VA_ARGS__); \ } while (0) #endif /* _NFS_NFSCL_H */ Index: head/sys/fs/nfs/nfsclstate.h =================================================================== --- head/sys/fs/nfs/nfsclstate.h (revision 326267) +++ head/sys/fs/nfs/nfsclstate.h (revision 326268) @@ -1,438 +1,440 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSCLSTATE_H_ #define _NFS_NFSCLSTATE_H_ /* * Definitions for NFS V4 client state handling. 
*/ LIST_HEAD(nfsclopenhead, nfsclopen); LIST_HEAD(nfscllockownerhead, nfscllockowner); SLIST_HEAD(nfscllockownerfhhead, nfscllockownerfh); LIST_HEAD(nfscllockhead, nfscllock); LIST_HEAD(nfsclhead, nfsclclient); LIST_HEAD(nfsclownerhead, nfsclowner); TAILQ_HEAD(nfscldeleghead, nfscldeleg); LIST_HEAD(nfscldeleghash, nfscldeleg); TAILQ_HEAD(nfscllayouthead, nfscllayout); LIST_HEAD(nfscllayouthash, nfscllayout); LIST_HEAD(nfsclflayouthead, nfsclflayout); LIST_HEAD(nfscldevinfohead, nfscldevinfo); LIST_HEAD(nfsclrecalllayouthead, nfsclrecalllayout); #define NFSCLDELEGHASHSIZE 256 #define NFSCLDELEGHASH(c, f, l) \ (&((c)->nfsc_deleghash[ncl_hash((f), (l)) % NFSCLDELEGHASHSIZE])) #define NFSCLLAYOUTHASHSIZE 256 #define NFSCLLAYOUTHASH(c, f, l) \ (&((c)->nfsc_layouthash[ncl_hash((f), (l)) % NFSCLLAYOUTHASHSIZE])) /* Structure for NFSv4.1 session stuff. */ struct nfsclsession { struct mtx nfsess_mtx; struct nfsslot nfsess_cbslots[NFSV4_CBSLOTS]; nfsquad_t nfsess_clientid; SVCXPRT *nfsess_xprt; /* For backchannel callback */ uint32_t nfsess_slotseq[64]; /* Max for 64bit nm_slots */ uint64_t nfsess_slots; uint32_t nfsess_sequenceid; uint32_t nfsess_maxcache; /* Max size for cached reply. */ uint16_t nfsess_foreslots; uint16_t nfsess_backslots; uint8_t nfsess_sessionid[NFSX_V4SESSIONID]; uint8_t nfsess_defunct; /* Non-zero for old sessions */ }; /* * This structure holds the session, clientid and related information * needed for an NFSv4.1 Meta Data Server (MDS) or Data Server (DS). * It is malloc'd to the correct length. */ struct nfsclds { TAILQ_ENTRY(nfsclds) nfsclds_list; struct nfsclsession nfsclds_sess; struct mtx nfsclds_mtx; struct nfssockreq *nfsclds_sockp; time_t nfsclds_expire; uint16_t nfsclds_flags; uint16_t nfsclds_servownlen; uint8_t nfsclds_verf[NFSX_VERF]; uint8_t nfsclds_serverown[0]; }; /* * Flags for nfsclds_flags. 
*/ #define NFSCLDS_HASWRITEVERF 0x0001 #define NFSCLDS_MDS 0x0002 #define NFSCLDS_DS 0x0004 #define NFSCLDS_CLOSED 0x0008 struct nfsclclient { LIST_ENTRY(nfsclclient) nfsc_list; struct nfsclownerhead nfsc_owner; struct nfscldeleghead nfsc_deleg; struct nfscldeleghash nfsc_deleghash[NFSCLDELEGHASHSIZE]; struct nfscllayouthead nfsc_layout; struct nfscllayouthash nfsc_layouthash[NFSCLLAYOUTHASHSIZE]; struct nfscldevinfohead nfsc_devinfo; struct nfsv4lock nfsc_lock; struct proc *nfsc_renewthread; struct nfsmount *nfsc_nmp; time_t nfsc_expire; u_int32_t nfsc_clientidrev; u_int32_t nfsc_rev; u_int32_t nfsc_renew; u_int32_t nfsc_cbident; u_int16_t nfsc_flags; u_int16_t nfsc_idlen; u_int8_t nfsc_id[1]; /* Malloc'd to correct length */ }; /* * Bits for nfsc_flags. */ #define NFSCLFLAGS_INITED 0x0001 #define NFSCLFLAGS_HASCLIENTID 0x0002 #define NFSCLFLAGS_RECOVER 0x0004 #define NFSCLFLAGS_UMOUNT 0x0008 #define NFSCLFLAGS_HASTHREAD 0x0010 #define NFSCLFLAGS_AFINET6 0x0020 #define NFSCLFLAGS_EXPIREIT 0x0040 #define NFSCLFLAGS_FIRSTDELEG 0x0080 #define NFSCLFLAGS_GOTDELEG 0x0100 #define NFSCLFLAGS_RECVRINPROG 0x0200 struct nfsclowner { LIST_ENTRY(nfsclowner) nfsow_list; struct nfsclopenhead nfsow_open; struct nfsclclient *nfsow_clp; u_int32_t nfsow_seqid; u_int32_t nfsow_defunct; struct nfsv4lock nfsow_rwlock; u_int8_t nfsow_owner[NFSV4CL_LOCKNAMELEN]; }; /* * MALLOC'd to the correct length to accommodate the file handle. */ struct nfscldeleg { TAILQ_ENTRY(nfscldeleg) nfsdl_list; LIST_ENTRY(nfscldeleg) nfsdl_hash; struct nfsclownerhead nfsdl_owner; /* locally issued state */ struct nfscllockownerhead nfsdl_lock; nfsv4stateid_t nfsdl_stateid; struct acl_entry nfsdl_ace; /* Delegation ace */ struct nfsclclient *nfsdl_clp; struct nfsv4lock nfsdl_rwlock; /* for active I/O ops */ struct nfscred nfsdl_cred; /* Cred. 
used for Open */ time_t nfsdl_timestamp; /* used for stale cleanup */ u_int64_t nfsdl_sizelimit; /* Limit for file growth */ u_int64_t nfsdl_size; /* saved copy of file size */ u_int64_t nfsdl_change; /* and change attribute */ struct timespec nfsdl_modtime; /* local modify time */ u_int16_t nfsdl_fhlen; u_int8_t nfsdl_flags; u_int8_t nfsdl_fh[1]; /* must be last */ }; /* * nfsdl_flags bits. */ #define NFSCLDL_READ 0x01 #define NFSCLDL_WRITE 0x02 #define NFSCLDL_RECALL 0x04 #define NFSCLDL_NEEDRECLAIM 0x08 #define NFSCLDL_ZAPPED 0x10 #define NFSCLDL_MODTIMESET 0x20 #define NFSCLDL_DELEGRET 0x40 /* * MALLOC'd to the correct length to accommodate the file handle. */ struct nfsclopen { LIST_ENTRY(nfsclopen) nfso_list; struct nfscllockownerhead nfso_lock; nfsv4stateid_t nfso_stateid; struct nfsclowner *nfso_own; struct nfscred nfso_cred; /* Cred. used for Open */ u_int32_t nfso_mode; u_int32_t nfso_opencnt; u_int16_t nfso_fhlen; u_int8_t nfso_posixlock; /* 1 for POSIX type locking */ u_int8_t nfso_fh[1]; /* must be last */ }; /* * Return values for nfscl_open(). NFSCLOPEN_OK must == 0. */ #define NFSCLOPEN_OK 0 #define NFSCLOPEN_DOOPEN 1 #define NFSCLOPEN_DOOPENDOWNGRADE 2 #define NFSCLOPEN_SETCRED 3 struct nfscllockowner { LIST_ENTRY(nfscllockowner) nfsl_list; struct nfscllockhead nfsl_lock; struct nfsclopen *nfsl_open; NFSPROC_T *nfsl_inprog; nfsv4stateid_t nfsl_stateid; int nfsl_lockflags; u_int32_t nfsl_seqid; struct nfsv4lock nfsl_rwlock; u_int8_t nfsl_owner[NFSV4CL_LOCKNAMELEN]; u_int8_t nfsl_openowner[NFSV4CL_LOCKNAMELEN]; }; /* * Byte range entry for the above lock owner. */ struct nfscllock { LIST_ENTRY(nfscllock) nfslo_list; u_int64_t nfslo_first; u_int64_t nfslo_end; short nfslo_type; }; /* This structure is used to collect a list of lockowners to free up. 
*/ struct nfscllockownerfh { SLIST_ENTRY(nfscllockownerfh) nfslfh_list; struct nfscllockownerhead nfslfh_lock; int nfslfh_len; uint8_t nfslfh_fh[NFSX_V4FHMAX]; }; /* * MALLOC'd to the correct length to accommodate the file handle. */ struct nfscllayout { TAILQ_ENTRY(nfscllayout) nfsly_list; LIST_ENTRY(nfscllayout) nfsly_hash; nfsv4stateid_t nfsly_stateid; struct nfsv4lock nfsly_lock; uint64_t nfsly_filesid[2]; uint64_t nfsly_lastbyte; struct nfsclflayouthead nfsly_flayread; struct nfsclflayouthead nfsly_flayrw; struct nfsclrecalllayouthead nfsly_recall; time_t nfsly_timestamp; struct nfsclclient *nfsly_clp; uint16_t nfsly_flags; uint16_t nfsly_fhlen; uint8_t nfsly_fh[1]; }; /* * Flags for nfsly_flags. */ #define NFSLY_FILES 0x0001 #define NFSLY_BLOCK 0x0002 #define NFSLY_OBJECT 0x0004 #define NFSLY_RECALL 0x0008 #define NFSLY_RECALLFILE 0x0010 #define NFSLY_RECALLFSID 0x0020 #define NFSLY_RECALLALL 0x0040 #define NFSLY_RETONCLOSE 0x0080 #define NFSLY_WRITTEN 0x0100 /* Has been used to write to a DS. */ #define NFSLY_FLEXFILE 0x0200 /* * Flex file layout mirror specific stuff for nfsclflayout. */ struct nfsffm { nfsv4stateid_t st; char dev[NFSX_V4DEVICEID]; uint32_t eff; uid_t user; gid_t group; struct nfsfh *fh[NFSDEV_MAXVERS]; uint16_t fhcnt; }; /* * MALLOC'd to the correct length to accommodate the file handle list for File * layout and the list of mirrors for the Flex File Layout. * These hang off of nfsly_flayread and nfsly_flayrw, sorted in increasing * offset order. * The nfsly_flayread list holds the ones with iomode == NFSLAYOUTIOMODE_READ, * whereas the nfsly_flayrw holds the ones with iomode == NFSLAYOUTIOMODE_RW. 
*/ struct nfsclflayout { LIST_ENTRY(nfsclflayout) nfsfl_list; uint64_t nfsfl_off; uint64_t nfsfl_end; uint32_t nfsfl_iomode; struct nfscldevinfo *nfsfl_devp; uint16_t nfsfl_flags; union { struct { uint64_t patoff; uint32_t util; uint32_t stripe1; uint8_t dev[NFSX_V4DEVICEID]; uint16_t fhcnt; } fl; struct { uint64_t stripeunit; uint32_t fflags; uint32_t statshint; uint16_t mirrorcnt; } ff; } nfsfl_un; union { struct nfsfh *fh[0]; /* FH list for DS File layout */ struct nfsffm ffm[0]; /* Mirror list for Flex File */ } nfsfl_un2; /* Must be last. Malloc'd to correct array length */ }; #define nfsfl_patoff nfsfl_un.fl.patoff #define nfsfl_util nfsfl_un.fl.util #define nfsfl_stripe1 nfsfl_un.fl.stripe1 #define nfsfl_dev nfsfl_un.fl.dev #define nfsfl_fhcnt nfsfl_un.fl.fhcnt #define nfsfl_stripeunit nfsfl_un.ff.stripeunit #define nfsfl_fflags nfsfl_un.ff.fflags #define nfsfl_statshint nfsfl_un.ff.statshint #define nfsfl_mirrorcnt nfsfl_un.ff.mirrorcnt #define nfsfl_fh nfsfl_un2.fh #define nfsfl_ffm nfsfl_un2.ffm /* * Flags for nfsfl_flags. */ #define NFSFL_RECALL 0x0001 /* File layout has been recalled */ #define NFSFL_FILE 0x0002 /* File layout */ #define NFSFL_FLEXFILE 0x0004 /* Flex File layout */ /* * Structure that is used to store a LAYOUTRECALL. */ struct nfsclrecalllayout { LIST_ENTRY(nfsclrecalllayout) nfsrecly_list; uint64_t nfsrecly_off; uint64_t nfsrecly_len; int nfsrecly_recalltype; uint32_t nfsrecly_iomode; uint32_t nfsrecly_stateseqid; }; /* * Stores the NFSv4.1 Device Info. Malloc'd to the correct length to * store the list of network connections and list of indices. * nfsdi_data[] is allocated the following way: * - nfsdi_addrcnt * struct nfsclds * - stripe indices, each stored as one byte, since there can be many * of them. (This implies a limit of 256 on nfsdi_addrcnt, since the * indices select which address.) * For Flex File, the addrcnt is always one and no stripe indices exist. 
*/ struct nfscldevinfo { LIST_ENTRY(nfscldevinfo) nfsdi_list; uint8_t nfsdi_deviceid[NFSX_V4DEVICEID]; struct nfsclclient *nfsdi_clp; uint32_t nfsdi_refcnt; uint32_t nfsdi_layoutrefs; union { struct { uint16_t stripecnt; } fl; struct { int versindex; uint32_t vers; uint32_t minorvers; uint32_t rsize; uint32_t wsize; } ff; } nfsdi_un; uint16_t nfsdi_addrcnt; uint16_t nfsdi_flags; struct nfsclds *nfsdi_data[0]; }; #define nfsdi_stripecnt nfsdi_un.fl.stripecnt #define nfsdi_versindex nfsdi_un.ff.versindex #define nfsdi_vers nfsdi_un.ff.vers #define nfsdi_minorvers nfsdi_un.ff.minorvers #define nfsdi_rsize nfsdi_un.ff.rsize #define nfsdi_wsize nfsdi_un.ff.wsize /* Flags for nfsdi_flags. */ #define NFSDI_FILELAYOUT 0x0001 #define NFSDI_FLEXFILE 0x0002 #define NFSDI_TIGHTCOUPLED 0X0004 /* These inline functions return values from nfsdi_data[]. */ /* * Return a pointer to the address at "pos". */ static __inline struct nfsclds ** nfsfldi_addr(struct nfscldevinfo *ndi, int pos) { if (pos >= ndi->nfsdi_addrcnt) return (NULL); return (&ndi->nfsdi_data[pos]); } /* * Return the Nth ("pos") stripe index. */ static __inline int nfsfldi_stripeindex(struct nfscldevinfo *ndi, int pos) { uint8_t *valp; if (pos >= ndi->nfsdi_stripecnt) return (-1); valp = (uint8_t *)&ndi->nfsdi_data[ndi->nfsdi_addrcnt]; valp += pos; return ((int)*valp); } /* * Set the Nth ("pos") stripe index to "val". */ static __inline void nfsfldi_setstripeindex(struct nfscldevinfo *ndi, int pos, uint8_t val) { uint8_t *valp; if (pos >= ndi->nfsdi_stripecnt) return; valp = (uint8_t *)&ndi->nfsdi_data[ndi->nfsdi_addrcnt]; valp += pos; *valp = val; } /* * Macro for incrementing the seqid#. 
*/ #define NFSCL_INCRSEQID(s, n) do { \ if (((n)->nd_flag & ND_INCRSEQID)) \ (s)++; \ } while (0) #endif /* _NFS_NFSCLSTATE_H_ */ Index: head/sys/fs/nfs/nfsdport.h =================================================================== --- head/sys/fs/nfs/nfsdport.h (revision 326267) +++ head/sys/fs/nfs/nfsdport.h (revision 326268) @@ -1,123 +1,125 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * These macros handle nfsvattr fields. They look a bit silly here, but * are quite different for the Darwin port. 
*/ #define NFSVNO_ATTRINIT(n) (VATTR_NULL(&((n)->na_vattr))) #define NFSVNO_SETATTRVAL(n, f, v) ((n)->na_##f = (v)) #define NFSVNO_SETACTIVE(n, f) #define NFSVNO_UNSET(n, f) ((n)->na_##f = VNOVAL) #define NFSVNO_NOTSETMODE(n) ((n)->na_mode == ((mode_t)VNOVAL)) #define NFSVNO_ISSETMODE(n) ((n)->na_mode != ((mode_t)VNOVAL)) #define NFSVNO_NOTSETUID(n) ((n)->na_uid == ((uid_t)VNOVAL)) #define NFSVNO_ISSETUID(n) ((n)->na_uid != ((uid_t)VNOVAL)) #define NFSVNO_NOTSETGID(n) ((n)->na_gid == ((gid_t)VNOVAL)) #define NFSVNO_ISSETGID(n) ((n)->na_gid != ((gid_t)VNOVAL)) #define NFSVNO_NOTSETSIZE(n) ((n)->na_size == VNOVAL) #define NFSVNO_ISSETSIZE(n) ((n)->na_size != VNOVAL) #define NFSVNO_NOTSETATIME(n) ((n)->na_atime.tv_sec == VNOVAL) #define NFSVNO_ISSETATIME(n) ((n)->na_atime.tv_sec != VNOVAL) #define NFSVNO_NOTSETMTIME(n) ((n)->na_mtime.tv_sec == VNOVAL) #define NFSVNO_ISSETMTIME(n) ((n)->na_mtime.tv_sec != VNOVAL) /* * This structure acts as a "catch-all" for information that * needs to be returned by nfsd_fhtovp(). */ struct nfsexstuff { int nes_exflag; /* export flags */ int nes_numsecflavor; /* # of security flavors */ int nes_secflavors[MAXSECFLAVORS]; /* and the flavors */ }; /* * These are NO-OPS for BSD until Isilon upstreams EXITCODE support. * EXITCODE is an in-memory ring buffer that holds the routines failing status. * This is a valuable tool to use when debugging and analyzing issues. * In addition to recording a routine's failing status, it offers * logging of routines for call stack tracing. * EXITCODE should be used only in routines that return a true errno value, as * that value will be formatted to a displayable errno string. Routines that * return regular int status that are not true errno should not set EXITCODE. * If you want to log routine tracing, you can add EXITCODE(0) to any routine. * NFS extended the EXITCODE with EXITCODE2 to record either the routine's * exit errno status or the nd_repstat. 
*/ #define NFSEXITCODE(error) #define NFSEXITCODE2(error, nd) #define NFSVNO_EXINIT(e) ((e)->nes_exflag = 0) #define NFSVNO_EXPORTED(e) ((e)->nes_exflag & MNT_EXPORTED) #define NFSVNO_EXRDONLY(e) ((e)->nes_exflag & MNT_EXRDONLY) #define NFSVNO_EXPORTANON(e) ((e)->nes_exflag & MNT_EXPORTANON) #define NFSVNO_EXSTRICTACCESS(e) ((e)->nes_exflag & MNT_EXSTRICTACCESS) #define NFSVNO_EXV4ONLY(e) ((e)->nes_exflag & MNT_EXV4ONLY) #define NFSVNO_SETEXRDONLY(e) ((e)->nes_exflag = (MNT_EXPORTED|MNT_EXRDONLY)) #define NFSVNO_CMPFH(f1, f2) \ ((f1)->fh_fsid.val[0] == (f2)->fh_fsid.val[0] && \ (f1)->fh_fsid.val[1] == (f2)->fh_fsid.val[1] && \ bcmp(&(f1)->fh_fid, &(f2)->fh_fid, sizeof(struct fid)) == 0) #define NFSLOCKHASH(f) \ (&nfslockhash[nfsrv_hashfh(f) % nfsrv_lockhashsize]) #define NFSFPVNODE(f) ((struct vnode *)((f)->f_data)) #define NFSFPCRED(f) ((f)->f_cred) #define NFSFPFLAG(f) ((f)->f_flag) #define NFSNAMEICNDSET(n, c, o, f) do { \ (n)->cn_cred = (c); \ (n)->cn_nameiop = (o); \ (n)->cn_flags = (f); \ } while (0) /* * A little bit of Darwin vfs kpi. */ #define vnode_mount(v) ((v)->v_mount) #define vfs_statfs(m) (&((m)->mnt_stat)) #define NFSPATHLEN_T size_t /* * These are set to the minimum and maximum size of a server file * handle. */ #define NFSRV_MINFH (sizeof (fhandle_t)) #define NFSRV_MAXFH (sizeof (fhandle_t)) /* Use this macro for debug printfs. */ #define NFSD_DEBUG(level, ...) do { \ if (nfsd_debuglevel >= (level)) \ printf(__VA_ARGS__); \ } while (0) Index: head/sys/fs/nfs/nfskpiport.h =================================================================== --- head/sys/fs/nfs/nfskpiport.h (revision 326267) +++ head/sys/fs/nfs/nfskpiport.h (revision 326268) @@ -1,73 +1,75 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSKPIPORT_H_ #define _NFS_NFSKPIPORT_H_ /* * These definitions are needed since the generic code is now using Darwin8 * KPI stuff. (I know, seems a bit silly, but I want the code to build on * Darwin8 and hopefully subsequent releases from Apple.) 
*/ typedef struct mount * mount_t; #define vfs_statfs(m) (&((m)->mnt_stat)) #define vfs_flags(m) ((m)->mnt_flag) typedef struct vnode * vnode_t; #define vnode_mount(v) ((v)->v_mount) #define vnode_vtype(v) ((v)->v_type) typedef struct mbuf * mbuf_t; #define mbuf_freem(m) m_freem(m) #define mbuf_data(m) mtod((m), void *) #define mbuf_len(m) ((m)->m_len) #define mbuf_next(m) ((m)->m_next) #define mbuf_setlen(m, l) ((m)->m_len = (l)) #define mbuf_setnext(m, p) ((m)->m_next = (p)) #define mbuf_pkthdr_len(m) ((m)->m_pkthdr.len) #define mbuf_pkthdr_setlen(m, l) ((m)->m_pkthdr.len = (l)) #define mbuf_pkthdr_setrcvif(m, p) ((m)->m_pkthdr.rcvif = (p)) /* * This stuff is needed by Darwin for handling the uio structure. */ #define CAST_USER_ADDR_T(a) (a) #define CAST_DOWN(c, a) ((c) (a)) #define uio_uio_resid(p) ((p)->uio_resid) #define uio_uio_resid_add(p, v) ((p)->uio_resid += (v)) #define uio_uio_resid_set(p, v) ((p)->uio_resid = (v)) #define uio_iov_base(p) ((p)->uio_iov->iov_base) #define uio_iov_base_add(p, v) do { \ char *pp; \ pp = (char *)(p)->uio_iov->iov_base; \ pp += (v); \ (p)->uio_iov->iov_base = (void *)pp; \ } while (0) #define uio_iov_len(p) ((p)->uio_iov->iov_len) #define uio_iov_len_add(p, v) ((p)->uio_iov->iov_len += (v)) #endif /* _NFS_NFSKPIPORT_H */ Index: head/sys/fs/nfs/nfsrvstate.h =================================================================== --- head/sys/fs/nfs/nfsrvstate.h (revision 326267) +++ head/sys/fs/nfs/nfsrvstate.h (revision 326268) @@ -1,296 +1,298 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSRVSTATE_H_ #define _NFS_NFSRVSTATE_H_ /* * Definitions for NFS V4 server state handling. */ /* * List heads for nfsclient, nfsstate and nfslockfile. * (Some systems seem to like to dynamically size these things, but I * don't see any point in doing so for these ones.) */ LIST_HEAD(nfsclienthashhead, nfsclient); LIST_HEAD(nfsstatehead, nfsstate); LIST_HEAD(nfslockhead, nfslock); LIST_HEAD(nfslockhashhead, nfslockfile); LIST_HEAD(nfssessionhead, nfsdsession); LIST_HEAD(nfssessionhashhead, nfsdsession); /* * List head for nfsusrgrp. 
*/ TAILQ_HEAD(nfsuserhashhead, nfsusrgrp); #define NFSCLIENTHASH(id) \ (&nfsclienthash[(id).lval[1] % nfsrv_clienthashsize]) #define NFSSTATEHASH(clp, id) \ (&((clp)->lc_stateid[(id).other[2] % nfsrv_statehashsize])) #define NFSUSERHASH(id) \ (&nfsuserhash[(id) % nfsrv_lughashsize]) #define NFSUSERNAMEHASH(p, l) \ (&nfsusernamehash[((l)>=4?(*(p)+*((p)+1)+*((p)+2)+*((p)+3)):*(p)) \ % nfsrv_lughashsize]) #define NFSGROUPHASH(id) \ (&nfsgrouphash[(id) % nfsrv_lughashsize]) #define NFSGROUPNAMEHASH(p, l) \ (&nfsgroupnamehash[((l)>=4?(*(p)+*((p)+1)+*((p)+2)+*((p)+3)):*(p)) \ % nfsrv_lughashsize]) struct nfssessionhash { struct mtx mtx; struct nfssessionhashhead list; }; #define NFSSESSIONHASH(f) \ (&nfssessionhash[nfsrv_hashsessionid(f) % nfsrv_sessionhashsize]) /* * Client server structure for V4. It is doubly linked into two lists. * The first is a hash table based on the clientid and the second is a * list of all clients maintained in LRU order. * The actual size malloc'd is large enough to accommodate the id string. 
*/ struct nfsclient { LIST_ENTRY(nfsclient) lc_hash; /* Clientid hash list */ struct nfsstatehead *lc_stateid; /* Stateid hash */ struct nfsstatehead lc_open; /* Open owner list */ struct nfsstatehead lc_deleg; /* Delegations */ struct nfsstatehead lc_olddeleg; /* and old delegations */ struct nfssessionhead lc_session; /* List of NFSv4.1 sessions */ time_t lc_expiry; /* Expiry time (sec) */ time_t lc_delegtime; /* Old deleg expiry (sec) */ nfsquad_t lc_clientid; /* 64 bit clientid */ nfsquad_t lc_confirm; /* 64 bit confirm value */ u_int32_t lc_program; /* RPC Program # */ u_int32_t lc_callback; /* Callback id */ u_int32_t lc_stateindex; /* Current state index# */ u_int32_t lc_statemaxindex; /* Max state index# */ u_int32_t lc_cbref; /* Cnt of callbacks */ uid_t lc_uid; /* User credential */ gid_t lc_gid; u_int16_t lc_idlen; /* Client ID and len */ u_int16_t lc_namelen; /* plus GSS principal and len */ u_char *lc_name; struct nfssockreq lc_req; /* Callback info */ u_int32_t lc_flags; /* LCL_ flag bits */ u_char lc_verf[NFSX_VERF]; /* client verifier */ u_char lc_id[1]; /* Malloc'd correct size */ }; #define CLOPS_CONFIRM 0x0001 #define CLOPS_RENEW 0x0002 #define CLOPS_RENEWOP 0x0004 /* * Structure for an NFSv4.1 session. * Locking rules for this structure. * To add/delete one of these structures from the lists, you must lock * both: NFSLOCKSTATE() and NFSLOCKSESSION(session hashhead) in that order. * To traverse the lists looking for one of these, you must hold one * of these two locks. * The exception is if the thread holds the exclusive root sleep lock. * In this case, all other nfsd threads are blocked, so locking the * mutexes isn't required. * When manipulating sess_refcnt, NFSLOCKSTATE() must be locked. * When manipulating the fields withinsess_cbsess except nfsess_xprt, * sess_cbsess.nfsess_mtx must be locked. * When manipulating sess_slots and sess_cbsess.nfsess_xprt, * NFSLOCKSESSION(session hashhead) must be locked. 
*/ struct nfsdsession { uint64_t sess_refcnt; /* Reference count. */ LIST_ENTRY(nfsdsession) sess_hash; /* Hash list of sessions. */ LIST_ENTRY(nfsdsession) sess_list; /* List of client sessions. */ struct nfsslot sess_slots[NFSV4_SLOTS]; struct nfsclient *sess_clp; /* Associated clientid. */ uint32_t sess_crflags; uint32_t sess_cbprogram; uint32_t sess_maxreq; uint32_t sess_maxresp; uint32_t sess_maxrespcached; uint32_t sess_maxops; uint32_t sess_maxslots; uint32_t sess_cbmaxreq; uint32_t sess_cbmaxresp; uint32_t sess_cbmaxrespcached; uint32_t sess_cbmaxops; uint8_t sess_sessionid[NFSX_V4SESSIONID]; struct nfsclsession sess_cbsess; /* Callback session. */ }; /* * Nfs state structure. I couldn't resist overloading this one, since * it makes cleanup, etc. simpler. These structures are used in four ways: * - open_owner structures chained off of nfsclient * - open file structures chained off an open_owner structure * - lock_owner structures chained off an open file structure * - delegated file structures chained off of nfsclient and nfslockfile * - the ls_list field is used for the chain it is in * - the ls_head structure is used to chain off the sibling structure * (it is a union between an nfsstate and nfslock structure head) * If it is a lockowner stateid, nfslock structures hang off it. * For the open file and lockowner cases, it is in the hash table in * nfsclient for stateid. */ struct nfsstate { LIST_ENTRY(nfsstate) ls_hash; /* Hash list entry */ LIST_ENTRY(nfsstate) ls_list; /* List of opens/delegs */ LIST_ENTRY(nfsstate) ls_file; /* Opens/Delegs for a file */ union { struct nfsstatehead open; /* Opens list */ struct nfslockhead lock; /* Locks list */ } ls_head; nfsv4stateid_t ls_stateid; /* The state id */ u_int32_t ls_seq; /* seq id */ uid_t ls_uid; /* uid of locker */ u_int32_t ls_flags; /* Type of lock, etc. 
*/ union { struct nfsstate *openowner; /* Open only */ u_int32_t opentolockseq; /* Lock call only */ u_int32_t noopens; /* Openowner only */ struct { u_quad_t filerev; /* Delegations only */ time_t expiry; time_t limit; u_int64_t compref; } deleg; } ls_un; struct nfslockfile *ls_lfp; /* Back pointer */ struct nfsrvcache *ls_op; /* Op cache reference */ struct nfsclient *ls_clp; /* Back pointer */ u_short ls_ownerlen; /* Length of ls_owner */ u_char ls_owner[1]; /* malloc'd the correct size */ }; #define ls_lock ls_head.lock #define ls_open ls_head.open #define ls_opentolockseq ls_un.opentolockseq #define ls_openowner ls_un.openowner #define ls_openstp ls_un.openowner #define ls_noopens ls_un.noopens #define ls_filerev ls_un.deleg.filerev #define ls_delegtime ls_un.deleg.expiry #define ls_delegtimelimit ls_un.deleg.limit #define ls_compref ls_un.deleg.compref /* * Nfs lock structure. * This structure is chained off of the nfsstate (the lockowner) and * nfslockfile (the file) structures, for the file and owner it * refers to. It holds flags and a byte range. * It also has back pointers to the associated lock_owner and lockfile. */ struct nfslock { LIST_ENTRY(nfslock) lo_lckowner; LIST_ENTRY(nfslock) lo_lckfile; struct nfsstate *lo_stp; struct nfslockfile *lo_lfp; u_int64_t lo_first; u_int64_t lo_end; u_int32_t lo_flags; }; /* * Structure used to return a conflicting lock. (Must be large * enough for the largest lock owner we can have.) */ struct nfslockconflict { nfsquad_t cl_clientid; u_int64_t cl_first; u_int64_t cl_end; u_int32_t cl_flags; u_short cl_ownerlen; u_char cl_owner[NFSV4_OPAQUELIMIT]; }; /* * This structure is used to keep track of local locks that might need * to be rolled back. */ struct nfsrollback { LIST_ENTRY(nfsrollback) rlck_list; uint64_t rlck_first; uint64_t rlck_end; int rlck_type; }; /* * This structure refers to a file for which lock(s) and/or open(s) exist. 
* Searched via hash table on file handle or found via the back pointer from an * open or lock owner. */ struct nfslockfile { LIST_HEAD(, nfsstate) lf_open; /* Open list */ LIST_HEAD(, nfsstate) lf_deleg; /* Delegation list */ LIST_HEAD(, nfslock) lf_lock; /* Lock list */ LIST_HEAD(, nfslock) lf_locallock; /* Local lock list */ LIST_HEAD(, nfsrollback) lf_rollback; /* Local lock rollback list */ LIST_ENTRY(nfslockfile) lf_hash; /* Hash list entry */ fhandle_t lf_fh; /* The file handle */ struct nfsv4lock lf_locallock_lck; /* serialize local locking */ int lf_usecount; /* Ref count for locking */ }; /* * This structure is malloc'd an chained off hash lists for user/group * names. */ struct nfsusrgrp { TAILQ_ENTRY(nfsusrgrp) lug_numhash; /* Hash by id# */ TAILQ_ENTRY(nfsusrgrp) lug_namehash; /* and by name */ time_t lug_expiry; /* Expiry time in sec */ union { uid_t un_uid; /* id# */ gid_t un_gid; } lug_un; struct ucred *lug_cred; /* Cred. with groups list */ int lug_namelen; /* Name length */ u_char lug_name[1]; /* malloc'd correct length */ }; #define lug_uid lug_un.un_uid #define lug_gid lug_un.un_gid /* * These structures are used for the stable storage restart stuff. */ /* * Record at beginning of file. */ struct nfsf_rec { u_int32_t lease; /* Lease duration */ u_int32_t numboots; /* Number of boottimes */ }; #if defined(_KERNEL) || defined(KERNEL) void nfsrv_cleanclient(struct nfsclient *, NFSPROC_T *); void nfsrv_freedeleglist(struct nfsstatehead *); #endif #endif /* _NFS_NFSRVSTATE_H_ */ Index: head/sys/fs/nfs/nfsv4_errstr.h =================================================================== --- head/sys/fs/nfs/nfsv4_errstr.h (revision 326267) +++ head/sys/fs/nfs/nfsv4_errstr.h (revision 326268) @@ -1,101 +1,103 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFS_NFSV4ERRSTR_H_ #define _NFS_NFSV4ERRSTR_H_ /* * Defines static storage in the C file, but I can't be bothered creating * a library of one function for this, since it is only currently used by * mount_newnfs.c. 
*/ static const char *nfsv4_errstr[48] = { "Illegal filehandle", "Undefined NFSv4 err", "READDIR cookie is stale", "operation not supported", "response limit exceeded", "undefined server error", "type invalid for CREATE", "file busy - retry", "nverify says attrs same", "lock unavailable", "lock lease expired", "I/O failed due to lock", "in grace period", "filehandle expired", "share reserve denied", "wrong security flavor", "clientid in use", "resource exhaustion", "filesystem relocated", "current FH is not set", "minor vers not supp", "server has rebooted", "server has rebooted", "state is out of sync", "incorrect stateid", "request is out of seq", "verify - attrs not same", "lock range not supported", "should be file/directory", "no saved filehandle", "some filesystem moved", "recommended attr not sup", "reclaim outside of grace", "reclaim error at server", "conflict on reclaim", "XDR decode failed", "file locks held at CLOSE", "conflict in OPEN and I/O", "owner translation bad", "utf-8 char not supported", "name not supported", "lock range not supported", "no atomic up/downgrade", "undefined operation", "file locking deadlock", "open file blocks op", "lockowner state revoked", "callback path down" }; /* * Return the error string for the NFS4ERR_xxx. The pointers returned are * static and must not be free'd. */ static const char * nfsv4_geterrstr(int errval) { if (errval < NFSERR_BADHANDLE || errval > NFSERR_CBPATHDOWN) return (NULL); return (nfsv4_errstr[errval - NFSERR_BADHANDLE]); } #endif /* _NFS_NFSV4ERRSTR_H_ */ Index: head/sys/fs/nfsclient/nfs_clkdtrace.c =================================================================== --- head/sys/fs/nfsclient/nfs_clkdtrace.c (revision 326267) +++ head/sys/fs/nfsclient/nfs_clkdtrace.c (revision 326268) @@ -1,585 +1,587 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2009 Robert N. M. Watson * All rights reserved. 
* * This software was developed at the University of Cambridge Computer * Laboratory with support from a grant from Google, Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include /* * dtnfscl is a DTrace provider that tracks the intent to perform RPCs * in the NFS client, as well as access to and maintenance of the access and * attribute caches. This is not quite the same as RPCs, because NFS may * issue multiple RPC transactions in the event that authentication fails, * there's a jukebox error, or none at all if the access or attribute cache * hits. 
However, it cleanly represents the logical layer between RPC * transmission and vnode/vfs operations, providing access to state linking * the two. */ static int dtnfsclient_unload(void); static void dtnfsclient_getargdesc(void *, dtrace_id_t, void *, dtrace_argdesc_t *); static void dtnfsclient_provide(void *, dtrace_probedesc_t *); static void dtnfsclient_destroy(void *, dtrace_id_t, void *); static void dtnfsclient_enable(void *, dtrace_id_t, void *); static void dtnfsclient_disable(void *, dtrace_id_t, void *); static void dtnfsclient_load(void *); static dtrace_pattr_t dtnfsclient_attr = { { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON }, }; /* * Description of NFSv4, NFSv3 and (optional) NFSv2 probes for a procedure. */ struct dtnfsclient_rpc { char *nr_v4_name; char *nr_v3_name; /* Or NULL if none. */ char *nr_v2_name; /* Or NULL if none. */ /* * IDs for the start and done cases, for NFSv2, NFSv3 and NFSv4. */ uint32_t nr_v2_id_start, nr_v2_id_done; uint32_t nr_v3_id_start, nr_v3_id_done; uint32_t nr_v4_id_start, nr_v4_id_done; }; /* * This table is indexed by NFSv3 procedure number, but also used for NFSv2 * procedure names and NFSv4 operations. 
*/ static struct dtnfsclient_rpc dtnfsclient_rpcs[NFSV41_NPROCS + 1] = { { "null", "null", "null" }, { "getattr", "getattr", "getattr" }, { "setattr", "setattr", "setattr" }, { "lookup", "lookup", "lookup" }, { "access", "access", "noop" }, { "readlink", "readlink", "readlink" }, { "read", "read", "read" }, { "write", "write", "write" }, { "create", "create", "create" }, { "mkdir", "mkdir", "mkdir" }, { "symlink", "symlink", "symlink" }, { "mknod", "mknod" }, { "remove", "remove", "remove" }, { "rmdir", "rmdir", "rmdir" }, { "rename", "rename", "rename" }, { "link", "link", "link" }, { "readdir", "readdir", "readdir" }, { "readdirplus", "readdirplus" }, { "fsstat", "fsstat", "statfs" }, { "fsinfo", "fsinfo" }, { "pathconf", "pathconf" }, { "commit", "commit" }, { "lookupp" }, { "setclientid" }, { "setclientidcfrm" }, { "lock" }, { "locku" }, { "open" }, { "close" }, { "openconfirm" }, { "lockt" }, { "opendowngrade" }, { "renew" }, { "putrootfh" }, { "releaselckown" }, { "delegreturn" }, { "retdelegremove" }, { "retdelegrename1" }, { "retdelegrename2" }, { "getacl" }, { "setacl" }, { "noop", "noop", "noop" } }; /* * Module name strings. */ static char *dtnfsclient_accesscache_str = "accesscache"; static char *dtnfsclient_attrcache_str = "attrcache"; static char *dtnfsclient_nfs2_str = "nfs2"; static char *dtnfsclient_nfs3_str = "nfs3"; static char *dtnfsclient_nfs4_str = "nfs4"; /* * Function name strings. */ static char *dtnfsclient_flush_str = "flush"; static char *dtnfsclient_load_str = "load"; static char *dtnfsclient_get_str = "get"; /* * Name strings. 
*/ static char *dtnfsclient_done_str = "done"; static char *dtnfsclient_hit_str = "hit"; static char *dtnfsclient_miss_str = "miss"; static char *dtnfsclient_start_str = "start"; static dtrace_pops_t dtnfsclient_pops = { .dtps_provide = dtnfsclient_provide, .dtps_provide_module = NULL, .dtps_enable = dtnfsclient_enable, .dtps_disable = dtnfsclient_disable, .dtps_suspend = NULL, .dtps_resume = NULL, .dtps_getargdesc = dtnfsclient_getargdesc, .dtps_getargval = NULL, .dtps_usermode = NULL, .dtps_destroy = dtnfsclient_destroy }; static dtrace_provider_id_t dtnfsclient_id; /* * When tracing on a procedure is enabled, the DTrace ID for an RPC event is * stored in one of these two NFS client-allocated arrays; 0 indicates that * the event is not being traced so probes should not be called. * * For simplicity, we allocate both v2, v3 and v4 arrays as NFSV41_NPROCS + 1, * and the v2, v3 arrays are simply sparse. */ extern uint32_t nfscl_nfs2_start_probes[NFSV41_NPROCS + 1]; extern uint32_t nfscl_nfs2_done_probes[NFSV41_NPROCS + 1]; extern uint32_t nfscl_nfs3_start_probes[NFSV41_NPROCS + 1]; extern uint32_t nfscl_nfs3_done_probes[NFSV41_NPROCS + 1]; extern uint32_t nfscl_nfs4_start_probes[NFSV41_NPROCS + 1]; extern uint32_t nfscl_nfs4_done_probes[NFSV41_NPROCS + 1]; /* * Look up a DTrace probe ID to see if it's associated with a "done" event -- * if so, we will return a fourth argument type of "int". 
*/ static int dtnfs234_isdoneprobe(dtrace_id_t id) { int i; for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtnfsclient_rpcs[i].nr_v4_id_done == id || dtnfsclient_rpcs[i].nr_v3_id_done == id || dtnfsclient_rpcs[i].nr_v2_id_done == id) return (1); } return (0); } static void dtnfsclient_getargdesc(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc) { const char *p = NULL; if (id == nfscl_accesscache_flush_done_id || id == nfscl_attrcache_flush_done_id || id == nfscl_attrcache_get_miss_id) { switch (desc->dtargd_ndx) { case 0: p = "struct vnode *"; break; default: desc->dtargd_ndx = DTRACE_ARGNONE; break; } } else if (id == nfscl_accesscache_get_hit_id || id == nfscl_accesscache_get_miss_id) { switch (desc->dtargd_ndx) { case 0: p = "struct vnode *"; break; case 1: p = "uid_t"; break; case 2: p = "uint32_t"; break; default: desc->dtargd_ndx = DTRACE_ARGNONE; break; } } else if (id == nfscl_accesscache_load_done_id) { switch (desc->dtargd_ndx) { case 0: p = "struct vnode *"; break; case 1: p = "uid_t"; break; case 2: p = "uint32_t"; break; case 3: p = "int"; break; default: desc->dtargd_ndx = DTRACE_ARGNONE; break; } } else if (id == nfscl_attrcache_get_hit_id) { switch (desc->dtargd_ndx) { case 0: p = "struct vnode *"; break; case 1: p = "struct vattr *"; break; default: desc->dtargd_ndx = DTRACE_ARGNONE; break; } } else if (id == nfscl_attrcache_load_done_id) { switch (desc->dtargd_ndx) { case 0: p = "struct vnode *"; break; case 1: p = "struct vattr *"; break; case 2: p = "int"; break; default: desc->dtargd_ndx = DTRACE_ARGNONE; break; } } else { switch (desc->dtargd_ndx) { case 0: p = "struct vnode *"; break; case 1: p = "struct mbuf *"; break; case 2: p = "struct ucred *"; break; case 3: p = "int"; break; case 4: if (dtnfs234_isdoneprobe(id)) { p = "int"; break; } /* FALLSTHROUGH */ default: desc->dtargd_ndx = DTRACE_ARGNONE; break; } } if (p != NULL) strlcpy(desc->dtargd_native, p, sizeof(desc->dtargd_native)); } static void dtnfsclient_provide(void 
*arg, dtrace_probedesc_t *desc) { int i; if (desc != NULL) return; /* * Register access cache probes. */ if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_flush_str, dtnfsclient_done_str) == 0) { nfscl_accesscache_flush_done_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_flush_str, dtnfsclient_done_str, 0, NULL); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_get_str, dtnfsclient_hit_str) == 0) { nfscl_accesscache_get_hit_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_get_str, dtnfsclient_hit_str, 0, NULL); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_get_str, dtnfsclient_miss_str) == 0) { nfscl_accesscache_get_miss_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_get_str, dtnfsclient_miss_str, 0, NULL); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_load_str, dtnfsclient_done_str) == 0) { nfscl_accesscache_load_done_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_accesscache_str, dtnfsclient_load_str, dtnfsclient_done_str, 0, NULL); } /* * Register attribute cache probes. 
*/ if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_flush_str, dtnfsclient_done_str) == 0) { nfscl_attrcache_flush_done_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_flush_str, dtnfsclient_done_str, 0, NULL); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_get_str, dtnfsclient_hit_str) == 0) { nfscl_attrcache_get_hit_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_get_str, dtnfsclient_hit_str, 0, NULL); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_get_str, dtnfsclient_miss_str) == 0) { nfscl_attrcache_get_miss_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_get_str, dtnfsclient_miss_str, 0, NULL); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_load_str, dtnfsclient_done_str) == 0) { nfscl_attrcache_load_done_id = dtrace_probe_create( dtnfsclient_id, dtnfsclient_attrcache_str, dtnfsclient_load_str, dtnfsclient_done_str, 0, NULL); } /* * Register NFSv2 RPC procedures; note sparseness check for each slot * in the NFSv3, NFSv4 procnum-indexed array. 
*/ for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtnfsclient_rpcs[i].nr_v2_name != NULL && dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs2_str, dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_start_str) == 0) { dtnfsclient_rpcs[i].nr_v2_id_start = dtrace_probe_create(dtnfsclient_id, dtnfsclient_nfs2_str, dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_start_str, 0, &nfscl_nfs2_start_probes[i]); } if (dtnfsclient_rpcs[i].nr_v2_name != NULL && dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs2_str, dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_done_str) == 0) { dtnfsclient_rpcs[i].nr_v2_id_done = dtrace_probe_create(dtnfsclient_id, dtnfsclient_nfs2_str, dtnfsclient_rpcs[i].nr_v2_name, dtnfsclient_done_str, 0, &nfscl_nfs2_done_probes[i]); } } /* * Register NFSv3 RPC procedures; note sparseness check for each slot * in the NFSv4 procnum-indexed array. */ for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtnfsclient_rpcs[i].nr_v3_name != NULL && dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs3_str, dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_start_str) == 0) { dtnfsclient_rpcs[i].nr_v3_id_start = dtrace_probe_create(dtnfsclient_id, dtnfsclient_nfs3_str, dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_start_str, 0, &nfscl_nfs3_start_probes[i]); } if (dtnfsclient_rpcs[i].nr_v3_name != NULL && dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs3_str, dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_done_str) == 0) { dtnfsclient_rpcs[i].nr_v3_id_done = dtrace_probe_create(dtnfsclient_id, dtnfsclient_nfs3_str, dtnfsclient_rpcs[i].nr_v3_name, dtnfsclient_done_str, 0, &nfscl_nfs3_done_probes[i]); } } /* * Register NFSv4 RPC procedures. 
*/ for (i = 0; i < NFSV41_NPROCS + 1; i++) { if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs4_str, dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_start_str) == 0) { dtnfsclient_rpcs[i].nr_v4_id_start = dtrace_probe_create(dtnfsclient_id, dtnfsclient_nfs4_str, dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_start_str, 0, &nfscl_nfs4_start_probes[i]); } if (dtrace_probe_lookup(dtnfsclient_id, dtnfsclient_nfs4_str, dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_done_str) == 0) { dtnfsclient_rpcs[i].nr_v4_id_done = dtrace_probe_create(dtnfsclient_id, dtnfsclient_nfs4_str, dtnfsclient_rpcs[i].nr_v4_name, dtnfsclient_done_str, 0, &nfscl_nfs4_done_probes[i]); } } } static void dtnfsclient_destroy(void *arg, dtrace_id_t id, void *parg) { } static void dtnfsclient_enable(void *arg, dtrace_id_t id, void *parg) { uint32_t *p = parg; void *f = dtrace_probe; if (id == nfscl_accesscache_flush_done_id) dtrace_nfscl_accesscache_flush_done_probe = f; else if (id == nfscl_accesscache_get_hit_id) dtrace_nfscl_accesscache_get_hit_probe = f; else if (id == nfscl_accesscache_get_miss_id) dtrace_nfscl_accesscache_get_miss_probe = f; else if (id == nfscl_accesscache_load_done_id) dtrace_nfscl_accesscache_load_done_probe = f; else if (id == nfscl_attrcache_flush_done_id) dtrace_nfscl_attrcache_flush_done_probe = f; else if (id == nfscl_attrcache_get_hit_id) dtrace_nfscl_attrcache_get_hit_probe = f; else if (id == nfscl_attrcache_get_miss_id) dtrace_nfscl_attrcache_get_miss_probe = f; else if (id == nfscl_attrcache_load_done_id) dtrace_nfscl_attrcache_load_done_probe = f; else *p = id; } static void dtnfsclient_disable(void *arg, dtrace_id_t id, void *parg) { uint32_t *p = parg; if (id == nfscl_accesscache_flush_done_id) dtrace_nfscl_accesscache_flush_done_probe = NULL; else if (id == nfscl_accesscache_get_hit_id) dtrace_nfscl_accesscache_get_hit_probe = NULL; else if (id == nfscl_accesscache_get_miss_id) dtrace_nfscl_accesscache_get_miss_probe = NULL; else if (id == 
nfscl_accesscache_load_done_id)
		dtrace_nfscl_accesscache_load_done_probe = NULL;
	else if (id == nfscl_attrcache_flush_done_id)
		dtrace_nfscl_attrcache_flush_done_probe = NULL;
	else if (id == nfscl_attrcache_get_hit_id)
		dtrace_nfscl_attrcache_get_hit_probe = NULL;
	else if (id == nfscl_attrcache_get_miss_id)
		dtrace_nfscl_attrcache_get_miss_probe = NULL;
	else if (id == nfscl_attrcache_load_done_id)
		dtrace_nfscl_attrcache_load_done_probe = NULL;
	else
		/*
		 * Not one of the fixed cache probes, so it is an RPC
		 * start/done probe: clear the per-RPC probe-id slot that
		 * dtnfsclient_enable() filled in.
		 */
		*p = 0;
}

/*
 * Register the "nfscl" provider with the DTrace framework and point the
 * NFSv2/3/4 RPC hook function pointers at dtrace_probe() so that the
 * NFS client code fires probes from then on.
 */
static void
dtnfsclient_load(void *dummy)
{

	if (dtrace_register("nfscl", &dtnfsclient_attr, DTRACE_PRIV_USER,
	    NULL, &dtnfsclient_pops, NULL, &dtnfsclient_id) != 0)
		return;

	dtrace_nfscl_nfs234_start_probe =
	    (dtrace_nfsclient_nfs23_start_probe_func_t)dtrace_probe;
	dtrace_nfscl_nfs234_done_probe =
	    (dtrace_nfsclient_nfs23_done_probe_func_t)dtrace_probe;
}

/*
 * Tear down in the reverse order of dtnfsclient_load(): disconnect the RPC
 * hooks first, then unregister the provider.  Returns the value of
 * dtrace_unregister() so SYSUNINIT failure is visible.
 *
 * NOTE(review): old-style empty parameter list "()" rather than "(void)";
 * harmless here, but "(void)" is the modern prototype form.
 */
static int
dtnfsclient_unload()
{
	dtrace_nfscl_nfs234_start_probe = NULL;
	dtrace_nfscl_nfs234_done_probe = NULL;

	return (dtrace_unregister(dtnfsclient_id));
}

/*
 * Module event handler.  Load/unload work is done by the SYSINIT/SYSUNINIT
 * hooks below, so all recognized events are accepted as no-ops.
 */
static int
dtnfsclient_modevent(module_t mod __unused, int type, void *data __unused)
{
	int error = 0;

	switch (type) {
	case MOD_LOAD:
		break;

	case MOD_UNLOAD:
		break;

	case MOD_SHUTDOWN:
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

SYSINIT(dtnfsclient_load, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY,
    dtnfsclient_load, NULL);
SYSUNINIT(dtnfsclient_unload, SI_SUB_DTRACE_PROVIDER, SI_ORDER_ANY,
    dtnfsclient_unload, NULL);

DEV_MODULE(dtnfscl, dtnfsclient_modevent, NULL);
MODULE_VERSION(dtnfscl, 1);
MODULE_DEPEND(dtnfscl, dtrace, 1, 1, 1);
MODULE_DEPEND(dtnfscl, opensolaris, 1, 1, 1);
MODULE_DEPEND(dtnfscl, nfscl, 1, 1, 1);
MODULE_DEPEND(dtnfscl, nfscommon, 1, 1, 1);
Index: head/sys/fs/nfsclient/nfs_clstate.c
===================================================================
--- head/sys/fs/nfsclient/nfs_clstate.c	(revision 326267)
+++ head/sys/fs/nfsclient/nfs_clstate.c	(revision 326268)
@@ -1,5356 +1,5358 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
  * Copyright (c) 2009 Rick Macklem,
University of Guelph * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); /* * These functions implement the client side state handling for NFSv4. * NFSv4 state handling: * - A lockowner is used to determine lock contention, so it * corresponds directly to a Posix pid. (1 to 1 mapping) * - The correct granularity of an OpenOwner is not nearly so * obvious. An OpenOwner does the following: * - provides a serial sequencing of Open/Close/Lock-with-new-lockowner * - is used to check for Open/Share contention (not applicable to * this client, since all Opens are Deny_None) * As such, I considered both extreme. * 1 OpenOwner per ClientID - Simple to manage, but fully serializes * all Open, Close and Lock (with a new lockowner) Ops. 
* 1 OpenOwner for each Open - This one results in an OpenConfirm for * every Open, for most servers. * So, I chose to use the same mapping as I did for LockOwnwers. * The main concern here is that you can end up with multiple Opens * for the same File Handle, but on different OpenOwners (opens * inherited from parents, grandparents...) and you do not know * which of these the vnodeop close applies to. This is handled by * delaying the Close Op(s) until all of the Opens have been closed. * (It is not yet obvious if this is the correct granularity.) * - How the code handles serialization: * - For the ClientId, it uses an exclusive lock while getting its * SetClientId and during recovery. Otherwise, it uses a shared * lock via a reference count. * - For the rest of the data structures, it uses an SMP mutex * (once the nfs client is SMP safe) and doesn't sleep while * manipulating the linked lists. * - The serialization of Open/Close/Lock/LockU falls out in the * "wash", since OpenOwners and LockOwners are both mapped from * Posix pid. In other words, there is only one Posix pid using * any given owner, so that owner is serialized. (If you change * the granularity of the OpenOwner, then code must be added to * serialize Ops on the OpenOwner.) * - When to get rid of OpenOwners and LockOwners. * - The function nfscl_cleanup_common() is executed after a process exits. * It goes through the client list looking for all Open and Lock Owners. * When one is found, it is marked "defunct" or in the case of * an OpenOwner without any Opens, freed. * The renew thread scans for defunct Owners and gets rid of them, * if it can. The LockOwners will also be deleted when the * associated Open is closed. * - If the LockU or Close Op(s) fail during close in a way * that could be recovered upon retry, they are relinked to the * ClientId's defunct open list and retried by the renew thread * until they succeed or an unmount/recovery occurs. 
* (Since we are done with them, they do not need to be recovered.) */ #ifndef APPLEKEXT #include /* * Global variables */ extern struct nfsstatsv1 nfsstatsv1; extern struct nfsreqhead nfsd_reqq; extern u_int32_t newnfs_false, newnfs_true; extern int nfscl_debuglevel; extern int nfscl_enablecallb; extern int nfs_numnfscbd; NFSREQSPINLOCK; NFSCLSTATEMUTEX; int nfscl_inited = 0; struct nfsclhead nfsclhead; /* Head of clientid list */ int nfscl_deleghighwater = NFSCLDELEGHIGHWATER; int nfscl_layouthighwater = NFSCLLAYOUTHIGHWATER; #endif /* !APPLEKEXT */ static int nfscl_delegcnt = 0; static int nfscl_layoutcnt = 0; static int nfscl_getopen(struct nfsclownerhead *, u_int8_t *, int, u_int8_t *, u_int8_t *, u_int32_t, struct nfscllockowner **, struct nfsclopen **); static void nfscl_clrelease(struct nfsclclient *); static void nfscl_cleanclient(struct nfsclclient *); static void nfscl_expireclient(struct nfsclclient *, struct nfsmount *, struct ucred *, NFSPROC_T *); static int nfscl_expireopen(struct nfsclclient *, struct nfsclopen *, struct nfsmount *, struct ucred *, NFSPROC_T *); static void nfscl_recover(struct nfsclclient *, struct ucred *, NFSPROC_T *); static void nfscl_insertlock(struct nfscllockowner *, struct nfscllock *, struct nfscllock *, int); static int nfscl_updatelock(struct nfscllockowner *, struct nfscllock **, struct nfscllock **, int); static void nfscl_delegreturnall(struct nfsclclient *, NFSPROC_T *); static u_int32_t nfscl_nextcbident(void); static mount_t nfscl_getmnt(int, uint8_t *, u_int32_t, struct nfsclclient **); static struct nfsclclient *nfscl_getclnt(u_int32_t); static struct nfsclclient *nfscl_getclntsess(uint8_t *); static struct nfscldeleg *nfscl_finddeleg(struct nfsclclient *, u_int8_t *, int); static void nfscl_retoncloselayout(vnode_t, struct nfsclclient *, uint8_t *, int, struct nfsclrecalllayout **); static void nfscl_reldevinfo_locked(struct nfscldevinfo *); static struct nfscllayout *nfscl_findlayout(struct nfsclclient *, 
u_int8_t *, int); static struct nfscldevinfo *nfscl_finddevinfo(struct nfsclclient *, uint8_t *); static int nfscl_checkconflict(struct nfscllockownerhead *, struct nfscllock *, u_int8_t *, struct nfscllock **); static void nfscl_freealllocks(struct nfscllockownerhead *, int); static int nfscl_localconflict(struct nfsclclient *, u_int8_t *, int, struct nfscllock *, u_int8_t *, struct nfscldeleg *, struct nfscllock **); static void nfscl_newopen(struct nfsclclient *, struct nfscldeleg *, struct nfsclowner **, struct nfsclowner **, struct nfsclopen **, struct nfsclopen **, u_int8_t *, u_int8_t *, int, struct ucred *, int *); static int nfscl_moveopen(vnode_t , struct nfsclclient *, struct nfsmount *, struct nfsclopen *, struct nfsclowner *, struct nfscldeleg *, struct ucred *, NFSPROC_T *); static void nfscl_totalrecall(struct nfsclclient *); static int nfscl_relock(vnode_t , struct nfsclclient *, struct nfsmount *, struct nfscllockowner *, struct nfscllock *, struct ucred *, NFSPROC_T *); static int nfscl_tryopen(struct nfsmount *, vnode_t , u_int8_t *, int, u_int8_t *, int, u_int32_t, struct nfsclopen *, u_int8_t *, int, struct nfscldeleg **, int, u_int32_t, struct ucred *, NFSPROC_T *); static int nfscl_trylock(struct nfsmount *, vnode_t , u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short, struct ucred *, NFSPROC_T *); static int nfsrpc_reopen(struct nfsmount *, u_int8_t *, int, u_int32_t, struct nfsclopen *, struct nfscldeleg **, struct ucred *, NFSPROC_T *); static void nfscl_freedeleg(struct nfscldeleghead *, struct nfscldeleg *); static int nfscl_errmap(struct nfsrv_descript *, u_int32_t); static void nfscl_cleanup_common(struct nfsclclient *, u_int8_t *); static int nfscl_recalldeleg(struct nfsclclient *, struct nfsmount *, struct nfscldeleg *, vnode_t, struct ucred *, NFSPROC_T *, int); static void nfscl_freeopenowner(struct nfsclowner *, int); static void nfscl_cleandeleg(struct nfscldeleg *); static int 
nfscl_trydelegreturn(struct nfscldeleg *, struct ucred *, struct nfsmount *, NFSPROC_T *); static void nfscl_emptylockowner(struct nfscllockowner *, struct nfscllockownerfhhead *); static void nfscl_mergeflayouts(struct nfsclflayouthead *, struct nfsclflayouthead *); static int nfscl_layoutrecall(int, struct nfscllayout *, uint32_t, uint64_t, uint64_t, uint32_t, struct nfsclrecalllayout *); static int nfscl_seq(uint32_t, uint32_t); static void nfscl_layoutreturn(struct nfsmount *, struct nfscllayout *, struct ucred *, NFSPROC_T *); static void nfscl_dolayoutcommit(struct nfsmount *, struct nfscllayout *, struct ucred *, NFSPROC_T *); static short nfscberr_null[] = { 0, 0, }; static short nfscberr_getattr[] = { NFSERR_RESOURCE, NFSERR_BADHANDLE, NFSERR_BADXDR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, 0, }; static short nfscberr_recall[] = { NFSERR_RESOURCE, NFSERR_BADHANDLE, NFSERR_BADSTATEID, NFSERR_BADXDR, NFSERR_RESOURCE, NFSERR_SERVERFAULT, 0, }; static short *nfscl_cberrmap[] = { nfscberr_null, nfscberr_null, nfscberr_null, nfscberr_getattr, nfscberr_recall }; #define NETFAMILY(clp) \ (((clp)->nfsc_flags & NFSCLFLAGS_AFINET6) ? AF_INET6 : AF_INET) /* * Called for an open operation. * If the nfhp argument is NULL, just get an openowner. */ APPLESTATIC int nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg, struct ucred *cred, NFSPROC_T *p, struct nfsclowner **owpp, struct nfsclopen **opp, int *newonep, int *retp, int lockit) { struct nfsclclient *clp; struct nfsclowner *owp, *nowp; struct nfsclopen *op = NULL, *nop = NULL; struct nfscldeleg *dp; struct nfsclownerhead *ohp; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int ret; if (newonep != NULL) *newonep = 0; if (opp != NULL) *opp = NULL; if (owpp != NULL) *owpp = NULL; /* * Might need one or both of these, so MALLOC them now, to * avoid a tsleep() in MALLOC later. 
*/ MALLOC(nowp, struct nfsclowner *, sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK); if (nfhp != NULL) MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) + fhlen - 1, M_NFSCLOPEN, M_WAITOK); ret = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); if (ret != 0) { FREE((caddr_t)nowp, M_NFSCLOWNER); if (nop != NULL) FREE((caddr_t)nop, M_NFSCLOPEN); return (ret); } /* * Get the Open iff it already exists. * If none found, add the new one or return error, depending upon * "create". */ NFSLOCKCLSTATE(); dp = NULL; /* First check the delegation list */ if (nfhp != NULL && usedeleg) { LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) { if (dp->nfsdl_fhlen == fhlen && !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) { if (!(amode & NFSV4OPEN_ACCESSWRITE) || (dp->nfsdl_flags & NFSCLDL_WRITE)) break; dp = NULL; break; } } } if (dp != NULL) { nfscl_filllockowner(p->td_proc, own, F_POSIX); ohp = &dp->nfsdl_owner; } else { /* For NFSv4.1 and this option, use a single open_owner. */ if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) nfscl_filllockowner(NULL, own, F_POSIX); else nfscl_filllockowner(p->td_proc, own, F_POSIX); ohp = &clp->nfsc_owner; } /* Now, search for an openowner */ LIST_FOREACH(owp, ohp, nfsow_list) { if (!NFSBCMP(owp->nfsow_owner, own, NFSV4CL_LOCKNAMELEN)) break; } /* * Create a new open, as required. */ nfscl_newopen(clp, dp, &owp, &nowp, &op, &nop, own, nfhp, fhlen, cred, newonep); /* * Now, check the mode on the open and return the appropriate * value. */ if (retp != NULL) { if (nfhp != NULL && dp != NULL && nop == NULL) /* new local open on delegation */ *retp = NFSCLOPEN_SETCRED; else *retp = NFSCLOPEN_OK; } if (op != NULL && (amode & ~(op->nfso_mode))) { op->nfso_mode |= amode; if (retp != NULL && dp == NULL) *retp = NFSCLOPEN_DOOPEN; } /* * Serialize modifications to the open owner for multiple threads * within the same process using a read/write sleep lock. 
* For NFSv4.1 and a single OpenOwner, allow concurrent open operations * by acquiring a shared lock. The close operations still use an * exclusive lock for this case. */ if (lockit != 0) { if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) { /* * Get a shared lock on the OpenOwner, but first * wait for any pending exclusive lock, so that the * exclusive locker gets priority. */ nfsv4_lock(&owp->nfsow_rwlock, 0, NULL, NFSCLSTATEMUTEXPTR, NULL); nfsv4_getref(&owp->nfsow_rwlock, NULL, NFSCLSTATEMUTEXPTR, NULL); } else nfscl_lockexcl(&owp->nfsow_rwlock, NFSCLSTATEMUTEXPTR); } NFSUNLOCKCLSTATE(); if (nowp != NULL) FREE((caddr_t)nowp, M_NFSCLOWNER); if (nop != NULL) FREE((caddr_t)nop, M_NFSCLOPEN); if (owpp != NULL) *owpp = owp; if (opp != NULL) *opp = op; return (0); } /* * Create a new open, as required. */ static void nfscl_newopen(struct nfsclclient *clp, struct nfscldeleg *dp, struct nfsclowner **owpp, struct nfsclowner **nowpp, struct nfsclopen **opp, struct nfsclopen **nopp, u_int8_t *own, u_int8_t *fhp, int fhlen, struct ucred *cred, int *newonep) { struct nfsclowner *owp = *owpp, *nowp; struct nfsclopen *op, *nop; if (nowpp != NULL) nowp = *nowpp; else nowp = NULL; if (nopp != NULL) nop = *nopp; else nop = NULL; if (owp == NULL && nowp != NULL) { NFSBCOPY(own, nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN); LIST_INIT(&nowp->nfsow_open); nowp->nfsow_clp = clp; nowp->nfsow_seqid = 0; nowp->nfsow_defunct = 0; nfscl_lockinit(&nowp->nfsow_rwlock); if (dp != NULL) { nfsstatsv1.cllocalopenowners++; LIST_INSERT_HEAD(&dp->nfsdl_owner, nowp, nfsow_list); } else { nfsstatsv1.clopenowners++; LIST_INSERT_HEAD(&clp->nfsc_owner, nowp, nfsow_list); } owp = *owpp = nowp; *nowpp = NULL; if (newonep != NULL) *newonep = 1; } /* If an fhp has been specified, create an Open as well. 
*/ if (fhp != NULL) { /* and look for the correct open, based upon FH */ LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, fhp, fhlen)) break; } if (op == NULL && nop != NULL) { nop->nfso_own = owp; nop->nfso_mode = 0; nop->nfso_opencnt = 0; nop->nfso_posixlock = 1; nop->nfso_fhlen = fhlen; NFSBCOPY(fhp, nop->nfso_fh, fhlen); LIST_INIT(&nop->nfso_lock); nop->nfso_stateid.seqid = 0; nop->nfso_stateid.other[0] = 0; nop->nfso_stateid.other[1] = 0; nop->nfso_stateid.other[2] = 0; KASSERT(cred != NULL, ("%s: cred NULL\n", __func__)); newnfs_copyincred(cred, &nop->nfso_cred); if (dp != NULL) { TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; nfsstatsv1.cllocalopens++; } else { nfsstatsv1.clopens++; } LIST_INSERT_HEAD(&owp->nfsow_open, nop, nfso_list); *opp = nop; *nopp = NULL; if (newonep != NULL) *newonep = 1; } else { *opp = op; } } } /* * Called to find/add a delegation to a client. */ APPLESTATIC int nfscl_deleg(mount_t mp, struct nfsclclient *clp, u_int8_t *nfhp, int fhlen, struct ucred *cred, NFSPROC_T *p, struct nfscldeleg **dpp) { struct nfscldeleg *dp = *dpp, *tdp; /* * First, if we have received a Read delegation for a file on a * read/write file system, just return it, because they aren't * useful, imho. 
*/
	if (mp != NULL && dp != NULL && !NFSMNT_RDONLY(mp) &&
	    (dp->nfsdl_flags & NFSCLDL_READ)) {
		/*
		 * Read delegation on a read/write mount: return it to the
		 * server immediately (best effort) and report "no
		 * delegation" to the caller.
		 */
		(void) nfscl_trydelegreturn(dp, cred, VFSTONFS(mp), p);
		FREE((caddr_t)dp, M_NFSCLDELEG);
		*dpp = NULL;
		return (0);
	}

	/* Look for the correct deleg, based upon FH */
	NFSLOCKCLSTATE();
	tdp = nfscl_finddeleg(clp, nfhp, fhlen);
	if (tdp == NULL) {
		if (dp == NULL) {
			NFSUNLOCKCLSTATE();
			return (NFSERR_BADSTATEID);
		}
		/*
		 * No existing delegation for this FH: link the new one onto
		 * the clientid's LRU list and FH hash, stamp its expiry and
		 * bump the statistics/global count.
		 */
		*dpp = NULL;
		TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list);
		LIST_INSERT_HEAD(NFSCLDELEGHASH(clp, nfhp, fhlen), dp,
		    nfsdl_hash);
		dp->nfsdl_timestamp = NFSD_MONOSEC + 120;
		nfsstatsv1.cldelegates++;
		nfscl_delegcnt++;
	} else {
		/*
		 * Delegation already exists, what do we do if a new one??
		 */
		if (dp != NULL) {
			printf("Deleg already exists!\n");
			FREE((caddr_t)dp, M_NFSCLDELEG);
			*dpp = NULL;
		} else {
			*dpp = tdp;
		}
	}
	NFSUNLOCKCLSTATE();
	return (0);
}

/*
 * Find a delegation for this file handle. Return NULL upon failure.
 * Caller must hold the client state mutex; a match requires identical
 * FH length and bytes.
 */
static struct nfscldeleg *
nfscl_finddeleg(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
{
	struct nfscldeleg *dp;

	LIST_FOREACH(dp, NFSCLDELEGHASH(clp, fhp, fhlen), nfsdl_hash) {
		if (dp->nfsdl_fhlen == fhlen &&
		    !NFSBCMP(dp->nfsdl_fh, fhp, fhlen))
			break;
	}
	return (dp);
}

/*
 * Get a stateid for an I/O operation. First, look for an open and iff
 * found, return either a lockowner stateid or the open stateid.
 * If no Open is found, just return error and the special stateid of all zeros.
 */
APPLESTATIC int
nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
    int fords, struct ucred *cred, NFSPROC_T *p, nfsv4stateid_t *stateidp,
    void **lckpp)
{
	struct nfsclclient *clp;
	struct nfsclowner *owp;
	struct nfsclopen *op = NULL, *top;
	struct nfscllockowner *lp;
	struct nfscldeleg *dp;
	struct nfsnode *np;
	struct nfsmount *nmp;
	u_int8_t own[NFSV4CL_LOCKNAMELEN];
	int error, done;

	*lckpp = NULL;
	/*
	 * Initially, just set the special stateid of all zeros.
	 * (Don't do this for a DS, since the special stateid can't be used.)
*/ if (fords == 0) { stateidp->seqid = 0; stateidp->other[0] = 0; stateidp->other[1] = 0; stateidp->other[2] = 0; } if (vnode_vtype(vp) != VREG) return (EISDIR); np = VTONFS(vp); nmp = VFSTONFS(vnode_mount(vp)); NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (EACCES); } /* * Wait for recovery to complete. */ while ((clp->nfsc_flags & NFSCLFLAGS_RECVRINPROG)) (void) nfsmsleep(&clp->nfsc_flags, NFSCLSTATEMUTEXPTR, PZERO, "nfsrecvr", NULL); /* * First, look for a delegation. */ LIST_FOREACH(dp, NFSCLDELEGHASH(clp, nfhp, fhlen), nfsdl_hash) { if (dp->nfsdl_fhlen == fhlen && !NFSBCMP(nfhp, dp->nfsdl_fh, fhlen)) { if (!(mode & NFSV4OPEN_ACCESSWRITE) || (dp->nfsdl_flags & NFSCLDL_WRITE)) { stateidp->seqid = dp->nfsdl_stateid.seqid; stateidp->other[0] = dp->nfsdl_stateid.other[0]; stateidp->other[1] = dp->nfsdl_stateid.other[1]; stateidp->other[2] = dp->nfsdl_stateid.other[2]; if (!(np->n_flag & NDELEGRECALL)) { TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; dp->nfsdl_rwlock.nfslock_usecnt++; *lckpp = (void *)&dp->nfsdl_rwlock; } NFSUNLOCKCLSTATE(); return (0); } break; } } if (p != NULL) { /* * If p != NULL, we want to search the parentage tree * for a matching OpenOwner and use that. */ if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) nfscl_filllockowner(NULL, own, F_POSIX); else nfscl_filllockowner(p->td_proc, own, F_POSIX); lp = NULL; error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, own, own, mode, &lp, &op); if (error == 0 && lp != NULL && fords == 0) { /* Don't return a lock stateid for a DS. */ stateidp->seqid = lp->nfsl_stateid.seqid; stateidp->other[0] = lp->nfsl_stateid.other[0]; stateidp->other[1] = lp->nfsl_stateid.other[1]; stateidp->other[2] = lp->nfsl_stateid.other[2]; NFSUNLOCKCLSTATE(); return (0); } } if (op == NULL) { /* If not found, just look for any OpenOwner that will work. 
*/ top = NULL; done = 0; owp = LIST_FIRST(&clp->nfsc_owner); while (!done && owp != NULL) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == fhlen && !NFSBCMP(op->nfso_fh, nfhp, fhlen)) { if (top == NULL && (op->nfso_mode & NFSV4OPEN_ACCESSWRITE) != 0 && (mode & NFSV4OPEN_ACCESSREAD) != 0) top = op; if ((mode & op->nfso_mode) == mode) { done = 1; break; } } } if (!done) owp = LIST_NEXT(owp, nfsow_list); } if (!done) { NFSCL_DEBUG(2, "openmode top=%p\n", top); if (top == NULL || NFSHASOPENMODE(nmp)) { NFSUNLOCKCLSTATE(); return (ENOENT); } else op = top; } /* * For read aheads or write behinds, use the open cred. * A read ahead or write behind is indicated by p == NULL. */ if (p == NULL) newnfs_copycred(&op->nfso_cred, cred); } /* * No lock stateid, so return the open stateid. */ stateidp->seqid = op->nfso_stateid.seqid; stateidp->other[0] = op->nfso_stateid.other[0]; stateidp->other[1] = op->nfso_stateid.other[1]; stateidp->other[2] = op->nfso_stateid.other[2]; NFSUNLOCKCLSTATE(); return (0); } /* * Search for a matching file, mode and, optionally, lockowner. */ static int nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen, u_int8_t *openown, u_int8_t *lockown, u_int32_t mode, struct nfscllockowner **lpp, struct nfsclopen **opp) { struct nfsclowner *owp; struct nfsclopen *op, *rop, *rop2; struct nfscllockowner *lp; int keep_looping; if (lpp != NULL) *lpp = NULL; /* * rop will be set to the open to be returned. There are three * variants of this, all for an open of the correct file: * 1 - A match of lockown. * 2 - A match of the openown, when no lockown match exists. * 3 - A match for any open, if no openown or lockown match exists. * Looking for #2 over #3 probably isn't necessary, but since * RFC3530 is vague w.r.t. the relationship between openowners and * lockowners, I think this is the safer way to go. 
*/
	rop = NULL;
	rop2 = NULL;
	keep_looping = 1;
	/* Search the client list */
	owp = LIST_FIRST(ohp);
	while (owp != NULL && keep_looping != 0) {
		/* and look for the correct open */
		op = LIST_FIRST(&owp->nfsow_open);
		while (op != NULL && keep_looping != 0) {
			if (op->nfso_fhlen == fhlen &&
			    !NFSBCMP(op->nfso_fh, nfhp, fhlen) &&
			    (op->nfso_mode & mode) == mode) {
				if (lpp != NULL) {
					/* Now look for a matching lockowner. */
					LIST_FOREACH(lp, &op->nfso_lock,
					    nfsl_list) {
						if (!NFSBCMP(lp->nfsl_owner,
						    lockown,
						    NFSV4CL_LOCKNAMELEN)) {
							/* Variant 1: lockowner match wins outright. */
							*lpp = lp;
							rop = op;
							keep_looping = 0;
							break;
						}
					}
				}
				if (rop == NULL && !NFSBCMP(owp->nfsow_owner,
				    openown, NFSV4CL_LOCKNAMELEN)) {
					/* Variant 2: openowner match. */
					rop = op;
					if (lpp == NULL)
						keep_looping = 0;
				}
				if (rop2 == NULL)
					/* Variant 3: remember any open as last resort. */
					rop2 = op;
			}
			op = LIST_NEXT(op, nfso_list);
		}
		owp = LIST_NEXT(owp, nfsow_list);
	}
	if (rop == NULL)
		rop = rop2;
	if (rop == NULL)
		return (EBADF);
	*opp = rop;
	return (0);
}

/*
 * Release use of an open owner. Called when open operations are done
 * with the open owner.  Drops the shared ref (single-openowner mounts)
 * or the sleep lock taken by nfscl_open(), unless "unlocked" says the
 * caller already released it, then releases the clientid.
 */
APPLESTATIC void
nfscl_ownerrelease(struct nfsmount *nmp, struct nfsclowner *owp,
    __unused int error, __unused int candelete, int unlocked)
{

	if (owp == NULL)
		return;
	NFSLOCKCLSTATE();
	if (unlocked == 0) {
		if (NFSHASONEOPENOWN(nmp))
			nfsv4_relref(&owp->nfsow_rwlock);
		else
			nfscl_lockunlock(&owp->nfsow_rwlock);
	}
	nfscl_clrelease(owp->nfsow_clp);
	NFSUNLOCKCLSTATE();
}

/*
 * Release use of an open structure under an open owner.
 * On error, when deletion is permitted and no local opens remain, the
 * open is freed before the clientid reference is dropped.
 */
APPLESTATIC void
nfscl_openrelease(struct nfsmount *nmp, struct nfsclopen *op, int error,
    int candelete)
{
	struct nfsclclient *clp;
	struct nfsclowner *owp;

	if (op == NULL)
		return;
	NFSLOCKCLSTATE();
	owp = op->nfso_own;
	if (NFSHASONEOPENOWN(nmp))
		nfsv4_relref(&owp->nfsow_rwlock);
	else
		nfscl_lockunlock(&owp->nfsow_rwlock);
	clp = owp->nfsow_clp;
	if (error && candelete && op->nfso_opencnt == 0)
		nfscl_freeopen(op, 0);
	nfscl_clrelease(clp);
	NFSUNLOCKCLSTATE();
}

/*
 * Called to get a clientid structure.
It will optionally lock the * client data structures to do the SetClientId/SetClientId_confirm, * but will release that lock and return the clientid with a reference * count on it. * If the "cred" argument is NULL, a new clientid should not be created. * If the "p" argument is NULL, a SetClientID/SetClientIDConfirm cannot * be done. * The start_renewthread argument tells nfscl_getcl() to start a renew * thread if this creates a new clp. * It always clpp with a reference count on it, unless returning an error. */ APPLESTATIC int nfscl_getcl(struct mount *mp, struct ucred *cred, NFSPROC_T *p, int start_renewthread, struct nfsclclient **clpp) { struct nfsclclient *clp; struct nfsclclient *newclp = NULL; struct nfsmount *nmp; char uuid[HOSTUUIDLEN]; int igotlock = 0, error, trystalecnt, clidinusedelay, i; u_int16_t idlen = 0; nmp = VFSTONFS(mp); if (cred != NULL) { getcredhostuuid(cred, uuid, sizeof uuid); idlen = strlen(uuid); if (idlen > 0) idlen += sizeof (u_int64_t); else idlen += sizeof (u_int64_t) + 16; /* 16 random bytes */ MALLOC(newclp, struct nfsclclient *, sizeof (struct nfsclclient) + idlen - 1, M_NFSCLCLIENT, M_WAITOK | M_ZERO); } NFSLOCKCLSTATE(); /* * If a forced dismount is already in progress, don't * allocate a new clientid and get out now. For the case where * clp != NULL, this is a harmless optimization. 
*/ if (NFSCL_FORCEDISM(mp)) { NFSUNLOCKCLSTATE(); if (newclp != NULL) free(newclp, M_NFSCLCLIENT); return (EBADF); } clp = nmp->nm_clp; if (clp == NULL) { if (newclp == NULL) { NFSUNLOCKCLSTATE(); return (EACCES); } clp = newclp; clp->nfsc_idlen = idlen; LIST_INIT(&clp->nfsc_owner); TAILQ_INIT(&clp->nfsc_deleg); TAILQ_INIT(&clp->nfsc_layout); LIST_INIT(&clp->nfsc_devinfo); for (i = 0; i < NFSCLDELEGHASHSIZE; i++) LIST_INIT(&clp->nfsc_deleghash[i]); for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++) LIST_INIT(&clp->nfsc_layouthash[i]); clp->nfsc_flags = NFSCLFLAGS_INITED; clp->nfsc_clientidrev = 1; clp->nfsc_cbident = nfscl_nextcbident(); nfscl_fillclid(nmp->nm_clval, uuid, clp->nfsc_id, clp->nfsc_idlen); LIST_INSERT_HEAD(&nfsclhead, clp, nfsc_list); nmp->nm_clp = clp; clp->nfsc_nmp = nmp; NFSUNLOCKCLSTATE(); if (start_renewthread != 0) nfscl_start_renewthread(clp); } else { NFSUNLOCKCLSTATE(); if (newclp != NULL) free(newclp, M_NFSCLCLIENT); } NFSLOCKCLSTATE(); while ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0 && !igotlock && !NFSCL_FORCEDISM(mp)) igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, mp); if (igotlock == 0) { /* * Call nfsv4_lock() with "iwantlock == 0" so that it will * wait for a pending exclusive lock request. This gives the * exclusive lock request priority over this shared lock * request. * An exclusive lock on nfsc_lock is used mainly for server * crash recoveries. */ nfsv4_lock(&clp->nfsc_lock, 0, NULL, NFSCLSTATEMUTEXPTR, mp); nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, mp); } if (igotlock == 0 && NFSCL_FORCEDISM(mp)) { /* * Both nfsv4_lock() and nfsv4_getref() know to check * for NFSCL_FORCEDISM() and return without sleeping to * wait for the exclusive lock to be released, since it * might be held by nfscl_umount() and we need to get out * now for that case and not wait until nfscl_umount() * releases it. */ NFSUNLOCKCLSTATE(); return (EBADF); } NFSUNLOCKCLSTATE(); /* * If it needs a clientid, do the setclientid now. 
*/ if ((clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID) == 0) { if (!igotlock) panic("nfscl_clget"); if (p == NULL || cred == NULL) { NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (EACCES); } /* * If RFC3530 Sec. 14.2.33 is taken literally, * NFSERR_CLIDINUSE will be returned persistently for the * case where a new mount of the same file system is using * a different principal. In practice, NFSERR_CLIDINUSE is * only returned when there is outstanding unexpired state * on the clientid. As such, try for twice the lease * interval, if we know what that is. Otherwise, make a * wild ass guess. * The case of returning NFSERR_STALECLIENTID is far less * likely, but might occur if there is a significant delay * between doing the SetClientID and SetClientIDConfirm Ops, * such that the server throws away the clientid before * receiving the SetClientIDConfirm. */ if (clp->nfsc_renew > 0) clidinusedelay = NFSCL_LEASE(clp->nfsc_renew) * 2; else clidinusedelay = 120; trystalecnt = 3; do { error = nfsrpc_setclient(nmp, clp, 0, cred, p); if (error == NFSERR_STALECLIENTID || error == NFSERR_STALEDONTRECOVER || error == NFSERR_BADSESSION || error == NFSERR_CLIDINUSE) { (void) nfs_catnap(PZERO, error, "nfs_setcl"); } } while (((error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trystalecnt > 0) || (error == NFSERR_CLIDINUSE && --clidinusedelay > 0)); if (error) { NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (error); } clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; } if (igotlock) { NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 1); NFSUNLOCKCLSTATE(); } *clpp = clp; return (0); } /* * Get a reference to a clientid and return it, if valid. 
*/
APPLESTATIC struct nfsclclient *
nfscl_findcl(struct nfsmount *nmp)
{
	struct nfsclclient *clp;

	clp = nmp->nm_clp;
	/* Only hand back a clientid that has been confirmed by the server. */
	if (clp == NULL || !(clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID))
		return (NULL);
	return (clp);
}

/*
 * Release the clientid structure. It may be locked or reference counted.
 * Caller must hold the client state mutex.
 */
static void
nfscl_clrelease(struct nfsclclient *clp)
{

	if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
		nfsv4_unlock(&clp->nfsc_lock, 0);
	else
		nfsv4_relref(&clp->nfsc_lock);
}

/*
 * External call for nfscl_clrelease.  Same as nfscl_clrelease() but
 * acquires/releases the client state mutex itself.
 */
APPLESTATIC void
nfscl_clientrelease(struct nfsclclient *clp)
{

	NFSLOCKCLSTATE();
	if (clp->nfsc_lock.nfslock_lock & NFSV4LOCK_LOCK)
		nfsv4_unlock(&clp->nfsc_lock, 0);
	else
		nfsv4_relref(&clp->nfsc_lock);
	NFSUNLOCKCLSTATE();
}

/*
 * Called when wanting to lock a byte region.
 * "recovery" selects the server-crash-recovery path, where the clientid
 * (rclp) and owner names (rownp/ropenownp) are supplied by the caller
 * instead of being derived from the process.
 */
APPLESTATIC int
nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
    short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
    int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
    struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
{
	struct nfscllockowner *lp;
	struct nfsclopen *op;
	struct nfsclclient *clp;
	struct nfscllockowner *nlp;
	struct nfscllock *nlop, *otherlop;
	struct nfscldeleg *dp = NULL, *ldp = NULL;
	struct nfscllockownerhead *lhp = NULL;
	struct nfsnode *np;
	u_int8_t own[NFSV4CL_LOCKNAMELEN], *ownp, openown[NFSV4CL_LOCKNAMELEN];
	u_int8_t *openownp;
	int error = 0, ret, donelocally = 0;
	u_int32_t mode;

	/* For Lock Ops, the open mode doesn't matter, so use 0 to match any. */
	mode = 0;
	np = VTONFS(vp);
	*lpp = NULL;
	lp = NULL;
	*newonep = 0;
	*donelocallyp = 0;

	/*
	 * Might need these, so MALLOC them now, to
	 * avoid a tsleep() in MALLOC later.
*/ MALLOC(nlp, struct nfscllockowner *, sizeof (struct nfscllockowner), M_NFSCLLOCKOWNER, M_WAITOK); MALLOC(otherlop, struct nfscllock *, sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); MALLOC(nlop, struct nfscllock *, sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); nlop->nfslo_type = type; nlop->nfslo_first = off; if (len == NFS64BITSSET) { nlop->nfslo_end = NFS64BITSSET; } else { nlop->nfslo_end = off + len; if (nlop->nfslo_end <= nlop->nfslo_first) error = NFSERR_INVAL; } if (!error) { if (recovery) clp = rclp; else error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp); } if (error) { FREE((caddr_t)nlp, M_NFSCLLOCKOWNER); FREE((caddr_t)otherlop, M_NFSCLLOCK); FREE((caddr_t)nlop, M_NFSCLLOCK); return (error); } op = NULL; if (recovery) { ownp = rownp; openownp = ropenownp; } else { nfscl_filllockowner(id, own, flags); ownp = own; if (NFSHASONEOPENOWN(VFSTONFS(vnode_mount(vp)))) nfscl_filllockowner(NULL, openown, F_POSIX); else nfscl_filllockowner(p->td_proc, openown, F_POSIX); openownp = openown; } if (!recovery) { NFSLOCKCLSTATE(); /* * First, search for a delegation. If one exists for this file, * the lock can be done locally against it, so long as there * isn't a local lock conflict. */ ldp = dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); /* Just sanity check for correct type of delegation */ if (dp != NULL && ((dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) != 0 || (type == F_WRLCK && (dp->nfsdl_flags & NFSCLDL_WRITE) == 0))) dp = NULL; } if (dp != NULL) { /* Now, find an open and maybe a lockowner. 
*/ ret = nfscl_getopen(&dp->nfsdl_owner, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op); if (ret) ret = nfscl_getopen(&clp->nfsc_owner, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp, ownp, mode, NULL, &op); if (!ret) { lhp = &dp->nfsdl_lock; TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list); TAILQ_INSERT_HEAD(&clp->nfsc_deleg, dp, nfsdl_list); dp->nfsdl_timestamp = NFSD_MONOSEC + 120; donelocally = 1; } else { dp = NULL; } } if (!donelocally) { /* * Get the related Open and maybe lockowner. */ error = nfscl_getopen(&clp->nfsc_owner, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, openownp, ownp, mode, &lp, &op); if (!error) lhp = &op->nfso_lock; } if (!error && !recovery) error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, nlop, ownp, ldp, NULL); if (error) { if (!recovery) { nfscl_clrelease(clp); NFSUNLOCKCLSTATE(); } FREE((caddr_t)nlp, M_NFSCLLOCKOWNER); FREE((caddr_t)otherlop, M_NFSCLLOCK); FREE((caddr_t)nlop, M_NFSCLLOCK); return (error); } /* * Ok, see if a lockowner exists and create one, as required. */ if (lp == NULL) LIST_FOREACH(lp, lhp, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, ownp, NFSV4CL_LOCKNAMELEN)) break; } if (lp == NULL) { NFSBCOPY(ownp, nlp->nfsl_owner, NFSV4CL_LOCKNAMELEN); if (recovery) NFSBCOPY(ropenownp, nlp->nfsl_openowner, NFSV4CL_LOCKNAMELEN); else NFSBCOPY(op->nfso_own->nfsow_owner, nlp->nfsl_openowner, NFSV4CL_LOCKNAMELEN); nlp->nfsl_seqid = 0; nlp->nfsl_lockflags = flags; nlp->nfsl_inprog = NULL; nfscl_lockinit(&nlp->nfsl_rwlock); LIST_INIT(&nlp->nfsl_lock); if (donelocally) { nlp->nfsl_open = NULL; nfsstatsv1.cllocallockowners++; } else { nlp->nfsl_open = op; nfsstatsv1.cllockowners++; } LIST_INSERT_HEAD(lhp, nlp, nfsl_list); lp = nlp; nlp = NULL; *newonep = 1; } /* * Now, update the byte ranges for locks. 
*/ ret = nfscl_updatelock(lp, &nlop, &otherlop, donelocally); if (!ret) donelocally = 1; if (donelocally) { *donelocallyp = 1; if (!recovery) nfscl_clrelease(clp); } else { /* * Serial modifications on the lock owner for multiple threads * for the same process using a read/write lock. */ if (!recovery) nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR); } if (!recovery) NFSUNLOCKCLSTATE(); if (nlp) FREE((caddr_t)nlp, M_NFSCLLOCKOWNER); if (nlop) FREE((caddr_t)nlop, M_NFSCLLOCK); if (otherlop) FREE((caddr_t)otherlop, M_NFSCLLOCK); *lpp = lp; return (0); } /* * Called to unlock a byte range, for LockU. */ APPLESTATIC int nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len, __unused struct ucred *cred, NFSPROC_T *p, int callcnt, struct nfsclclient *clp, void *id, int flags, struct nfscllockowner **lpp, int *dorpcp) { struct nfscllockowner *lp; struct nfsclowner *owp; struct nfsclopen *op; struct nfscllock *nlop, *other_lop = NULL; struct nfscldeleg *dp; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int ret = 0, fnd; np = VTONFS(vp); *lpp = NULL; *dorpcp = 0; /* * Might need these, so MALLOC them now, to * avoid a tsleep() in MALLOC later. */ MALLOC(nlop, struct nfscllock *, sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); nlop->nfslo_type = F_UNLCK; nlop->nfslo_first = off; if (len == NFS64BITSSET) { nlop->nfslo_end = NFS64BITSSET; } else { nlop->nfslo_end = off + len; if (nlop->nfslo_end <= nlop->nfslo_first) { FREE((caddr_t)nlop, M_NFSCLLOCK); return (NFSERR_INVAL); } } if (callcnt == 0) { MALLOC(other_lop, struct nfscllock *, sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); *other_lop = *nlop; } nfscl_filllockowner(id, own, flags); dp = NULL; NFSLOCKCLSTATE(); if (callcnt == 0) dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); /* * First, unlock any local regions on a delegation. */ if (dp != NULL) { /* Look for this lockowner. 
*/ LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) break; } if (lp != NULL) /* Use other_lop, so nlop is still available */ (void)nfscl_updatelock(lp, &other_lop, NULL, 1); } /* * Now, find a matching open/lockowner that hasn't already been done, * as marked by nfsl_inprog. */ lp = NULL; fnd = 0; LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if (op->nfso_fhlen == np->n_fhp->nfh_len && !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh, op->nfso_fhlen)) { LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) { if (lp->nfsl_inprog == NULL && !NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) { fnd = 1; break; } } if (fnd) break; } } if (fnd) break; } if (lp != NULL) { ret = nfscl_updatelock(lp, &nlop, NULL, 0); if (ret) *dorpcp = 1; /* * Serial modifications on the lock owner for multiple * threads for the same process using a read/write lock. */ lp->nfsl_inprog = p; nfscl_lockexcl(&lp->nfsl_rwlock, NFSCLSTATEMUTEXPTR); *lpp = lp; } NFSUNLOCKCLSTATE(); if (nlop) FREE((caddr_t)nlop, M_NFSCLLOCK); if (other_lop) FREE((caddr_t)other_lop, M_NFSCLLOCK); return (0); } /* * Release all lockowners marked in progess for this process and file. 
*/
APPLESTATIC void
nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
    void *id, int flags)
{
    struct nfsclowner *owp;
    struct nfsclopen *op;
    struct nfscllockowner *lp;
    struct nfsnode *np;
    u_int8_t own[NFSV4CL_LOCKNAMELEN];

    np = VTONFS(vp);
    nfscl_filllockowner(id, own, flags);
    NFSLOCKCLSTATE();
    /* Walk every open of this file, clearing lockowners "p" marked. */
    LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
        LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
            if (op->nfso_fhlen == np->n_fhp->nfh_len &&
                !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh,
                op->nfso_fhlen)) {
                LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
                    if (lp->nfsl_inprog == p &&
                        !NFSBCMP(lp->nfsl_owner, own,
                        NFSV4CL_LOCKNAMELEN)) {
                        lp->nfsl_inprog = NULL;
                        nfscl_lockunlock(&lp->nfsl_rwlock);
                    }
                }
            }
        }
    }
    nfscl_clrelease(clp);
    NFSUNLOCKCLSTATE();
}

/*
 * Called to find out if any bytes within the byte range specified are
 * write locked by the calling process. Used to determine if flushing
 * is required before a LockU.
 * If in doubt, return 1, so the flush will occur.
 */
APPLESTATIC int
nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
    struct ucred *cred, NFSPROC_T *p, void *id, int flags)
{
    struct nfsclowner *owp;
    struct nfscllockowner *lp;
    struct nfsclopen *op;
    struct nfsclclient *clp;
    struct nfscllock *lop;
    struct nfscldeleg *dp;
    struct nfsnode *np;
    u_int64_t off, end;
    u_int8_t own[NFSV4CL_LOCKNAMELEN];
    int error = 0;

    np = VTONFS(vp);
    /* Convert the flock into an absolute [off, end) byte range. */
    switch (fl->l_whence) {
    case SEEK_SET:
    case SEEK_CUR:
        /*
         * Caller is responsible for adding any necessary offset
         * when SEEK_CUR is used.
         */
        off = fl->l_start;
        break;
    case SEEK_END:
        off = np->n_size + fl->l_start;
        break;
    default:
        return (1);
    }
    if (fl->l_len != 0) {
        end = off + fl->l_len;
        /* Overflowed range: play it safe and report "locked". */
        if (end < off)
            return (1);
    } else {
        end = NFS64BITSSET;
    }

    error = nfscl_getcl(vnode_mount(vp), cred, p, 1, &clp);
    if (error)
        return (1);
    nfscl_filllockowner(id, own, flags);
    NFSLOCKCLSTATE();

    /*
     * First check the delegation locks.
     */
    dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
    if (dp != NULL) {
        LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
            if (!NFSBCMP(lp->nfsl_owner, own,
                NFSV4CL_LOCKNAMELEN))
                break;
        }
        if (lp != NULL) {
            /* Lock list is offset-ordered, so stop at "end". */
            LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
                if (lop->nfslo_first >= end)
                    break;
                if (lop->nfslo_end <= off)
                    continue;
                if (lop->nfslo_type == F_WRLCK) {
                    nfscl_clrelease(clp);
                    NFSUNLOCKCLSTATE();
                    return (1);
                }
            }
        }
    }

    /*
     * Now, check state against the server.
     */
    LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
        LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
            if (op->nfso_fhlen == np->n_fhp->nfh_len &&
                !NFSBCMP(op->nfso_fh, np->n_fhp->nfh_fh,
                op->nfso_fhlen)) {
                LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
                    if (!NFSBCMP(lp->nfsl_owner, own,
                        NFSV4CL_LOCKNAMELEN))
                        break;
                }
                if (lp != NULL) {
                    LIST_FOREACH(lop, &lp->nfsl_lock,
                        nfslo_list) {
                        if (lop->nfslo_first >= end)
                            break;
                        if (lop->nfslo_end <= off)
                            continue;
                        if (lop->nfslo_type == F_WRLCK) {
                            nfscl_clrelease(clp);
                            NFSUNLOCKCLSTATE();
                            return (1);
                        }
                    }
                }
            }
        }
    }
    nfscl_clrelease(clp);
    NFSUNLOCKCLSTATE();
    return (0);
}

/*
 * Release a byte range lock owner structure.
 * If the RPC failed (error != 0) and deletion is permitted, the
 * lockowner is freed outright (unless another thread is waiting on it);
 * otherwise only its read/write lock is released.
 */
APPLESTATIC void
nfscl_lockrelease(struct nfscllockowner *lp, int error, int candelete)
{
    struct nfsclclient *clp;

    if (lp == NULL)
        return;
    NFSLOCKCLSTATE();
    clp = lp->nfsl_open->nfso_own->nfsow_clp;
    if (error != 0 && candelete &&
        (lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED) == 0)
        nfscl_freelockowner(lp, 0);
    else
        nfscl_lockunlock(&lp->nfsl_rwlock);
    nfscl_clrelease(clp);
    NFSUNLOCKCLSTATE();
}

/*
 * Free up an open structure and any associated byte range lock structures.
 * "local" selects the local (delegation) vs. server statistics counters.
 */
APPLESTATIC void
nfscl_freeopen(struct nfsclopen *op, int local)
{

    LIST_REMOVE(op, nfso_list);
    nfscl_freealllocks(&op->nfso_lock, local);
    FREE((caddr_t)op, M_NFSCLOPEN);
    if (local)
        nfsstatsv1.cllocalopens--;
    else
        nfsstatsv1.clopens--;
}

/*
 * Free up all lock owners and associated locks.
*/
static void
nfscl_freealllocks(struct nfscllockownerhead *lhp, int local)
{
    struct nfscllockowner *lp, *nlp;

    LIST_FOREACH_SAFE(lp, lhp, nfsl_list, nlp) {
        /* A waiter on the lockowner's rwlock here indicates a bug. */
        if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
            panic("nfscllckw");
        nfscl_freelockowner(lp, local);
    }
}

/*
 * Called for an Open when NFSERR_EXPIRED is received from the server.
 * If there are no byte range locks nor a Share Deny lost, try to do a
 * fresh Open. Otherwise, free the open.
 * Returns non-zero when the open was deleted.
 */
static int
nfscl_expireopen(struct nfsclclient *clp, struct nfsclopen *op,
    struct nfsmount *nmp, struct ucred *cred, NFSPROC_T *p)
{
    struct nfscllockowner *lp;
    struct nfscldeleg *dp;
    int mustdelete = 0, error;

    /*
     * Look for any byte range lock(s).
     */
    LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
        if (!LIST_EMPTY(&lp->nfsl_lock)) {
            mustdelete = 1;
            break;
        }
    }

    /*
     * If no byte range lock(s) nor a Share deny, try to re-open.
     */
    if (!mustdelete && (op->nfso_mode & NFSLCK_DENYBITS) == 0) {
        newnfs_copycred(&op->nfso_cred, cred);
        dp = NULL;
        error = nfsrpc_reopen(nmp, op->nfso_fh,
            op->nfso_fhlen, op->nfso_mode, op, &dp, cred, p);
        if (error) {
            mustdelete = 1;
            if (dp != NULL) {
                FREE((caddr_t)dp, M_NFSCLDELEG);
                dp = NULL;
            }
        }
        if (dp != NULL)
            nfscl_deleg(nmp->nm_mountp, clp, op->nfso_fh,
                op->nfso_fhlen, cred, p, &dp);
    }

    /*
     * If a byte range lock or Share deny or couldn't re-open, free it.
     */
    if (mustdelete)
        nfscl_freeopen(op, 0);
    return (mustdelete);
}

/*
 * Free up an open owner structure.
 * "local" selects the local (delegation) vs. server statistics counters.
 */
static void
nfscl_freeopenowner(struct nfsclowner *owp, int local)
{

    LIST_REMOVE(owp, nfsow_list);
    FREE((caddr_t)owp, M_NFSCLOWNER);
    if (local)
        nfsstatsv1.cllocalopenowners--;
    else
        nfsstatsv1.clopenowners--;
}

/*
 * Free up a byte range lock owner structure.
 */
APPLESTATIC void
nfscl_freelockowner(struct nfscllockowner *lp, int local)
{
    struct nfscllock *lop, *nlop;

    LIST_REMOVE(lp, nfsl_list);
    /* Free every byte range lock held by this owner first. */
    LIST_FOREACH_SAFE(lop, &lp->nfsl_lock, nfslo_list, nlop) {
        nfscl_freelock(lop, local);
    }
    FREE((caddr_t)lp, M_NFSCLLOCKOWNER);
    if (local)
        nfsstatsv1.cllocallockowners--;
    else
        nfsstatsv1.cllockowners--;
}

/*
 * Free up a byte range lock structure.
 */
APPLESTATIC void
nfscl_freelock(struct nfscllock *lop, int local)
{

    LIST_REMOVE(lop, nfslo_list);
    FREE((caddr_t)lop, M_NFSCLLOCK);
    if (local)
        nfsstatsv1.cllocallocks--;
    else
        nfsstatsv1.cllocks--;
}

/*
 * Clean out the state related to a delegation.
 */
static void
nfscl_cleandeleg(struct nfscldeleg *dp)
{
    struct nfsclowner *owp, *nowp;
    struct nfsclopen *op;

    LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
        op = LIST_FIRST(&owp->nfsow_open);
        if (op != NULL) {
            /* A delegation openowner holds at most one open. */
            if (LIST_NEXT(op, nfso_list) != NULL)
                panic("nfscleandel");
            nfscl_freeopen(op, 1);
        }
        nfscl_freeopenowner(owp, 1);
    }
    nfscl_freealllocks(&dp->nfsdl_lock, 1);
}

/*
 * Free a delegation.
 */
static void
nfscl_freedeleg(struct nfscldeleghead *hdp, struct nfscldeleg *dp)
{

    TAILQ_REMOVE(hdp, dp, nfsdl_list);
    LIST_REMOVE(dp, nfsdl_hash);
    FREE((caddr_t)dp, M_NFSCLDELEG);
    nfsstatsv1.cldelegates--;
    nfscl_delegcnt--;
}

/*
 * Free up all state related to this client structure.
 */
static void
nfscl_cleanclient(struct nfsclclient *clp)
{
    struct nfsclowner *owp, *nowp;
    struct nfsclopen *op, *nop;
    struct nfscllayout *lyp, *nlyp;
    struct nfscldevinfo *dip, *ndip;

    /* pNFS layouts and device info first. */
    TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp)
        nfscl_freelayout(lyp);

    LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip)
        nfscl_freedevinfo(dip);

    /* Now, all the OpenOwners, etc. */
    LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
        LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
            nfscl_freeopen(op, 0);
        }
        nfscl_freeopenowner(owp, 0);
    }
}

/*
 * Called when an NFSERR_EXPIRED is received from the server.
*/
static void
nfscl_expireclient(struct nfsclclient *clp, struct nfsmount *nmp,
    struct ucred *cred, NFSPROC_T *p)
{
    struct nfsclowner *owp, *nowp, *towp;
    struct nfsclopen *op, *nop, *top;
    struct nfscldeleg *dp, *ndp;
    int ret, printed = 0;

    /*
     * First, merge locally issued Opens into the list for the server.
     */
    dp = TAILQ_FIRST(&clp->nfsc_deleg);
    while (dp != NULL) {
        ndp = TAILQ_NEXT(dp, nfsdl_list);
        owp = LIST_FIRST(&dp->nfsdl_owner);
        while (owp != NULL) {
            nowp = LIST_NEXT(owp, nfsow_list);
            op = LIST_FIRST(&owp->nfsow_open);
            if (op != NULL) {
                /* A delegation openowner holds at most one open. */
                if (LIST_NEXT(op, nfso_list) != NULL)
                    panic("nfsclexp");
                /* Look for a matching openowner on the server list. */
                LIST_FOREACH(towp, &clp->nfsc_owner, nfsow_list) {
                    if (!NFSBCMP(towp->nfsow_owner, owp->nfsow_owner,
                        NFSV4CL_LOCKNAMELEN))
                        break;
                }
                if (towp != NULL) {
                    /* Merge opens in */
                    LIST_FOREACH(top, &towp->nfsow_open, nfso_list) {
                        if (top->nfso_fhlen == op->nfso_fhlen &&
                            !NFSBCMP(top->nfso_fh, op->nfso_fh,
                            op->nfso_fhlen)) {
                            top->nfso_mode |= op->nfso_mode;
                            top->nfso_opencnt += op->nfso_opencnt;
                            break;
                        }
                    }
                    if (top == NULL) {
                        /* Just add the open to the owner list */
                        LIST_REMOVE(op, nfso_list);
                        op->nfso_own = towp;
                        LIST_INSERT_HEAD(&towp->nfsow_open, op,
                            nfso_list);
                        nfsstatsv1.cllocalopens--;
                        nfsstatsv1.clopens++;
                    }
                } else {
                    /* Just add the openowner to the client list */
                    LIST_REMOVE(owp, nfsow_list);
                    owp->nfsow_clp = clp;
                    LIST_INSERT_HEAD(&clp->nfsc_owner, owp,
                        nfsow_list);
                    nfsstatsv1.cllocalopenowners--;
                    nfsstatsv1.clopenowners++;
                    nfsstatsv1.cllocalopens--;
                    nfsstatsv1.clopens++;
                }
            }
            owp = nowp;
        }
        if (!printed && !LIST_EMPTY(&dp->nfsdl_lock)) {
            /* Only warn once per expiry. */
            printed = 1;
            printf("nfsv4 expired locks lost\n");
        }
        nfscl_cleandeleg(dp);
        nfscl_freedeleg(&clp->nfsc_deleg, dp);
        dp = ndp;
    }
    if (!TAILQ_EMPTY(&clp->nfsc_deleg))
        panic("nfsclexp");

    /*
     * Now, try and reopen against the server.
     */
    LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
        owp->nfsow_seqid = 0;
        LIST_FOREACH_SAFE(op, &owp->nfsow_open, nfso_list, nop) {
            ret = nfscl_expireopen(clp, op, nmp, cred, p);
            if (ret && !printed) {
                printed = 1;
                printf("nfsv4 expired locks lost\n");
            }
        }
        if (LIST_EMPTY(&owp->nfsow_open))
            nfscl_freeopenowner(owp, 0);
    }
}

/*
 * This function must be called after the process represented by "own" has
 * exited. Must be called with CLSTATE lock held.
 */
static void
nfscl_cleanup_common(struct nfsclclient *clp, u_int8_t *own)
{
    struct nfsclowner *owp, *nowp;
    struct nfscllockowner *lp, *nlp;
    struct nfscldeleg *dp;

    /* First, get rid of local locks on delegations. */
    TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
        LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) {
            if (!NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
                /* A waiter on a dead owner's rwlock is a bug. */
                if ((lp->nfsl_rwlock.nfslock_lock & NFSV4LOCK_WANTED))
                    panic("nfscllckw");
                nfscl_freelockowner(lp, 1);
            }
        }
    }
    owp = LIST_FIRST(&clp->nfsc_owner);
    while (owp != NULL) {
        nowp = LIST_NEXT(owp, nfsow_list);
        if (!NFSBCMP(owp->nfsow_owner, own,
            NFSV4CL_LOCKNAMELEN)) {
            /*
             * If there are children that haven't closed the
             * file descriptors yet, the opens will still be
             * here. For that case, let the renew thread clear
             * out the OpenOwner later.
             */
            if (LIST_EMPTY(&owp->nfsow_open))
                nfscl_freeopenowner(owp, 0);
            else
                owp->nfsow_defunct = 1;
        }
        owp = nowp;
    }
}

/*
 * Find open/lock owners for processes that have exited.
*/ static void nfscl_cleanupkext(struct nfsclclient *clp, struct nfscllockownerfhhead *lhp) { struct nfsclowner *owp, *nowp; struct nfsclopen *op; struct nfscllockowner *lp, *nlp; struct nfscldeleg *dp; NFSPROCLISTLOCK(); NFSLOCKCLSTATE(); LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { LIST_FOREACH_SAFE(lp, &op->nfso_lock, nfsl_list, nlp) { if (LIST_EMPTY(&lp->nfsl_lock)) nfscl_emptylockowner(lp, lhp); } } if (nfscl_procdoesntexist(owp->nfsow_owner)) nfscl_cleanup_common(clp, owp->nfsow_owner); } /* * For the single open_owner case, these lock owners need to be * checked to see if they still exist separately. * This is because nfscl_procdoesntexist() never returns true for * the single open_owner so that the above doesn't ever call * nfscl_cleanup_common(). */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { LIST_FOREACH_SAFE(lp, &dp->nfsdl_lock, nfsl_list, nlp) { if (nfscl_procdoesntexist(lp->nfsl_owner)) nfscl_cleanup_common(clp, lp->nfsl_owner); } } NFSUNLOCKCLSTATE(); NFSPROCLISTUNLOCK(); } /* * Take the empty lock owner and move it to the local lhp list if the * associated process no longer exists. */ static void nfscl_emptylockowner(struct nfscllockowner *lp, struct nfscllockownerfhhead *lhp) { struct nfscllockownerfh *lfhp, *mylfhp; struct nfscllockowner *nlp; int fnd_it; /* If not a Posix lock owner, just return. */ if ((lp->nfsl_lockflags & F_POSIX) == 0) return; fnd_it = 0; mylfhp = NULL; /* * First, search to see if this lock owner is already in the list. * If it is, then the associated process no longer exists. */ SLIST_FOREACH(lfhp, lhp, nfslfh_list) { if (lfhp->nfslfh_len == lp->nfsl_open->nfso_fhlen && !NFSBCMP(lfhp->nfslfh_fh, lp->nfsl_open->nfso_fh, lfhp->nfslfh_len)) mylfhp = lfhp; LIST_FOREACH(nlp, &lfhp->nfslfh_lock, nfsl_list) if (!NFSBCMP(nlp->nfsl_owner, lp->nfsl_owner, NFSV4CL_LOCKNAMELEN)) fnd_it = 1; } /* If not found, check if process still exists. 
*/ if (fnd_it == 0 && nfscl_procdoesntexist(lp->nfsl_owner) == 0) return; /* Move the lock owner over to the local list. */ if (mylfhp == NULL) { mylfhp = malloc(sizeof(struct nfscllockownerfh), M_TEMP, M_NOWAIT); if (mylfhp == NULL) return; mylfhp->nfslfh_len = lp->nfsl_open->nfso_fhlen; NFSBCOPY(lp->nfsl_open->nfso_fh, mylfhp->nfslfh_fh, mylfhp->nfslfh_len); LIST_INIT(&mylfhp->nfslfh_lock); SLIST_INSERT_HEAD(lhp, mylfhp, nfslfh_list); } LIST_REMOVE(lp, nfsl_list); LIST_INSERT_HEAD(&mylfhp->nfslfh_lock, lp, nfsl_list); } static int fake_global; /* Used to force visibility of MNTK_UNMOUNTF */ /* * Called from nfs umount to free up the clientid. */ APPLESTATIC void nfscl_umount(struct nfsmount *nmp, NFSPROC_T *p) { struct nfsclclient *clp; struct ucred *cred; int igotlock; /* * For the case that matters, this is the thread that set * MNTK_UNMOUNTF, so it will see it set. The code that follows is * done to ensure that any thread executing nfscl_getcl() after * this time, will see MNTK_UNMOUNTF set. nfscl_getcl() uses the * mutex for NFSLOCKCLSTATE(), so it is "m" for the following * explanation, courtesy of Alan Cox. * What follows is a snippet from Alan Cox's email at: * http://docs.FreeBSD.org/cgi/ * mid.cgi?BANLkTikR3d65zPHo9==08ZfJ2vmqZucEvw * * 1. Set MNTK_UNMOUNTF * 2. Acquire a standard FreeBSD mutex "m". * 3. Update some data structures. * 4. Release mutex "m". * * Then, other threads that acquire "m" after step 4 has occurred will * see MNTK_UNMOUNTF as set. But, other threads that beat thread X to * step 2 may or may not see MNTK_UNMOUNTF as set. */ NFSLOCKCLSTATE(); if ((nmp->nm_mountp->mnt_kern_flag & MNTK_UNMOUNTF) != 0) { fake_global++; NFSUNLOCKCLSTATE(); NFSLOCKCLSTATE(); } clp = nmp->nm_clp; if (clp != NULL) { if ((clp->nfsc_flags & NFSCLFLAGS_INITED) == 0) panic("nfscl umount"); /* * First, handshake with the nfscl renew thread, to terminate * it. 
*/ clp->nfsc_flags |= NFSCLFLAGS_UMOUNT; while (clp->nfsc_flags & NFSCLFLAGS_HASTHREAD) (void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfsclumnt", hz); /* * Now, get the exclusive lock on the client state, so * that no uses of the state are still in progress. */ do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKCLSTATE(); /* * Free up all the state. It will expire on the server, but * maybe we should do a SetClientId/SetClientIdConfirm so * the server throws it away? */ LIST_REMOVE(clp, nfsc_list); nfscl_delegreturnall(clp, p); cred = newnfs_getcred(); if (NFSHASNFSV4N(nmp)) { (void)nfsrpc_destroysession(nmp, clp, cred, p); (void)nfsrpc_destroyclient(nmp, clp, cred, p); } else (void)nfsrpc_setclient(nmp, clp, 0, cred, p); nfscl_cleanclient(clp); nmp->nm_clp = NULL; NFSFREECRED(cred); free(clp, M_NFSCLCLIENT); } else NFSUNLOCKCLSTATE(); } /* * This function is called when a server replies with NFSERR_STALECLIENTID * NFSERR_STALESTATEID or NFSERR_BADSESSION. It traverses the clientid lists, * doing Opens and Locks with reclaim. If these fail, it deletes the * corresponding state. */ static void nfscl_recover(struct nfsclclient *clp, struct ucred *cred, NFSPROC_T *p) { struct nfsclowner *owp, *nowp; struct nfsclopen *op, *nop; struct nfscllockowner *lp, *nlp; struct nfscllock *lop, *nlop; struct nfscldeleg *dp, *ndp, *tdp; struct nfsmount *nmp; struct ucred *tcred; struct nfsclopenhead extra_open; struct nfscldeleghead extra_deleg; struct nfsreq *rep; u_int64_t len; u_int32_t delegtype = NFSV4OPEN_DELEGATEWRITE, mode; int i, igotlock = 0, error, trycnt, firstlock; struct nfscllayout *lyp, *nlyp; /* * First, lock the client structure, so everyone else will * block when trying to use state. 
*/ NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG; do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock); NFSUNLOCKCLSTATE(); nmp = clp->nfsc_nmp; if (nmp == NULL) panic("nfscl recover"); /* * For now, just get rid of all layouts. There may be a need * to do LayoutCommit Ops with reclaim == true later. */ TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) nfscl_freelayout(lyp); TAILQ_INIT(&clp->nfsc_layout); for (i = 0; i < NFSCLLAYOUTHASHSIZE; i++) LIST_INIT(&clp->nfsc_layouthash[i]); trycnt = 5; do { error = nfsrpc_setclient(nmp, clp, 1, cred, p); } while ((error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { NFSLOCKCLSTATE(); clp->nfsc_flags &= ~(NFSCLFLAGS_RECOVER | NFSCLFLAGS_RECVRINPROG); wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return; } clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; /* * Mark requests already queued on the server, so that they don't * initiate another recovery cycle. Any requests already in the * queue that handle state information will have the old stale * clientid/stateid and will get a NFSERR_STALESTATEID, * NFSERR_STALECLIENTID or NFSERR_BADSESSION reply from the server. * This will be translated to NFSERR_STALEDONTRECOVER when * R_DONTRECOVER is set. */ NFSLOCKREQ(); TAILQ_FOREACH(rep, &nfsd_reqq, r_chain) { if (rep->r_nmp == nmp) rep->r_flags |= R_DONTRECOVER; } NFSUNLOCKREQ(); /* * Now, mark all delegations "need reclaim". */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) dp->nfsdl_flags |= NFSCLDL_NEEDRECLAIM; TAILQ_INIT(&extra_deleg); LIST_INIT(&extra_open); /* * Now traverse the state lists, doing Open and Lock Reclaims. 
*/ tcred = newnfs_getcred(); owp = LIST_FIRST(&clp->nfsc_owner); while (owp != NULL) { nowp = LIST_NEXT(owp, nfsow_list); owp->nfsow_seqid = 0; op = LIST_FIRST(&owp->nfsow_open); while (op != NULL) { nop = LIST_NEXT(op, nfso_list); if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) { /* Search for a delegation to reclaim with the open */ TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) continue; if ((dp->nfsdl_flags & NFSCLDL_WRITE)) { mode = NFSV4OPEN_ACCESSWRITE; delegtype = NFSV4OPEN_DELEGATEWRITE; } else { mode = NFSV4OPEN_ACCESSREAD; delegtype = NFSV4OPEN_DELEGATEREAD; } if ((op->nfso_mode & mode) == mode && op->nfso_fhlen == dp->nfsdl_fhlen && !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen)) break; } ndp = dp; if (dp == NULL) delegtype = NFSV4OPEN_DELEGATENONE; newnfs_copycred(&op->nfso_cred, tcred); error = nfscl_tryopen(nmp, NULL, op->nfso_fh, op->nfso_fhlen, op->nfso_fh, op->nfso_fhlen, op->nfso_mode, op, NULL, 0, &ndp, 1, delegtype, tcred, p); if (!error) { /* Handle any replied delegation */ if (ndp != NULL && ((ndp->nfsdl_flags & NFSCLDL_WRITE) || NFSMNT_RDONLY(nmp->nm_mountp))) { if ((ndp->nfsdl_flags & NFSCLDL_WRITE)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { if (!(dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) continue; if ((op->nfso_mode & mode) == mode && op->nfso_fhlen == dp->nfsdl_fhlen && !NFSBCMP(op->nfso_fh, dp->nfsdl_fh, op->nfso_fhlen)) { dp->nfsdl_stateid = ndp->nfsdl_stateid; dp->nfsdl_sizelimit = ndp->nfsdl_sizelimit; dp->nfsdl_ace = ndp->nfsdl_ace; dp->nfsdl_change = ndp->nfsdl_change; dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM; if ((ndp->nfsdl_flags & NFSCLDL_RECALL)) dp->nfsdl_flags |= NFSCLDL_RECALL; FREE((caddr_t)ndp, M_NFSCLDELEG); ndp = NULL; break; } } } if (ndp != NULL) TAILQ_INSERT_HEAD(&extra_deleg, ndp, nfsdl_list); /* and reclaim all byte range locks */ lp = LIST_FIRST(&op->nfso_lock); while (lp != NULL) { 
nlp = LIST_NEXT(lp, nfsl_list); lp->nfsl_seqid = 0; firstlock = 1; lop = LIST_FIRST(&lp->nfsl_lock); while (lop != NULL) { nlop = LIST_NEXT(lop, nfslo_list); if (lop->nfslo_end == NFS64BITSSET) len = NFS64BITSSET; else len = lop->nfslo_end - lop->nfslo_first; error = nfscl_trylock(nmp, NULL, op->nfso_fh, op->nfso_fhlen, lp, firstlock, 1, lop->nfslo_first, len, lop->nfslo_type, tcred, p); if (error != 0) nfscl_freelock(lop, 0); else firstlock = 0; lop = nlop; } /* If no locks, but a lockowner, just delete it. */ if (LIST_EMPTY(&lp->nfsl_lock)) nfscl_freelockowner(lp, 0); lp = nlp; } } } if (error != 0 && error != NFSERR_BADSESSION) nfscl_freeopen(op, 0); op = nop; } owp = nowp; } /* * Now, try and get any delegations not yet reclaimed by cobbling * to-gether an appropriate open. */ nowp = NULL; dp = TAILQ_FIRST(&clp->nfsc_deleg); while (dp != NULL) { ndp = TAILQ_NEXT(dp, nfsdl_list); if ((dp->nfsdl_flags & NFSCLDL_NEEDRECLAIM)) { if (nowp == NULL) { MALLOC(nowp, struct nfsclowner *, sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK); /* * Name must be as long an largest possible * NFSV4CL_LOCKNAMELEN. 12 for now. 
*/ NFSBCOPY("RECLAIMDELEG", nowp->nfsow_owner, NFSV4CL_LOCKNAMELEN); LIST_INIT(&nowp->nfsow_open); nowp->nfsow_clp = clp; nowp->nfsow_seqid = 0; nowp->nfsow_defunct = 0; nfscl_lockinit(&nowp->nfsow_rwlock); } nop = NULL; if (error != NFSERR_NOGRACE && error != NFSERR_BADSESSION) { MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) + dp->nfsdl_fhlen - 1, M_NFSCLOPEN, M_WAITOK); nop->nfso_own = nowp; if ((dp->nfsdl_flags & NFSCLDL_WRITE)) { nop->nfso_mode = NFSV4OPEN_ACCESSWRITE; delegtype = NFSV4OPEN_DELEGATEWRITE; } else { nop->nfso_mode = NFSV4OPEN_ACCESSREAD; delegtype = NFSV4OPEN_DELEGATEREAD; } nop->nfso_opencnt = 0; nop->nfso_posixlock = 1; nop->nfso_fhlen = dp->nfsdl_fhlen; NFSBCOPY(dp->nfsdl_fh, nop->nfso_fh, dp->nfsdl_fhlen); LIST_INIT(&nop->nfso_lock); nop->nfso_stateid.seqid = 0; nop->nfso_stateid.other[0] = 0; nop->nfso_stateid.other[1] = 0; nop->nfso_stateid.other[2] = 0; newnfs_copycred(&dp->nfsdl_cred, tcred); newnfs_copyincred(tcred, &nop->nfso_cred); tdp = NULL; error = nfscl_tryopen(nmp, NULL, nop->nfso_fh, nop->nfso_fhlen, nop->nfso_fh, nop->nfso_fhlen, nop->nfso_mode, nop, NULL, 0, &tdp, 1, delegtype, tcred, p); if (tdp != NULL) { if ((tdp->nfsdl_flags & NFSCLDL_WRITE)) mode = NFSV4OPEN_ACCESSWRITE; else mode = NFSV4OPEN_ACCESSREAD; if ((nop->nfso_mode & mode) == mode && nop->nfso_fhlen == tdp->nfsdl_fhlen && !NFSBCMP(nop->nfso_fh, tdp->nfsdl_fh, nop->nfso_fhlen)) { dp->nfsdl_stateid = tdp->nfsdl_stateid; dp->nfsdl_sizelimit = tdp->nfsdl_sizelimit; dp->nfsdl_ace = tdp->nfsdl_ace; dp->nfsdl_change = tdp->nfsdl_change; dp->nfsdl_flags &= ~NFSCLDL_NEEDRECLAIM; if ((tdp->nfsdl_flags & NFSCLDL_RECALL)) dp->nfsdl_flags |= NFSCLDL_RECALL; FREE((caddr_t)tdp, M_NFSCLDELEG); } else { TAILQ_INSERT_HEAD(&extra_deleg, tdp, nfsdl_list); } } } if (error) { if (nop != NULL) FREE((caddr_t)nop, M_NFSCLOPEN); /* * Couldn't reclaim it, so throw the state * away. Ouch!! 
*/ nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } else { LIST_INSERT_HEAD(&extra_open, nop, nfso_list); } } dp = ndp; } /* * Now, get rid of extra Opens and Delegations. */ LIST_FOREACH_SAFE(op, &extra_open, nfso_list, nop) { do { newnfs_copycred(&op->nfso_cred, tcred); error = nfscl_tryclose(op, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfsexcls"); } while (error == NFSERR_GRACE); LIST_REMOVE(op, nfso_list); FREE((caddr_t)op, M_NFSCLOPEN); } if (nowp != NULL) FREE((caddr_t)nowp, M_NFSCLOWNER); TAILQ_FOREACH_SAFE(dp, &extra_deleg, nfsdl_list, ndp) { do { newnfs_copycred(&dp->nfsdl_cred, tcred); error = nfscl_trydelegreturn(dp, tcred, nmp, p); if (error == NFSERR_GRACE) (void) nfs_catnap(PZERO, error, "nfsexdlg"); } while (error == NFSERR_GRACE); TAILQ_REMOVE(&extra_deleg, dp, nfsdl_list); FREE((caddr_t)dp, M_NFSCLDELEG); } /* For NFSv4.1 or later, do a RECLAIM_COMPLETE. */ if (NFSHASNFSV4N(nmp)) (void)nfsrpc_reclaimcomplete(nmp, cred, p); NFSLOCKCLSTATE(); clp->nfsc_flags &= ~NFSCLFLAGS_RECVRINPROG; wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(tcred); } /* * This function is called when a server replies with NFSERR_EXPIRED. * It deletes all state for the client and does a fresh SetClientId/confirm. * XXX Someday it should post a signal to the process(es) that hold the * state, so they know that lock state has been lost. */ APPLESTATIC int nfscl_hasexpired(struct nfsclclient *clp, u_int32_t clidrev, NFSPROC_T *p) { struct nfsmount *nmp; struct ucred *cred; int igotlock = 0, error, trycnt; /* * If the clientid has gone away or a new SetClientid has already * been done, just return ok. */ if (clp == NULL || clidrev != clp->nfsc_clientidrev) return (0); /* * First, lock the client structure, so everyone else will * block when trying to use state. Also, use NFSCLFLAGS_EXPIREIT so * that only one thread does the work. 
*/ NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_EXPIREIT; do { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, NULL, NFSCLSTATEMUTEXPTR, NULL); } while (!igotlock && (clp->nfsc_flags & NFSCLFLAGS_EXPIREIT)); if ((clp->nfsc_flags & NFSCLFLAGS_EXPIREIT) == 0) { if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (0); } clp->nfsc_flags |= NFSCLFLAGS_RECVRINPROG; NFSUNLOCKCLSTATE(); nmp = clp->nfsc_nmp; if (nmp == NULL) panic("nfscl expired"); cred = newnfs_getcred(); trycnt = 5; do { error = nfsrpc_setclient(nmp, clp, 0, cred, p); } while ((error == NFSERR_STALECLIENTID || error == NFSERR_BADSESSION || error == NFSERR_STALEDONTRECOVER) && --trycnt > 0); if (error) { NFSLOCKCLSTATE(); clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; } else { /* * Expire the state for the client. */ nfscl_expireclient(clp, nmp, cred, p); NFSLOCKCLSTATE(); clp->nfsc_flags |= NFSCLFLAGS_HASCLIENTID; clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER; } clp->nfsc_flags &= ~(NFSCLFLAGS_EXPIREIT | NFSCLFLAGS_RECVRINPROG); wakeup(&clp->nfsc_flags); nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); NFSFREECRED(cred); return (error); } /* * This function inserts a lock in the list after insert_lop. */ static void nfscl_insertlock(struct nfscllockowner *lp, struct nfscllock *new_lop, struct nfscllock *insert_lop, int local) { if ((struct nfscllockowner *)insert_lop == lp) LIST_INSERT_HEAD(&lp->nfsl_lock, new_lop, nfslo_list); else LIST_INSERT_AFTER(insert_lop, new_lop, nfslo_list); if (local) nfsstatsv1.cllocallocks++; else nfsstatsv1.cllocks++; } /* * This function updates the locking for a lock owner and given file. It * maintains a list of lock ranges ordered on increasing file offset that * are NFSCLLOCK_READ or NFSCLLOCK_WRITE and non-overlapping (aka POSIX style). * It always adds new_lop to the list and sometimes uses the one pointed * at by other_lopp. * Returns 1 if the locks were modified, 0 otherwise. 
 */
static int
nfscl_updatelock(struct nfscllockowner *lp, struct nfscllock **new_lopp,
    struct nfscllock **other_lopp, int local)
{
	struct nfscllock *new_lop = *new_lopp;
	struct nfscllock *lop, *tlop, *ilop;
	struct nfscllock *other_lop;
	int unlock = 0, modified = 0;
	u_int64_t tmp;

	/*
	 * Work down the list until the lock is merged.
	 */
	if (new_lop->nfslo_type == F_UNLCK)
		unlock = 1;
	/* ilop tracks the insertion point; the owner pointer means "head". */
	ilop = (struct nfscllock *)lp;
	lop = LIST_FIRST(&lp->nfsl_lock);
	while (lop != NULL) {
	    /*
	     * Only check locks for this file that aren't before the start of
	     * new lock's range.
	     */
	    if (lop->nfslo_end >= new_lop->nfslo_first) {
		if (new_lop->nfslo_end < lop->nfslo_first) {
		    /*
		     * If the new lock ends before the start of the
		     * current lock's range, no merge, just insert
		     * the new lock.
		     */
		    break;
		}
		if (new_lop->nfslo_type == lop->nfslo_type ||
		    (new_lop->nfslo_first <= lop->nfslo_first &&
		     new_lop->nfslo_end >= lop->nfslo_end)) {
			/*
			 * This lock can be absorbed by the new lock/unlock.
			 * This happens when it covers the entire range
			 * of the old lock or is contiguous
			 * with the old lock and is of the same type or an
			 * unlock.
			 */
			if (new_lop->nfslo_type != lop->nfslo_type ||
			    new_lop->nfslo_first != lop->nfslo_first ||
			    new_lop->nfslo_end != lop->nfslo_end)
				modified = 1;
			/* Grow the new range to cover the absorbed lock. */
			if (lop->nfslo_first < new_lop->nfslo_first)
				new_lop->nfslo_first = lop->nfslo_first;
			if (lop->nfslo_end > new_lop->nfslo_end)
				new_lop->nfslo_end = lop->nfslo_end;
			tlop = lop;
			lop = LIST_NEXT(lop, nfslo_list);
			nfscl_freelock(tlop, local);
			continue;
		}

		/*
		 * All these cases are for contiguous locks that are not the
		 * same type, so they can't be merged.
		 */
		if (new_lop->nfslo_first <= lop->nfslo_first) {
		    /*
		     * This case is where the new lock overlaps with the
		     * first part of the old lock. Move the start of the
		     * old lock to just past the end of the new lock. The
		     * new lock will be inserted in front of the old, since
		     * ilop hasn't been updated. (We are done now.)
 */
		    if (lop->nfslo_first != new_lop->nfslo_end) {
			lop->nfslo_first = new_lop->nfslo_end;
			modified = 1;
		    }
		    break;
		}
		if (new_lop->nfslo_end >= lop->nfslo_end) {
		    /*
		     * This case is where the new lock overlaps with the
		     * end of the old lock's range. Move the old lock's
		     * end to just before the new lock's first and insert
		     * the new lock after the old lock.
		     * Might not be done yet, since the new lock could
		     * overlap further locks with higher ranges.
		     */
		    if (lop->nfslo_end != new_lop->nfslo_first) {
			lop->nfslo_end = new_lop->nfslo_first;
			modified = 1;
		    }
		    ilop = lop;
		    lop = LIST_NEXT(lop, nfslo_list);
		    continue;
		}
		/*
		 * The final case is where the new lock's range is in the
		 * middle of the current lock's and splits the current lock
		 * up. Use *other_lopp to handle the second part of the
		 * split old lock range. (We are done now.)
		 * For unlock, we use new_lop as other_lop and tmp, since
		 * other_lop and new_lop are the same for this case.
		 * We noted the unlock case above, so we don't need
		 * new_lop->nfslo_type any longer.
		 */
		tmp = new_lop->nfslo_first;
		if (unlock) {
			other_lop = new_lop;
			*new_lopp = NULL;
		} else {
			other_lop = *other_lopp;
			*other_lopp = NULL;
		}
		other_lop->nfslo_first = new_lop->nfslo_end;
		other_lop->nfslo_end = lop->nfslo_end;
		other_lop->nfslo_type = lop->nfslo_type;
		lop->nfslo_end = tmp;
		nfscl_insertlock(lp, other_lop, lop, local);
		ilop = lop;
		modified = 1;
		break;
	    }
	    ilop = lop;
	    lop = LIST_NEXT(lop, nfslo_list);
	    if (lop == NULL)
		break;
	}

	/*
	 * Insert the new lock in the list at the appropriate place.
	 */
	if (!unlock) {
		nfscl_insertlock(lp, new_lop, ilop, local);
		*new_lopp = NULL;
		modified = 1;
	}
	return (modified);
}

/*
 * This function must be run as a kernel thread.
 * It does Renew Ops and recovery, when required.
 */
APPLESTATIC void
nfscl_renewthread(struct nfsclclient *clp, NFSPROC_T *p)
{
	struct nfsclowner *owp, *nowp;
	struct nfsclopen *op;
	struct nfscllockowner *lp, *nlp;
	struct nfscldeleghead dh;
	struct nfscldeleg *dp, *ndp;
	struct ucred *cred;
	u_int32_t clidrev;
	int error, cbpathdown, islept, igotlock, ret, clearok;
	uint32_t recover_done_time = 0;
	time_t mytime;
	static time_t prevsec = 0;
	struct nfscllockownerfh *lfhp, *nlfhp;
	struct nfscllockownerfhhead lfh;
	struct nfscllayout *lyp, *nlyp;
	struct nfscldevinfo *dip, *ndip;
	struct nfscllayouthead rlh;
	struct nfsclrecalllayout *recallp;
	struct nfsclds *dsp;

	cred = newnfs_getcred();
	NFSLOCKCLSTATE();
	clp->nfsc_flags |= NFSCLFLAGS_HASTHREAD;
	NFSUNLOCKCLSTATE();
	/* Loop until NFSCLFLAGS_UMOUNT tells the thread to exit (below). */
	for(;;) {
		newnfs_setroot(cred);
		cbpathdown = 0;
		if (clp->nfsc_flags & NFSCLFLAGS_RECOVER) {
			/*
			 * Only allow one recover within 1/2 of the lease
			 * duration (nfsc_renew).
			 */
			if (recover_done_time < NFSD_MONOSEC) {
				recover_done_time = NFSD_MONOSEC +
				    clp->nfsc_renew;
				NFSCL_DEBUG(1, "Doing recovery..\n");
				nfscl_recover(clp, cred, p);
			} else {
				NFSCL_DEBUG(1, "Clear Recovery dt=%u ms=%jd\n",
				    recover_done_time, (intmax_t)NFSD_MONOSEC);
				NFSLOCKCLSTATE();
				clp->nfsc_flags &= ~NFSCLFLAGS_RECOVER;
				NFSUNLOCKCLSTATE();
			}
		}
		/* Renew the lease before it expires. */
		if (clp->nfsc_expire <= NFSD_MONOSEC &&
		    (clp->nfsc_flags & NFSCLFLAGS_HASCLIENTID)) {
			clp->nfsc_expire = NFSD_MONOSEC + clp->nfsc_renew;
			clidrev = clp->nfsc_clientidrev;
			error = nfsrpc_renew(clp, NULL, cred, p);
			if (error == NFSERR_CBPATHDOWN)
				cbpathdown = 1;
			else if (error == NFSERR_STALECLIENTID ||
			    error == NFSERR_BADSESSION) {
				NFSLOCKCLSTATE();
				clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
				NFSUNLOCKCLSTATE();
			} else if (error == NFSERR_EXPIRED)
				(void) nfscl_hasexpired(clp, clidrev, p);
		}

checkdsrenew:
		if (NFSHASNFSV4N(clp->nfsc_nmp)) {
			/* Do renews for any DS sessions. */
			NFSLOCKMNT(clp->nfsc_nmp);
			/*
			 * Skip first entry, since the MDS is handled above.
 */
			dsp = TAILQ_FIRST(&clp->nfsc_nmp->nm_sess);
			if (dsp != NULL)
				dsp = TAILQ_NEXT(dsp, nfsclds_list);
			while (dsp != NULL) {
				if (dsp->nfsclds_expire <= NFSD_MONOSEC &&
				    dsp->nfsclds_sess.nfsess_defunct == 0) {
					dsp->nfsclds_expire = NFSD_MONOSEC +
					    clp->nfsc_renew;
					/*
					 * The mutex is dropped for the RPC, so
					 * restart the scan from the top.
					 */
					NFSUNLOCKMNT(clp->nfsc_nmp);
					(void)nfsrpc_renew(clp, dsp, cred, p);
					goto checkdsrenew;
				}
				dsp = TAILQ_NEXT(dsp, nfsclds_list);
			}
			NFSUNLOCKMNT(clp->nfsc_nmp);
		}

		TAILQ_INIT(&dh);
		NFSLOCKCLSTATE();
		if (cbpathdown)
			/* It's a Total Recall! */
			nfscl_totalrecall(clp);

		/*
		 * Now, handle defunct owners.
		 */
		LIST_FOREACH_SAFE(owp, &clp->nfsc_owner, nfsow_list, nowp) {
			if (LIST_EMPTY(&owp->nfsow_open)) {
				if (owp->nfsow_defunct != 0)
					nfscl_freeopenowner(owp, 0);
			}
		}

		/*
		 * Do the recall on any delegations. To avoid trouble, always
		 * come back up here after having slept.
		 */
		igotlock = 0;
tryagain:
		dp = TAILQ_FIRST(&clp->nfsc_deleg);
		while (dp != NULL) {
			ndp = TAILQ_NEXT(dp, nfsdl_list);
			if ((dp->nfsdl_flags & NFSCLDL_RECALL)) {
				/*
				 * Wait for outstanding I/O ops to be done.
				 */
				if (dp->nfsdl_rwlock.nfslock_usecnt > 0) {
				    if (igotlock) {
					nfsv4_unlock(&clp->nfsc_lock, 0);
					igotlock = 0;
				    }
				    dp->nfsdl_rwlock.nfslock_lock |=
					NFSV4LOCK_WANTED;
				    (void) nfsmsleep(&dp->nfsdl_rwlock,
					NFSCLSTATEMUTEXPTR, PZERO, "nfscld",
					NULL);
				    goto tryagain;
				}
				while (!igotlock) {
					igotlock = nfsv4_lock(&clp->nfsc_lock, 1,
					    &islept, NFSCLSTATEMUTEXPTR, NULL);
					if (islept)
						goto tryagain;
				}
				NFSUNLOCKCLSTATE();
				newnfs_copycred(&dp->nfsdl_cred, cred);
				ret = nfscl_recalldeleg(clp, clp->nfsc_nmp, dp,
				    NULL, cred, p, 1);
				if (!ret) {
					nfscl_cleandeleg(dp);
					TAILQ_REMOVE(&clp->nfsc_deleg, dp,
					    nfsdl_list);
					LIST_REMOVE(dp, nfsdl_hash);
					TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
					nfscl_delegcnt--;
					nfsstatsv1.cldelegates--;
				}
				NFSLOCKCLSTATE();
			}
			dp = ndp;
		}

		/*
		 * Clear out old delegations, if we are above the high water
		 * mark. Only clear out ones with no state related to them.
		 * The tailq list is in LRU order.
 */
		dp = TAILQ_LAST(&clp->nfsc_deleg, nfscldeleghead);
		while (nfscl_delegcnt > nfscl_deleghighwater && dp != NULL) {
		    ndp = TAILQ_PREV(dp, nfscldeleghead, nfsdl_list);
		    if (dp->nfsdl_rwlock.nfslock_usecnt == 0 &&
			dp->nfsdl_rwlock.nfslock_lock == 0 &&
			dp->nfsdl_timestamp < NFSD_MONOSEC &&
			(dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_ZAPPED |
			  NFSCLDL_NEEDRECLAIM | NFSCLDL_DELEGRET)) == 0) {
			/* Only safe if no open or lock state hangs off it. */
			clearok = 1;
			LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
			    op = LIST_FIRST(&owp->nfsow_open);
			    if (op != NULL) {
				clearok = 0;
				break;
			    }
			}
			if (clearok) {
			    LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) {
				if (!LIST_EMPTY(&lp->nfsl_lock)) {
				    clearok = 0;
				    break;
				}
			    }
			}
			if (clearok) {
			    TAILQ_REMOVE(&clp->nfsc_deleg, dp, nfsdl_list);
			    LIST_REMOVE(dp, nfsdl_hash);
			    TAILQ_INSERT_HEAD(&dh, dp, nfsdl_list);
			    nfscl_delegcnt--;
			    nfsstatsv1.cldelegates--;
			}
		    }
		    dp = ndp;
		}
		if (igotlock)
			nfsv4_unlock(&clp->nfsc_lock, 0);

		/*
		 * Do the recall on any layouts. To avoid trouble, always
		 * come back up here after having slept.
		 */
		TAILQ_INIT(&rlh);
tryagain2:
		TAILQ_FOREACH_SAFE(lyp, &clp->nfsc_layout, nfsly_list, nlyp) {
			if ((lyp->nfsly_flags & NFSLY_RECALL) != 0) {
				/*
				 * Wait for outstanding I/O ops to be done.
				 */
				if (lyp->nfsly_lock.nfslock_usecnt > 0 ||
				    (lyp->nfsly_lock.nfslock_lock &
				     NFSV4LOCK_LOCK) != 0) {
					lyp->nfsly_lock.nfslock_lock |=
					    NFSV4LOCK_WANTED;
					(void)nfsmsleep(&lyp->nfsly_lock,
					    NFSCLSTATEMUTEXPTR, PZERO, "nfslyp",
					    NULL);
					goto tryagain2;
				}
				/* Move the layout to the recall list. */
				TAILQ_REMOVE(&clp->nfsc_layout, lyp,
				    nfsly_list);
				LIST_REMOVE(lyp, nfsly_hash);
				TAILQ_INSERT_HEAD(&rlh, lyp, nfsly_list);

				/* Handle any layout commits. */
				if (!NFSHASNOLAYOUTCOMMIT(clp->nfsc_nmp) &&
				    (lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
					lyp->nfsly_flags &= ~NFSLY_WRITTEN;
					NFSUNLOCKCLSTATE();
					NFSCL_DEBUG(3, "do layoutcommit\n");
					nfscl_dolayoutcommit(clp->nfsc_nmp, lyp,
					    cred, p);
					NFSLOCKCLSTATE();
					goto tryagain2;
				}
			}
		}

		/*
		 * Now, look for stale layouts.
 */
		lyp = TAILQ_LAST(&clp->nfsc_layout, nfscllayouthead);
		while (lyp != NULL) {
			nlyp = TAILQ_PREV(lyp, nfscllayouthead, nfsly_list);
			if (lyp->nfsly_timestamp < NFSD_MONOSEC &&
			    (lyp->nfsly_flags & NFSLY_RECALL) == 0 &&
			    lyp->nfsly_lock.nfslock_usecnt == 0 &&
			    lyp->nfsly_lock.nfslock_lock == 0) {
				NFSCL_DEBUG(4, "ret stale lay=%d\n",
				    nfscl_layoutcnt);
				/* M_NOWAIT: mutex held; just stop on failure. */
				recallp = malloc(sizeof(*recallp),
				    M_NFSLAYRECALL, M_NOWAIT);
				if (recallp == NULL)
					break;
				(void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE,
				    lyp, NFSLAYOUTIOMODE_ANY, 0, UINT64_MAX,
				    lyp->nfsly_stateid.seqid, recallp);
			}
			lyp = nlyp;
		}

		/*
		 * Free up any unreferenced device info structures.
		 */
		LIST_FOREACH_SAFE(dip, &clp->nfsc_devinfo, nfsdi_list, ndip) {
			if (dip->nfsdi_layoutrefs == 0 &&
			    dip->nfsdi_refcnt == 0) {
				NFSCL_DEBUG(4, "freeing devinfo\n");
				LIST_REMOVE(dip, nfsdi_list);
				nfscl_freedevinfo(dip);
			}
		}
		NFSUNLOCKCLSTATE();

		/* Do layout return(s), as required. */
		TAILQ_FOREACH_SAFE(lyp, &rlh, nfsly_list, nlyp) {
			TAILQ_REMOVE(&rlh, lyp, nfsly_list);
			NFSCL_DEBUG(4, "ret layout\n");
			nfscl_layoutreturn(clp->nfsc_nmp, lyp, cred, p);
			nfscl_freelayout(lyp);
		}

		/*
		 * Delegreturn any delegations cleaned out or recalled.
		 */
		TAILQ_FOREACH_SAFE(dp, &dh, nfsdl_list, ndp) {
			newnfs_copycred(&dp->nfsdl_cred, cred);
			(void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
			TAILQ_REMOVE(&dh, dp, nfsdl_list);
			FREE((caddr_t)dp, M_NFSCLDELEG);
		}

		SLIST_INIT(&lfh);
		/*
		 * Call nfscl_cleanupkext() once per second to check for
		 * open/lock owners where the process has exited.
		 */
		mytime = NFSD_MONOSEC;
		if (prevsec != mytime) {
			prevsec = mytime;
			nfscl_cleanupkext(clp, &lfh);
		}

		/*
		 * Do a ReleaseLockOwner for all lock owners where the
		 * associated process no longer exists, as found by
		 * nfscl_cleanupkext().
 */
		newnfs_setroot(cred);
		SLIST_FOREACH_SAFE(lfhp, &lfh, nfslfh_list, nlfhp) {
			LIST_FOREACH_SAFE(lp, &lfhp->nfslfh_lock, nfsl_list,
			    nlp) {
				(void)nfsrpc_rellockown(clp->nfsc_nmp, lp,
				    lfhp->nfslfh_fh, lfhp->nfslfh_len, cred,
				    p);
				nfscl_freelockowner(lp, 0);
			}
			free(lfhp, M_TEMP);
		}
		SLIST_INIT(&lfh);

		NFSLOCKCLSTATE();
		/* Sleep up to a second unless a recovery is pending. */
		if ((clp->nfsc_flags & NFSCLFLAGS_RECOVER) == 0)
			(void)mtx_sleep(clp, NFSCLSTATEMUTEXPTR, PWAIT, "nfscl",
			    hz);
		if (clp->nfsc_flags & NFSCLFLAGS_UMOUNT) {
			/* Unmount in progress: clean up and exit the thread. */
			clp->nfsc_flags &= ~NFSCLFLAGS_HASTHREAD;
			NFSUNLOCKCLSTATE();
			NFSFREECRED(cred);
			wakeup((caddr_t)clp);
			return;
		}
		NFSUNLOCKCLSTATE();
	}
}

/*
 * Initiate state recovery. Called when NFSERR_STALECLIENTID,
 * NFSERR_STALESTATEID or NFSERR_BADSESSION is received.
 * Sets NFSCLFLAGS_RECOVER and wakes the renew thread, which does the work.
 */
APPLESTATIC void
nfscl_initiate_recovery(struct nfsclclient *clp)
{

	if (clp == NULL)
		return;
	NFSLOCKCLSTATE();
	clp->nfsc_flags |= NFSCLFLAGS_RECOVER;
	NFSUNLOCKCLSTATE();
	wakeup((caddr_t)clp);
}

/*
 * Dump out the state stuff for debugging.
 * The int arguments select which categories are printed.
 */
APPLESTATIC void
nfscl_dumpstate(struct nfsmount *nmp, int openowner, int opens,
    int lockowner, int locks)
{
	struct nfsclclient *clp;
	struct nfsclowner *owp;
	struct nfsclopen *op;
	struct nfscllockowner *lp;
	struct nfscllock *lop;
	struct nfscldeleg *dp;

	clp = nmp->nm_clp;
	if (clp == NULL) {
		printf("nfscl dumpstate NULL clp\n");
		return;
	}
	NFSLOCKCLSTATE();
	/* First, the state held under delegations. */
	TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) {
	  LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
	    if (openowner && !LIST_EMPTY(&owp->nfsow_open))
		printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
		    owp->nfsow_owner[0], owp->nfsow_owner[1],
		    owp->nfsow_owner[2], owp->nfsow_owner[3],
		    owp->nfsow_seqid);
	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
		if (opens)
		    printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
			op->nfso_stateid.other[0], op->nfso_stateid.other[1],
			op->nfso_stateid.other[2], op->nfso_opencnt,
			op->nfso_fh[12]);
		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
		    if (lockowner)
			printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
			    lp->nfsl_owner[0],
			    lp->nfsl_owner[1], lp->nfsl_owner[2],
			    lp->nfsl_owner[3], lp->nfsl_seqid,
			    lp->nfsl_stateid.other[0],
			    lp->nfsl_stateid.other[1],
			    lp->nfsl_stateid.other[2]);
		    LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
			if (locks)
#ifdef __FreeBSD__
			    printf("lck typ=%d fst=%ju end=%ju\n",
				lop->nfslo_type, (intmax_t)lop->nfslo_first,
				(intmax_t)lop->nfslo_end);
#else
			    printf("lck typ=%d fst=%qd end=%qd\n",
				lop->nfslo_type, lop->nfslo_first,
				lop->nfslo_end);
#endif
		    }
		}
	    }
	  }
	}
	/* Then the state held directly on the clientid. */
	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
	    if (openowner && !LIST_EMPTY(&owp->nfsow_open))
		printf("owner=0x%x 0x%x 0x%x 0x%x seqid=%d\n",
		    owp->nfsow_owner[0], owp->nfsow_owner[1],
		    owp->nfsow_owner[2], owp->nfsow_owner[3],
		    owp->nfsow_seqid);
	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
		if (opens)
		    printf("open st=0x%x 0x%x 0x%x cnt=%d fh12=0x%x\n",
			op->nfso_stateid.other[0], op->nfso_stateid.other[1],
			op->nfso_stateid.other[2], op->nfso_opencnt,
			op->nfso_fh[12]);
		LIST_FOREACH(lp, &op->nfso_lock, nfsl_list) {
		    if (lockowner)
			printf("lckown=0x%x 0x%x 0x%x 0x%x seqid=%d st=0x%x 0x%x 0x%x\n",
			    lp->nfsl_owner[0], lp->nfsl_owner[1],
			    lp->nfsl_owner[2], lp->nfsl_owner[3],
			    lp->nfsl_seqid,
			    lp->nfsl_stateid.other[0],
			    lp->nfsl_stateid.other[1],
			    lp->nfsl_stateid.other[2]);
		    LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
			if (locks)
#ifdef __FreeBSD__
			    printf("lck typ=%d fst=%ju end=%ju\n",
				lop->nfslo_type, (intmax_t)lop->nfslo_first,
				(intmax_t)lop->nfslo_end);
#else
			    printf("lck typ=%d fst=%qd end=%qd\n",
				lop->nfslo_type, lop->nfslo_first,
				lop->nfslo_end);
#endif
		    }
		}
	    }
	}
	NFSUNLOCKCLSTATE();
}

/*
 * Check for duplicate open owners and opens.
 * (Only used as a diagnostic aid.)
 * On the first duplicate found, the state is dumped and the function
 * returns early with the state mutex released.
 */
APPLESTATIC void
nfscl_dupopen(vnode_t vp, int dupopens)
{
	struct nfsclclient *clp;
	struct nfsclowner *owp, *owp2;
	struct nfsclopen *op, *op2;
	struct nfsfh *nfhp;

	clp = VFSTONFS(vnode_mount(vp))->nm_clp;
	if (clp == NULL) {
		printf("nfscl dupopen NULL clp\n");
		return;
	}
	nfhp = VTONFS(vp)->n_fhp;
	NFSLOCKCLSTATE();

	/*
	 * First, search for duplicate owners.
	 * These should never happen!
	 */
	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
	    LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
		if (owp != owp2 &&
		    !NFSBCMP(owp->nfsow_owner, owp2->nfsow_owner,
		    NFSV4CL_LOCKNAMELEN)) {
			NFSUNLOCKCLSTATE();
			printf("DUP OWNER\n");
			nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1, 0, 0);
			return;
		}
	    }
	}

	/*
	 * Now, search for duplicate stateids.
	 * These shouldn't happen, either.
	 * (The all-zeros stateid is excluded from the comparison.)
	 */
	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
	    LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
		LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
		    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
			if (op != op2 &&
			    (op->nfso_stateid.other[0] != 0 ||
			     op->nfso_stateid.other[1] != 0 ||
			     op->nfso_stateid.other[2] != 0) &&
			    op->nfso_stateid.other[0] ==
			    op2->nfso_stateid.other[0] &&
			    op->nfso_stateid.other[1] ==
			    op2->nfso_stateid.other[1] &&
			    op->nfso_stateid.other[2] ==
			    op2->nfso_stateid.other[2]) {
			    NFSUNLOCKCLSTATE();
			    printf("DUP STATEID\n");
			    nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1, 1,
				0, 0);
			    return;
			}
		    }
		}
	    }
	}

	/*
	 * Now search for duplicate opens.
	 * Duplicate opens for the same owner
	 * should never occur. Other duplicates are
	 * possible and are checked for if "dupopens"
	 * is true.
 */
	LIST_FOREACH(owp2, &clp->nfsc_owner, nfsow_list) {
	    LIST_FOREACH(op2, &owp2->nfsow_open, nfso_list) {
		if (nfhp->nfh_len == op2->nfso_fhlen &&
		    !NFSBCMP(nfhp->nfh_fh, op2->nfso_fh, nfhp->nfh_len)) {
		    LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
			LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
			    if (op != op2 && nfhp->nfh_len == op->nfso_fhlen &&
				!NFSBCMP(nfhp->nfh_fh, op->nfso_fh,
				nfhp->nfh_len) &&
				(!NFSBCMP(op->nfso_own->nfsow_owner,
				 op2->nfso_own->nfsow_owner,
				 NFSV4CL_LOCKNAMELEN) || dupopens)) {
				if (!NFSBCMP(op->nfso_own->nfsow_owner,
				    op2->nfso_own->nfsow_owner,
				    NFSV4CL_LOCKNAMELEN)) {
				    NFSUNLOCKCLSTATE();
				    printf("BADDUP OPEN\n");
				} else {
				    NFSUNLOCKCLSTATE();
				    printf("DUP OPEN\n");
				}
				nfscl_dumpstate(VFSTONFS(vnode_mount(vp)), 1,
				    1, 0, 0);
				return;
			    }
			}
		    }
		}
	    }
	}
	NFSUNLOCKCLSTATE();
}

/*
 * During close, find an open that needs to be dereferenced and
 * dereference it. If there are no more opens for this file,
 * log a message to that effect.
 * Opens aren't actually Close'd until VOP_INACTIVE() is performed
 * on the file's vnode.
 * This is the safe way, since it is difficult to identify
 * which open the close is for and I/O can be performed after the
 * close(2) system call when a file is mmap'd.
 * If it returns 0 for success, there will be a referenced
 * clp returned via clpp.
 */
APPLESTATIC int
nfscl_getclose(vnode_t vp, struct nfsclclient **clpp)
{
	struct nfsclclient *clp;
	struct nfsclowner *owp;
	struct nfsclopen *op;
	struct nfscldeleg *dp;
	struct nfsfh *nfhp;
	int error, notdecr;

	error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp);
	if (error)
		return (error);
	*clpp = clp;

	nfhp = VTONFS(vp)->n_fhp;
	/* notdecr is cleared once some open's opencnt has been decremented. */
	notdecr = 1;
	NFSLOCKCLSTATE();
	/*
	 * First, look for one under a delegation that was locally issued
	 * and just decrement the opencnt for it. Since all my Opens against
	 * the server are DENY_NONE, I don't see a problem with hanging
	 * onto them.
 (It is much easier to use one of the extant Opens
	 * that I already have on the server when a Delegation is recalled
	 * than to do fresh Opens.) Someday, I might need to rethink this, but.
	 */
	dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
	if (dp != NULL) {
		LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) {
			op = LIST_FIRST(&owp->nfsow_open);
			if (op != NULL) {
				/*
				 * Since a delegation is for a file, there
				 * should never be more than one open for
				 * each openowner.
				 */
				if (LIST_NEXT(op, nfso_list) != NULL)
					panic("nfscdeleg opens");
				if (notdecr && op->nfso_opencnt > 0) {
					notdecr = 0;
					op->nfso_opencnt--;
					break;
				}
			}
		}
	}

	/* Now process the opens against the server. */
	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
			if (op->nfso_fhlen == nfhp->nfh_len &&
			    !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
			    nfhp->nfh_len)) {
				/* Found an open, decrement cnt if possible */
				if (notdecr && op->nfso_opencnt > 0) {
					notdecr = 0;
					op->nfso_opencnt--;
				}
				/*
				 * There are more opens, so just return.
				 */
				if (op->nfso_opencnt > 0) {
					NFSUNLOCKCLSTATE();
					return (0);
				}
			}
		}
	}
	NFSUNLOCKCLSTATE();
	if (notdecr)
		printf("nfscl: never fnd open\n");
	return (0);
}

/*
 * Close all opens for the file, called from VOP_INACTIVE() processing.
 * Returns 0 on success; a referenced clp is returned via clpp.
 */
APPLESTATIC int
nfscl_doclose(vnode_t vp, struct nfsclclient **clpp, NFSPROC_T *p)
{
	struct nfsclclient *clp;
	struct nfsclowner *owp, *nowp;
	struct nfsclopen *op;
	struct nfscldeleg *dp;
	struct nfsfh *nfhp;
	struct nfsclrecalllayout *recallp;
	int error;

	error = nfscl_getcl(vnode_mount(vp), NULL, NULL, 1, &clp);
	if (error)
		return (error);
	*clpp = clp;

	nfhp = VTONFS(vp)->n_fhp;
	/* Allocated up front, since M_WAITOK can't be used under the mutex. */
	recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL, M_WAITOK);
	NFSLOCKCLSTATE();
	/*
	 * First get rid of the local Open structures, which should be no
	 * longer in use.
 */
	dp = nfscl_finddeleg(clp, nfhp->nfh_fh, nfhp->nfh_len);
	if (dp != NULL) {
		LIST_FOREACH_SAFE(owp, &dp->nfsdl_owner, nfsow_list, nowp) {
			op = LIST_FIRST(&owp->nfsow_open);
			if (op != NULL) {
				KASSERT((op->nfso_opencnt == 0),
				    ("nfscl: bad open cnt on deleg"));
				nfscl_freeopen(op, 1);
			}
			nfscl_freeopenowner(owp, 1);
		}
	}

	/* Return any layouts marked return on close. */
	nfscl_retoncloselayout(vp, clp, nfhp->nfh_fh, nfhp->nfh_len, &recallp);

	/* Now process the opens against the server. */
lookformore:
	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
		op = LIST_FIRST(&owp->nfsow_open);
		while (op != NULL) {
			if (op->nfso_fhlen == nfhp->nfh_len &&
			    !NFSBCMP(op->nfso_fh, nfhp->nfh_fh,
			    nfhp->nfh_len)) {
				/* Found an open, close it. */
				KASSERT((op->nfso_opencnt == 0),
				    ("nfscl: bad open cnt on server"));
				/*
				 * The mutex is dropped for the Close RPC, so
				 * the scan must restart from the beginning.
				 */
				NFSUNLOCKCLSTATE();
				nfsrpc_doclose(VFSTONFS(vnode_mount(vp)), op,
				    p);
				NFSLOCKCLSTATE();
				goto lookformore;
			}
			op = LIST_NEXT(op, nfso_list);
		}
	}
	NFSUNLOCKCLSTATE();
	/*
	 * recallp has been set NULL by nfscl_retoncloselayout() if it was
	 * used by the function, but calling free() with a NULL pointer is ok.
	 */
	free(recallp, M_NFSLAYRECALL);
	return (0);
}

/*
 * Return all delegations on this client.
 * (Must be called with client sleep lock.)
 */
static void
nfscl_delegreturnall(struct nfsclclient *clp, NFSPROC_T *p)
{
	struct nfscldeleg *dp, *ndp;
	struct ucred *cred;

	cred = newnfs_getcred();
	TAILQ_FOREACH_SAFE(dp, &clp->nfsc_deleg, nfsdl_list, ndp) {
		nfscl_cleandeleg(dp);
		(void) nfscl_trydelegreturn(dp, cred, clp->nfsc_nmp, p);
		nfscl_freedeleg(&clp->nfsc_deleg, dp);
	}
	NFSFREECRED(cred);
}

/*
 * Do a callback RPC.
 */
APPLESTATIC void
nfscl_docb(struct nfsrv_descript *nd, NFSPROC_T *p)
{
	int clist, gotseq_ok, i, j, k, op, rcalls;
	u_int32_t *tl;
	struct nfsclclient *clp;
	struct nfscldeleg *dp = NULL;
	int numops, taglen = -1, error = 0, trunc;
	u_int32_t minorvers = 0, retops = 0, *retopsp = NULL, *repp, cbident;
	u_char tag[NFSV4_SMALLSTR + 1], *tagstr;
	vnode_t vp = NULL;
	struct nfsnode *np;
	struct vattr va;
	struct nfsfh *nfhp;
	mount_t mp;
	nfsattrbit_t attrbits, rattrbits;
	nfsv4stateid_t stateid;
	uint32_t seqid, slotid = 0, highslot, cachethis;
	uint8_t sessionid[NFSX_V4SESSIONID];
	struct mbuf *rep;
	struct nfscllayout *lyp;
	uint64_t filesid[2], len, off;
	int changed, gotone, laytype, recalltype;
	uint32_t iomode;
	struct nfsclrecalllayout *recallp = NULL;
	struct nfsclsession *tsep;

	gotseq_ok = 0;
	nfsrvd_rephead(nd);
	/* Parse the compound header: tag, minor version, op count. */
	NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
	taglen = fxdr_unsigned(int, *tl);
	if (taglen < 0) {
		error = EBADRPC;
		goto nfsmout;
	}
	if (taglen <= NFSV4_SMALLSTR)
		tagstr = tag;
	else
		tagstr = malloc(taglen + 1, M_TEMP, M_WAITOK);
	error = nfsrv_mtostr(nd, tagstr, taglen);
	if (error) {
		if (taglen > NFSV4_SMALLSTR)
			free(tagstr, M_TEMP);
		taglen = -1;
		goto nfsmout;
	}
	/* Echo the tag back in the reply. */
	(void) nfsm_strtom(nd, tag, taglen);
	if (taglen > NFSV4_SMALLSTR) {
		free(tagstr, M_TEMP);
	}
	NFSM_BUILD(retopsp, u_int32_t *, NFSX_UNSIGNED);
	NFSM_DISSECT(tl, u_int32_t *, 3 * NFSX_UNSIGNED);
	minorvers = fxdr_unsigned(u_int32_t, *tl++);
	if (minorvers != NFSV4_MINORVERSION &&
	    minorvers != NFSV41_MINORVERSION)
		nd->nd_repstat = NFSERR_MINORVERMISMATCH;
	cbident = fxdr_unsigned(u_int32_t, *tl++);
	if (nd->nd_repstat)
		numops = 0;
	else
		numops = fxdr_unsigned(int, *tl);
	/*
	 * Loop around doing the sub ops.
 */
	for (i = 0; i < numops; i++) {
		NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED);
		NFSM_BUILD(repp, u_int32_t *, 2 * NFSX_UNSIGNED);
		*repp++ = *tl;
		op = fxdr_unsigned(int, *tl);
		/* Reject ops outside the range the minor version allows. */
		if (op < NFSV4OP_CBGETATTR ||
		   (op > NFSV4OP_CBRECALL && minorvers == NFSV4_MINORVERSION) ||
		   (op > NFSV4OP_CBNOTIFYDEVID &&
		    minorvers == NFSV41_MINORVERSION)) {
		    nd->nd_repstat = NFSERR_OPILLEGAL;
		    *repp = nfscl_errmap(nd, minorvers);
		    retops++;
		    break;
		}
		nd->nd_procnum = op;
		if (op < NFSV41_CBNOPS)
			nfsstatsv1.cbrpccnt[nd->nd_procnum]++;
		switch (op) {
		case NFSV4OP_CBGETATTR:
			NFSCL_DEBUG(4, "cbgetattr\n");
			mp = NULL;
			vp = NULL;
			error = nfsm_getfh(nd, &nfhp);
			if (!error)
				error = nfsrv_getattrbits(nd, &attrbits,
				    NULL, NULL);
			/* For NFSv4.1, ops must follow a CB_SEQUENCE. */
			if (error == 0 && i == 0 &&
			    minorvers != NFSV4_MINORVERSION)
				error = NFSERR_OPNOTINSESS;
			if (!error) {
				mp = nfscl_getmnt(minorvers, sessionid, cbident,
				    &clp);
				if (mp == NULL)
					error = NFSERR_SERVERFAULT;
			}
			if (!error) {
				error = nfscl_ngetreopen(mp, nfhp->nfh_fh,
				    nfhp->nfh_len, p, &np);
				if (!error)
					vp = NFSTOV(np);
			}
			if (!error) {
				/*
				 * Only size and change are returned; both
				 * come from the delegation (or the nfsnode,
				 * when a vnode was found).
				 */
				NFSZERO_ATTRBIT(&rattrbits);
				NFSLOCKCLSTATE();
				dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
				    nfhp->nfh_len);
				if (dp != NULL) {
					if (NFSISSET_ATTRBIT(&attrbits,
					    NFSATTRBIT_SIZE)) {
						if (vp != NULL)
							va.va_size = np->n_size;
						else
							va.va_size =
							    dp->nfsdl_size;
						NFSSETBIT_ATTRBIT(&rattrbits,
						    NFSATTRBIT_SIZE);
					}
					if (NFSISSET_ATTRBIT(&attrbits,
					    NFSATTRBIT_CHANGE)) {
						va.va_filerev =
						    dp->nfsdl_change;
						if (vp == NULL ||
						    (np->n_flag & NDELEGMOD))
							va.va_filerev++;
						NFSSETBIT_ATTRBIT(&rattrbits,
						    NFSATTRBIT_CHANGE);
					}
				} else
					error = NFSERR_SERVERFAULT;
				NFSUNLOCKCLSTATE();
			}
			if (vp != NULL)
				vrele(vp);
			if (mp != NULL)
				vfs_unbusy(mp);
			if (nfhp != NULL)
				FREE((caddr_t)nfhp, M_NFSFH);
			if (!error)
				(void) nfsv4_fillattr(nd, NULL, NULL, NULL, &va,
				    NULL, 0, &rattrbits, NULL, p, 0, 0, 0, 0,
				    (uint64_t)0);
			break;
		case NFSV4OP_CBRECALL:
			NFSCL_DEBUG(4, "cbrecall\n");
			NFSM_DISSECT(tl, u_int32_t *, NFSX_STATEID +
			    NFSX_UNSIGNED);
			stateid.seqid = *tl++;
			NFSBCOPY((caddr_t)tl, (caddr_t)stateid.other,
			    NFSX_STATEIDOTHER);
			tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
			trunc = fxdr_unsigned(int, *tl);
			error = nfsm_getfh(nd, &nfhp);
			if (error == 0 && i == 0 &&
			    minorvers != NFSV4_MINORVERSION)
				error = NFSERR_OPNOTINSESS;
			if (!error) {
				NFSLOCKCLSTATE();
				if (minorvers == NFSV4_MINORVERSION)
					clp = nfscl_getclnt(cbident);
				else
					clp = nfscl_getclntsess(sessionid);
				if (clp != NULL) {
					dp = nfscl_finddeleg(clp, nfhp->nfh_fh,
					    nfhp->nfh_len);
					if (dp != NULL && (dp->nfsdl_flags &
					    NFSCLDL_DELEGRET) == 0) {
						/*
						 * Just mark it; the renew
						 * thread does the recall.
						 */
						dp->nfsdl_flags |=
						    NFSCLDL_RECALL;
						wakeup((caddr_t)clp);
					}
				} else {
					error = NFSERR_SERVERFAULT;
				}
				NFSUNLOCKCLSTATE();
			}
			if (nfhp != NULL)
				FREE((caddr_t)nfhp, M_NFSFH);
			break;
		case NFSV4OP_CBLAYOUTRECALL:
			NFSCL_DEBUG(4, "cblayrec\n");
			nfhp = NULL;
			NFSM_DISSECT(tl, uint32_t *, 4 * NFSX_UNSIGNED);
			laytype = fxdr_unsigned(int, *tl++);
			iomode = fxdr_unsigned(uint32_t, *tl++);
			if (newnfs_true == *tl++)
				changed = 1;
			else
				changed = 0;
			recalltype = fxdr_unsigned(int, *tl);
			recallp = malloc(sizeof(*recallp), M_NFSLAYRECALL,
			    M_WAITOK);
			if (laytype != NFSLAYOUT_NFSV4_1_FILES)
				error = NFSERR_NOMATCHLAYOUT;
			else if (recalltype == NFSLAYOUTRETURN_FILE) {
				error = nfsm_getfh(nd, &nfhp);
				NFSCL_DEBUG(4, "retfile getfh=%d\n", error);
				if (error != 0)
					goto nfsmout;
				NFSM_DISSECT(tl, u_int32_t *, 2 * NFSX_HYPER +
				    NFSX_STATEID);
				off = fxdr_hyper(tl); tl += 2;
				len = fxdr_hyper(tl); tl += 2;
				stateid.seqid = fxdr_unsigned(uint32_t, *tl++);
				NFSBCOPY(tl, stateid.other, NFSX_STATEIDOTHER);
				if (minorvers == NFSV4_MINORVERSION)
					error = NFSERR_NOTSUPP;
				else if (i == 0)
					error = NFSERR_OPNOTINSESS;
				if (error == 0) {
					NFSLOCKCLSTATE();
					clp = nfscl_getclntsess(sessionid);
					NFSCL_DEBUG(4, "cbly clp=%p\n", clp);
					if (clp != NULL) {
						lyp = nfscl_findlayout(clp,
						    nfhp->nfh_fh,
						    nfhp->nfh_len);
						NFSCL_DEBUG(4, "cblyp=%p\n",
						    lyp);
						if (lyp != NULL &&
						    (lyp->nfsly_flags &
						     NFSLY_FILES) != 0 &&
						    !NFSBCMP(stateid.other,
						    lyp->nfsly_stateid.other,
						    NFSX_STATEIDOTHER)) {
							error =
							    nfscl_layoutrecall(
							    recalltype, lyp,
							    iomode, off, len,
							    stateid.seqid,
							    recallp);
							/*
							 * recallp ownership
							 * transferred; don't
							 * free it below.
							 */
							recallp = NULL;
							wakeup(clp);
							NFSCL_DEBUG(4,
							    "aft layrcal=%d\n",
							    error);
						} else
							error =
							  NFSERR_NOMATCHLAYOUT;
					} else
						error = NFSERR_NOMATCHLAYOUT;
					NFSUNLOCKCLSTATE();
				}
				free(nfhp, M_NFSFH);
			} else if (recalltype == NFSLAYOUTRETURN_FSID) {
				NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_HYPER);
				filesid[0] = fxdr_hyper(tl); tl += 2;
				filesid[1] = fxdr_hyper(tl); tl += 2;
				gotone = 0;
				NFSLOCKCLSTATE();
				clp = nfscl_getclntsess(sessionid);
				if (clp != NULL) {
					TAILQ_FOREACH(lyp, &clp->nfsc_layout,
					    nfsly_list) {
						if (lyp->nfsly_filesid[0] ==
						    filesid[0] &&
						    lyp->nfsly_filesid[1] ==
						    filesid[1]) {
							error =
							    nfscl_layoutrecall(
							    recalltype, lyp,
							    iomode, 0,
							    UINT64_MAX,
							    lyp->nfsly_stateid.seqid,
							    recallp);
							recallp = NULL;
							gotone = 1;
						}
					}
					if (gotone != 0)
						wakeup(clp);
					else
						error = NFSERR_NOMATCHLAYOUT;
				} else
					error = NFSERR_NOMATCHLAYOUT;
				NFSUNLOCKCLSTATE();
			} else if (recalltype == NFSLAYOUTRETURN_ALL) {
				gotone = 0;
				NFSLOCKCLSTATE();
				clp = nfscl_getclntsess(sessionid);
				if (clp != NULL) {
					TAILQ_FOREACH(lyp, &clp->nfsc_layout,
					    nfsly_list) {
						error = nfscl_layoutrecall(
						    recalltype, lyp, iomode, 0,
						    UINT64_MAX,
						    lyp->nfsly_stateid.seqid,
						    recallp);
						recallp = NULL;
						gotone = 1;
					}
					if (gotone != 0)
						wakeup(clp);
					else
						error = NFSERR_NOMATCHLAYOUT;
				} else
					error = NFSERR_NOMATCHLAYOUT;
				NFSUNLOCKCLSTATE();
			} else
				error = NFSERR_NOMATCHLAYOUT;
			if (recallp != NULL) {
				free(recallp, M_NFSLAYRECALL);
				recallp = NULL;
			}
			break;
		case NFSV4OP_CBSEQUENCE:
			NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
			    5 * NFSX_UNSIGNED);
			bcopy(tl, sessionid, NFSX_V4SESSIONID);
			tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
			seqid = fxdr_unsigned(uint32_t, *tl++);
			slotid = fxdr_unsigned(uint32_t, *tl++);
			highslot = fxdr_unsigned(uint32_t, *tl++);
			cachethis = *tl++;
			/*
			 * Throw away the referring call stuff.
 */
			clist = fxdr_unsigned(int, *tl);
			for (j = 0; j < clist; j++) {
				NFSM_DISSECT(tl, uint32_t *, NFSX_V4SESSIONID +
				    NFSX_UNSIGNED);
				tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
				rcalls = fxdr_unsigned(int, *tl);
				for (k = 0; k < rcalls; k++) {
					NFSM_DISSECT(tl, uint32_t *,
					    2 * NFSX_UNSIGNED);
				}
			}
			NFSLOCKCLSTATE();
			/* CB_SEQUENCE must be the first op in the compound. */
			if (i == 0) {
				clp = nfscl_getclntsess(sessionid);
				if (clp == NULL)
					error = NFSERR_SERVERFAULT;
			} else
				error = NFSERR_SEQUENCEPOS;
			if (error == 0) {
				tsep = nfsmnt_mdssession(clp->nfsc_nmp);
				error = nfsv4_seqsession(seqid, slotid,
				    highslot, tsep->nfsess_cbslots, &rep,
				    tsep->nfsess_backslots);
			}
			NFSUNLOCKCLSTATE();
			if (error == 0 || error == NFSERR_REPLYFROMCACHE) {
				gotseq_ok = 1;
				if (rep != NULL) {
					/*
					 * Handle a reply for a retried
					 * callback. The reply will be
					 * re-inserted in the session cache
					 * by the nfsv4_seqsess_cacherep() call
					 * after out:
					 */
					KASSERT(error == NFSERR_REPLYFROMCACHE,
					    ("cbsequence: non-NULL rep"));
					NFSCL_DEBUG(4, "Got cbretry\n");
					m_freem(nd->nd_mreq);
					nd->nd_mreq = rep;
					rep = NULL;
					goto out;
				}
				NFSM_BUILD(tl, uint32_t *,
				    NFSX_V4SESSIONID + 4 * NFSX_UNSIGNED);
				bcopy(sessionid, tl, NFSX_V4SESSIONID);
				tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
				*tl++ = txdr_unsigned(seqid);
				*tl++ = txdr_unsigned(slotid);
				*tl++ = txdr_unsigned(NFSV4_CBSLOTS - 1);
				*tl = txdr_unsigned(NFSV4_CBSLOTS - 1);
			}
			break;
		default:
			if (i == 0 && minorvers == NFSV41_MINORVERSION)
				error = NFSERR_OPNOTINSESS;
			else {
				NFSCL_DEBUG(1, "unsupp callback %d\n", op);
				error = NFSERR_NOTSUPP;
			}
			break;
		}
		if (error) {
			if (error == EBADRPC || error == NFSERR_BADXDR) {
				nd->nd_repstat = NFSERR_BADXDR;
			} else {
				nd->nd_repstat = error;
			}
			error = 0;
		}
		retops++;
		if (nd->nd_repstat) {
			*repp = nfscl_errmap(nd, minorvers);
			break;
		} else
			*repp = 0;	/* NFS4_OK */
	}
nfsmout:
	if (recallp != NULL)
		free(recallp, M_NFSLAYRECALL);
	if (error) {
		if (error == EBADRPC || error == NFSERR_BADXDR)
			nd->nd_repstat = NFSERR_BADXDR;
		else
			printf("nfsv4 comperr1=%d\n", error);
	}
	/* taglen == -1 means the tag was never parsed/echoed. */
	if (taglen == -1) {
		NFSM_BUILD(tl, u_int32_t *, 2
		    * NFSX_UNSIGNED);
		*tl++ = 0;
		*tl = 0;
	} else {
		*retopsp = txdr_unsigned(retops);
	}
	*nd->nd_errp = nfscl_errmap(nd, minorvers);
out:
	if (gotseq_ok != 0) {
		/* Cache a copy of the reply in the callback session slot. */
		rep = m_copym(nd->nd_mreq, 0, M_COPYALL, M_WAITOK);
		NFSLOCKCLSTATE();
		clp = nfscl_getclntsess(sessionid);
		if (clp != NULL) {
			tsep = nfsmnt_mdssession(clp->nfsc_nmp);
			nfsv4_seqsess_cacherep(slotid, tsep->nfsess_cbslots,
			    NFSERR_OK, &rep);
			NFSUNLOCKCLSTATE();
		} else {
			NFSUNLOCKCLSTATE();
			m_freem(rep);
		}
	}
}

/*
 * Generate the next cbident value. Basically just increment a static value
 * and then check that it isn't already in the list, if it has wrapped around.
 */
static u_int32_t
nfscl_nextcbident(void)
{
	struct nfsclclient *clp;
	int matched;
	static u_int32_t nextcbident = 0;
	static int haswrapped = 0;

	nextcbident++;
	if (nextcbident == 0)
		haswrapped = 1;
	if (haswrapped) {
		/*
		 * Search the clientid list for one already using this cbident.
		 */
		do {
			matched = 0;
			NFSLOCKCLSTATE();
			LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
				if (clp->nfsc_cbident == nextcbident) {
					matched = 1;
					break;
				}
			}
			NFSUNLOCKCLSTATE();
			if (matched == 1)
				nextcbident++;
		} while (matched);
	}
	return (nextcbident);
}

/*
 * Get the mount point related to a given cbident or session and busy it.
 * On success, a busied mount point is returned and *clpp is set; the
 * caller must vfs_unbusy() it. Returns NULL when no client matches or
 * vfs_busy() fails.
 */
static mount_t
nfscl_getmnt(int minorvers, uint8_t *sessionid, u_int32_t cbident,
    struct nfsclclient **clpp)
{
	struct nfsclclient *clp;
	mount_t mp;
	int error;
	struct nfsclsession *tsep;

	*clpp = NULL;
	NFSLOCKCLSTATE();
	LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
		tsep = nfsmnt_mdssession(clp->nfsc_nmp);
		if (minorvers == NFSV4_MINORVERSION) {
			if (clp->nfsc_cbident == cbident)
				break;
		} else if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
		    NFSX_V4SESSIONID))
			break;
	}
	if (clp == NULL) {
		NFSUNLOCKCLSTATE();
		return (NULL);
	}
	/* Hold a reference across the mutex drop, then busy the mount. */
	mp = clp->nfsc_nmp->nm_mountp;
	vfs_ref(mp);
	NFSUNLOCKCLSTATE();
	error = vfs_busy(mp, 0);
	vfs_rel(mp);
	if (error != 0)
		return (NULL);
	*clpp = clp;
	return (mp);
}

/*
 * Get the clientid pointer related to a given cbident.
 */
static struct nfsclclient *
nfscl_getclnt(u_int32_t cbident)
{
	struct nfsclclient *clp;

	/*
	 * Linear scan of the global client list for a matching cbident.
	 * NOTE(review): other walkers of nfsclhead in this file hold
	 * NFSLOCKCLSTATE(); presumably the caller holds it here too —
	 * confirm against callers.
	 */
	LIST_FOREACH(clp, &nfsclhead, nfsc_list)
		if (clp->nfsc_cbident == cbident)
			break;
	/* Returns NULL when the loop runs off the end without a match. */
	return (clp);
}

/*
 * Get the clientid pointer related to a given sessionid.
 * Returns NULL if no client on nfsclhead has a matching sessionid.
 */
static struct nfsclclient *
nfscl_getclntsess(uint8_t *sessionid)
{
	struct nfsclclient *clp;
	struct nfsclsession *tsep;

	LIST_FOREACH(clp, &nfsclhead, nfsc_list) {
		tsep = nfsmnt_mdssession(clp->nfsc_nmp);
		/* !NFSBCMP() == sessionids are equal. */
		if (!NFSBCMP(tsep->nfsess_sessionid, sessionid,
		    NFSX_V4SESSIONID))
			break;
	}
	return (clp);
}

/*
 * Search for a lock conflict locally on the client. A conflict occurs if
 * - not same owner and overlapping byte range and at least one of them is
 *   a write lock or this is an unlock.
 *
 * Checks the delegation's lock list first (if dp != NULL), then the lock
 * lists hanging off every open for the given file handle. Returns the
 * first conflict found via nfscl_checkconflict(), or 0 for no conflict.
 */
static int
nfscl_localconflict(struct nfsclclient *clp, u_int8_t *fhp, int fhlen,
    struct nfscllock *nlop, u_int8_t *own, struct nfscldeleg *dp,
    struct nfscllock **lopp)
{
	struct nfsclowner *owp;
	struct nfsclopen *op;
	int ret;

	if (dp != NULL) {
		/* Locks held under the delegation. */
		ret = nfscl_checkconflict(&dp->nfsdl_lock, nlop, own, lopp);
		if (ret)
			return (ret);
	}
	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
		LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
			/* Only opens for the same file handle matter. */
			if (op->nfso_fhlen == fhlen &&
			    !NFSBCMP(op->nfso_fh, fhp, fhlen)) {
				ret = nfscl_checkconflict(&op->nfso_lock, nlop,
				    own, lopp);
				if (ret)
					return (ret);
			}
		}
	}
	return (0);
}

/*
 * Helper for nfscl_localconflict(): scan one lockowner list for a lock
 * that conflicts with nlop. Returns NFSERR_DENIED (and sets *lopp, when
 * lopp != NULL) on conflict, 0 otherwise.
 */
static int
nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
    u_int8_t *own, struct nfscllock **lopp)
{
	struct nfscllockowner *lp;
	struct nfscllock *lop;

	LIST_FOREACH(lp, lhp, nfsl_list) {
		/* NFSBCMP() != 0 --> a different lock owner than "own". */
		if (NFSBCMP(lp->nfsl_owner, own, NFSV4CL_LOCKNAMELEN)) {
			LIST_FOREACH(lop, &lp->nfsl_lock, nfslo_list) {
				/*
				 * Lock list appears ordered by offset, so
				 * stop once past the end of the new range.
				 */
				if (lop->nfslo_first >= nlop->nfslo_end)
					break;
				/* No overlap yet; keep scanning. */
				if (lop->nfslo_end <= nlop->nfslo_first)
					continue;
				/*
				 * Overlapping range: conflict when either
				 * lock is a write lock or the new op is an
				 * unlock.
				 */
				if (lop->nfslo_type == F_WRLCK ||
				    nlop->nfslo_type == F_WRLCK ||
				    nlop->nfslo_type == F_UNLCK) {
					if (lopp != NULL)
						*lopp = lop;
					return (NFSERR_DENIED);
				}
			}
		}
	}
	return (0);
}

/*
 * Check for a local conflicting lock.
*/ APPLESTATIC int nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags) { struct nfscllock *lop, nlck; struct nfscldeleg *dp; struct nfsnode *np; u_int8_t own[NFSV4CL_LOCKNAMELEN]; int error; nlck.nfslo_type = fl->l_type; nlck.nfslo_first = off; if (len == NFS64BITSSET) { nlck.nfslo_end = NFS64BITSSET; } else { nlck.nfslo_end = off + len; if (nlck.nfslo_end <= nlck.nfslo_first) return (NFSERR_INVAL); } np = VTONFS(vp); nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, &nlck, own, dp, &lop); if (error != 0) { fl->l_whence = SEEK_SET; fl->l_start = lop->nfslo_first; if (lop->nfslo_end == NFS64BITSSET) fl->l_len = 0; else fl->l_len = lop->nfslo_end - lop->nfslo_first; fl->l_pid = (pid_t)0; fl->l_type = lop->nfslo_type; error = -1; /* no RPC required */ } else if (dp != NULL && ((dp->nfsdl_flags & NFSCLDL_WRITE) || fl->l_type == F_RDLCK)) { /* * The delegation ensures that there isn't a conflicting * lock on the server, so return -1 to indicate an RPC * isn't required. */ fl->l_type = F_UNLCK; error = -1; } NFSUNLOCKCLSTATE(); return (error); } /* * Handle Recall of a delegation. * The clp must be exclusive locked when this is called. */ static int nfscl_recalldeleg(struct nfsclclient *clp, struct nfsmount *nmp, struct nfscldeleg *dp, vnode_t vp, struct ucred *cred, NFSPROC_T *p, int called_from_renewthread) { struct nfsclowner *owp, *lowp, *nowp; struct nfsclopen *op, *lop; struct nfscllockowner *lp; struct nfscllock *lckp; struct nfsnode *np; int error = 0, ret, gotvp = 0; if (vp == NULL) { /* * First, get a vnode for the file. This is needed to do RPCs. */ ret = nfscl_ngetreopen(nmp->nm_mountp, dp->nfsdl_fh, dp->nfsdl_fhlen, p, &np); if (ret) { /* * File isn't open, so nothing to move over to the * server. 
*/ return (0); } vp = NFSTOV(np); gotvp = 1; } else { np = VTONFS(vp); } dp->nfsdl_flags &= ~NFSCLDL_MODTIMESET; /* * Ok, if it's a write delegation, flush data to the server, so * that close/open consistency is retained. */ ret = 0; NFSLOCKNODE(np); if ((dp->nfsdl_flags & NFSCLDL_WRITE) && (np->n_flag & NMODIFIED)) { np->n_flag |= NDELEGRECALL; NFSUNLOCKNODE(np); ret = ncl_flush(vp, MNT_WAIT, p, 1, called_from_renewthread); NFSLOCKNODE(np); np->n_flag &= ~NDELEGRECALL; } NFSINVALATTRCACHE(np); NFSUNLOCKNODE(np); if (ret == EIO && called_from_renewthread != 0) { /* * If the flush failed with EIO for the renew thread, * return now, so that the dirty buffer will be flushed * later. */ if (gotvp != 0) vrele(vp); return (ret); } /* * Now, for each openowner with opens issued locally, move them * over to state against the server. */ LIST_FOREACH(lowp, &dp->nfsdl_owner, nfsow_list) { lop = LIST_FIRST(&lowp->nfsow_open); if (lop != NULL) { if (LIST_NEXT(lop, nfso_list) != NULL) panic("nfsdlg mult opens"); /* * Look for the same openowner against the server. */ LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { if (!NFSBCMP(lowp->nfsow_owner, owp->nfsow_owner, NFSV4CL_LOCKNAMELEN)) { newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_moveopen(vp, clp, nmp, lop, owp, dp, cred, p); if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); } if (ret) { nfscl_freeopen(lop, 1); if (!error) error = ret; } break; } } /* * If no openowner found, create one and get an open * for it. 
*/ if (owp == NULL) { MALLOC(nowp, struct nfsclowner *, sizeof (struct nfsclowner), M_NFSCLOWNER, M_WAITOK); nfscl_newopen(clp, NULL, &owp, &nowp, &op, NULL, lowp->nfsow_owner, dp->nfsdl_fh, dp->nfsdl_fhlen, NULL, NULL); newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_moveopen(vp, clp, nmp, lop, owp, dp, cred, p); if (ret) { nfscl_freeopenowner(owp, 0); if (ret == NFSERR_STALECLIENTID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); } if (ret) { nfscl_freeopen(lop, 1); if (!error) error = ret; } } } } } /* * Now, get byte range locks for any locks done locally. */ LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { LIST_FOREACH(lckp, &lp->nfsl_lock, nfslo_list) { newnfs_copycred(&dp->nfsdl_cred, cred); ret = nfscl_relock(vp, clp, nmp, lp, lckp, cred, p); if (ret == NFSERR_STALESTATEID || ret == NFSERR_STALEDONTRECOVER || ret == NFSERR_STALECLIENTID || ret == NFSERR_BADSESSION) { if (gotvp) vrele(vp); return (ret); } if (ret && !error) error = ret; } } if (gotvp) vrele(vp); return (error); } /* * Move a locally issued open over to an owner on the state list. * SIDE EFFECT: If it needs to sleep (do an rpc), it unlocks clstate and * returns with it unlocked. */ static int nfscl_moveopen(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp, struct nfsclopen *lop, struct nfsclowner *owp, struct nfscldeleg *dp, struct ucred *cred, NFSPROC_T *p) { struct nfsclopen *op, *nop; struct nfscldeleg *ndp; struct nfsnode *np; int error = 0, newone; /* * First, look for an appropriate open, If found, just increment the * opencnt in it. */ LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { if ((op->nfso_mode & lop->nfso_mode) == lop->nfso_mode && op->nfso_fhlen == lop->nfso_fhlen && !NFSBCMP(op->nfso_fh, lop->nfso_fh, op->nfso_fhlen)) { op->nfso_opencnt += lop->nfso_opencnt; nfscl_freeopen(lop, 1); return (0); } } /* No appropriate open, so we have to do one against the server. 
*/ np = VTONFS(vp); MALLOC(nop, struct nfsclopen *, sizeof (struct nfsclopen) + lop->nfso_fhlen - 1, M_NFSCLOPEN, M_WAITOK); newone = 0; nfscl_newopen(clp, NULL, &owp, NULL, &op, &nop, owp->nfsow_owner, lop->nfso_fh, lop->nfso_fhlen, cred, &newone); ndp = dp; error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data, np->n_v4->n4_fhlen, lop->nfso_fh, lop->nfso_fhlen, lop->nfso_mode, op, NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, &ndp, 0, 0, cred, p); if (error) { if (newone) nfscl_freeopen(op, 0); } else { op->nfso_mode |= lop->nfso_mode; op->nfso_opencnt += lop->nfso_opencnt; nfscl_freeopen(lop, 1); } if (nop != NULL) FREE((caddr_t)nop, M_NFSCLOPEN); if (ndp != NULL) { /* * What should I do with the returned delegation, since the * delegation is being recalled? For now, just printf and * through it away. */ printf("Moveopen returned deleg\n"); FREE((caddr_t)ndp, M_NFSCLDELEG); } return (error); } /* * Recall all delegations on this client. */ static void nfscl_totalrecall(struct nfsclclient *clp) { struct nfscldeleg *dp; TAILQ_FOREACH(dp, &clp->nfsc_deleg, nfsdl_list) { if ((dp->nfsdl_flags & NFSCLDL_DELEGRET) == 0) dp->nfsdl_flags |= NFSCLDL_RECALL; } } /* * Relock byte ranges. Called for delegation recall and state expiry. 
 */
static int
nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
    struct nfscllockowner *lp, struct nfscllock *lop, struct ucred *cred,
    NFSPROC_T *p)
{
	struct nfscllockowner *nlp;
	struct nfsfh *nfhp;
	u_int64_t off, len;
	u_int32_t clidrev = 0;
	int error, newone, donelocally;

	off = lop->nfslo_first;
	len = lop->nfslo_end - lop->nfslo_first;
	/*
	 * Re-acquire the byte range lock in the local state lists first.
	 * If that fails, or the lock was satisfied locally, there is
	 * nothing to push to the server.
	 */
	error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
	    clp, 1, NULL, lp->nfsl_lockflags, lp->nfsl_owner,
	    lp->nfsl_openowner, &nlp, &newone, &donelocally);
	if (error || donelocally)
		return (error);
	/*
	 * NOTE(review): clidrev is assigned here but never read below —
	 * looks like leftover code; confirm before removing.
	 */
	if (nmp->nm_clp != NULL)
		clidrev = nmp->nm_clp->nfsc_clientidrev;
	else
		clidrev = 0;
	nfhp = VTONFS(vp)->n_fhp;
	/* Now do the LOCK RPC against the server. */
	error = nfscl_trylock(nmp, vp, nfhp->nfh_fh,
	    nfhp->nfh_len, nlp, newone, 0, off,
	    len, lop->nfslo_type, cred, p);
	if (error)
		/* RPC failed: undo the local lockowner just created. */
		nfscl_freelockowner(nlp, 0);
	return (error);
}

/*
 * Called to re-open a file. Basically get a vnode for the file handle
 * and then call nfsrpc_openrpc() to do the rest.
 */
static int
nfsrpc_reopen(struct nfsmount *nmp, u_int8_t *fhp, int fhlen,
    u_int32_t mode, struct nfsclopen *op, struct nfscldeleg **dpp,
    struct ucred *cred, NFSPROC_T *p)
{
	struct nfsnode *np;
	vnode_t vp;
	int error;

	/* Get a vnode for the file handle; needed to do the open RPC. */
	error = nfscl_ngetreopen(nmp->nm_mountp, fhp, fhlen, p, &np);
	if (error)
		return (error);
	vp = NFSTOV(np);
	if (np->n_v4 != NULL) {
		/*
		 * Open by (parent fh, name) using the NFSv4 naming info
		 * cached on the nfsnode.
		 */
		error = nfscl_tryopen(nmp, vp, np->n_v4->n4_data,
		    np->n_v4->n4_fhlen, fhp, fhlen, mode, op,
		    NFS4NODENAME(np->n_v4), np->n_v4->n4_namelen, dpp, 0, 0,
		    cred, p);
	} else {
		/* Without the v4 naming info, the open cannot be redone. */
		error = EINVAL;
	}
	vrele(vp);
	return (error);
}

/*
 * Try an open against the server. Just call nfsrpc_openrpc(), retrying while
 * NFSERR_DELAY. Also, try system credentials, if the passed in credentials
 * fail.
*/ static int nfscl_tryopen(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, u_int8_t *newfhp, int newfhlen, u_int32_t mode, struct nfsclopen *op, u_int8_t *name, int namelen, struct nfscldeleg **ndpp, int reclaim, u_int32_t delegtype, struct ucred *cred, NFSPROC_T *p) { int error; do { error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p, 0, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstryop"); } while (error == NFSERR_DELAY); if (error == EAUTH || error == EACCES) { /* Try again using system credentials */ newnfs_setroot(cred); do { error = nfsrpc_openrpc(nmp, vp, fhp, fhlen, newfhp, newfhlen, mode, op, name, namelen, ndpp, reclaim, delegtype, cred, p, 1, 0); if (error == NFSERR_DELAY) (void) nfs_catnap(PZERO, error, "nfstryop"); } while (error == NFSERR_DELAY); } return (error); } /* * Try a byte range lock. Just loop on nfsrpc_lock() while it returns * NFSERR_DELAY. Also, retry with system credentials, if the provided * cred don't work. 
 */
static int
nfscl_trylock(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp,
    int fhlen, struct nfscllockowner *nlp, int newone, int reclaim,
    u_int64_t off, u_int64_t len, short type, struct ucred *cred,
    NFSPROC_T *p)
{
	struct nfsrv_descript nfsd, *nd = &nfsd;
	int error;

	/*
	 * Unlike the open/delegreturn retry helpers, the RPC status here
	 * comes back in nd->nd_repstat, so both it and the transport
	 * error are checked.
	 */
	do {
		error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp, newone,
		    reclaim, off, len, type, cred, p, 0);
		if (!error && nd->nd_repstat == NFSERR_DELAY)
			(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
			    "nfstrylck");
	} while (!error && nd->nd_repstat == NFSERR_DELAY);
	if (!error)
		error = nd->nd_repstat;
	if (error == EAUTH || error == EACCES) {
		/* Try again using root credentials */
		newnfs_setroot(cred);
		do {
			error = nfsrpc_lock(nd, nmp, vp, fhp, fhlen, nlp,
			    newone, reclaim, off, len, type, cred, p, 1);
			if (!error && nd->nd_repstat == NFSERR_DELAY)
				(void) nfs_catnap(PZERO, (int)nd->nd_repstat,
				    "nfstrylck");
		} while (!error && nd->nd_repstat == NFSERR_DELAY);
		if (!error)
			error = nd->nd_repstat;
	}
	return (error);
}

/*
 * Try a delegreturn against the server. Just call nfsrpc_delegreturn(),
 * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
 * credentials fail.
 */
static int
nfscl_trydelegreturn(struct nfscldeleg *dp, struct ucred *cred,
    struct nfsmount *nmp, NFSPROC_T *p)
{
	int error;

	/* Retry while the server asks us to back off. */
	do {
		error = nfsrpc_delegreturn(dp, cred, nmp, p, 0);
		if (error == NFSERR_DELAY)
			(void) nfs_catnap(PZERO, error, "nfstrydp");
	} while (error == NFSERR_DELAY);
	if (error == EAUTH || error == EACCES) {
		/* Try again using system credentials */
		newnfs_setroot(cred);
		do {
			error = nfsrpc_delegreturn(dp, cred, nmp, p, 1);
			if (error == NFSERR_DELAY)
				(void) nfs_catnap(PZERO, error, "nfstrydp");
		} while (error == NFSERR_DELAY);
	}
	return (error);
}

/*
 * Try a close against the server. Just call nfsrpc_closerpc(),
 * retrying while NFSERR_DELAY. Also, try system credentials, if the passed in
 * credentials fail.
 */
APPLESTATIC int
nfscl_tryclose(struct nfsclopen *op, struct ucred *cred,
    struct nfsmount *nmp, NFSPROC_T *p)
{
	struct nfsrv_descript nfsd, *nd = &nfsd;
	int error;

	/* Retry while the server asks us to back off. */
	do {
		error = nfsrpc_closerpc(nd, nmp, op, cred, p, 0);
		if (error == NFSERR_DELAY)
			(void) nfs_catnap(PZERO, error, "nfstrycl");
	} while (error == NFSERR_DELAY);
	if (error == EAUTH || error == EACCES) {
		/* Try again using system credentials */
		newnfs_setroot(cred);
		do {
			error = nfsrpc_closerpc(nd, nmp, op, cred, p, 1);
			if (error == NFSERR_DELAY)
				(void) nfs_catnap(PZERO, error, "nfstrycl");
		} while (error == NFSERR_DELAY);
	}
	return (error);
}

/*
 * Decide if a delegation on a file permits close without flushing writes
 * to the server. This might be a big performance win in some environments.
 * (Not useful until the client does caching on local stable storage.)
 *
 * Returns 1 when writes must be flushed (no usable write delegation),
 * 0 when a healthy write delegation covers the file size.
 */
APPLESTATIC int
nfscl_mustflush(vnode_t vp)
{
	struct nfsclclient *clp;
	struct nfscldeleg *dp;
	struct nfsnode *np;
	struct nfsmount *nmp;

	np = VTONFS(vp);
	nmp = VFSTONFS(vnode_mount(vp));
	/* Delegations only exist for NFSv4 mounts. */
	if (!NFSHASNFSV4(nmp))
		return (1);
	NFSLOCKCLSTATE();
	clp = nfscl_findcl(nmp);
	if (clp == NULL) {
		NFSUNLOCKCLSTATE();
		return (1);
	}
	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
	/*
	 * A flush can be skipped only for a write delegation that is not
	 * being recalled/returned, and whose size limit still covers the
	 * file (unless strict RFC 3530 behavior is disabled).
	 */
	if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_WRITE |
	    NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == NFSCLDL_WRITE &&
	    (dp->nfsdl_sizelimit >= np->n_size ||
	     !NFSHASSTRICT3530(nmp))) {
		NFSUNLOCKCLSTATE();
		return (0);
	}
	NFSUNLOCKCLSTATE();
	return (1);
}

/*
 * See if a (write) delegation exists for this file.
*/ APPLESTATIC int nfscl_nodeleg(vnode_t vp, int writedeleg) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np; struct nfsmount *nmp; np = VTONFS(vp); nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return (1); NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (1); } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & (NFSCLDL_RECALL | NFSCLDL_DELEGRET)) == 0 && (writedeleg == 0 || (dp->nfsdl_flags & NFSCLDL_WRITE) == NFSCLDL_WRITE)) { NFSUNLOCKCLSTATE(); return (0); } NFSUNLOCKCLSTATE(); return (1); } /* * Look for an associated delegation that should be DelegReturned. */ APPLESTATIC int nfscl_removedeleg(vnode_t vp, NFSPROC_T *p, nfsv4stateid_t *stp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsclowner *owp; struct nfscllockowner *lp; struct nfsmount *nmp; struct ucred *cred; struct nfsnode *np; int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept; nmp = VFSTONFS(vnode_mount(vp)); np = VTONFS(vp); NFSLOCKCLSTATE(); /* * Loop around waiting for: * - outstanding I/O operations on delegations to complete * - for a delegation on vp that has state, lock the client and * do a recall * - return delegation with no state */ while (1) { clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (retcnt); } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL) { /* * Wait for outstanding I/O ops to be done. 
*/ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); continue; } needsrecall = 0; LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (!LIST_EMPTY(&owp->nfsow_open)) { needsrecall = 1; break; } } if (!needsrecall) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { needsrecall = 1; break; } } } if (needsrecall && !triedrecall) { dp->nfsdl_flags |= NFSCLDL_DELEGRET; islept = 0; while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) break; } if (islept) continue; NFSUNLOCKCLSTATE(); cred = newnfs_getcred(); newnfs_copycred(&dp->nfsdl_cred, cred); (void) nfscl_recalldeleg(clp, nmp, dp, vp, cred, p, 0); NFSFREECRED(cred); triedrecall = 1; NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; continue; } *stp = dp->nfsdl_stateid; retcnt = 1; nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } if (igotlock) nfsv4_unlock(&clp->nfsc_lock, 0); NFSUNLOCKCLSTATE(); return (retcnt); } } /* * Look for associated delegation(s) that should be DelegReturned. */ APPLESTATIC int nfscl_renamedeleg(vnode_t fvp, nfsv4stateid_t *fstp, int *gotfdp, vnode_t tvp, nfsv4stateid_t *tstp, int *gottdp, NFSPROC_T *p) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsclowner *owp; struct nfscllockowner *lp; struct nfsmount *nmp; struct ucred *cred; struct nfsnode *np; int igotlock = 0, triedrecall = 0, needsrecall, retcnt = 0, islept; nmp = VFSTONFS(vnode_mount(fvp)); *gotfdp = 0; *gottdp = 0; NFSLOCKCLSTATE(); /* * Loop around waiting for: * - outstanding I/O operations on delegations to complete * - for a delegation on fvp that has state, lock the client and * do a recall * - return delegation(s) with no state. 
*/ while (1) { clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (retcnt); } np = VTONFS(fvp); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && *gotfdp == 0) { /* * Wait for outstanding I/O ops to be done. */ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); continue; } needsrecall = 0; LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (!LIST_EMPTY(&owp->nfsow_open)) { needsrecall = 1; break; } } if (!needsrecall) { LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { needsrecall = 1; break; } } } if (needsrecall && !triedrecall) { dp->nfsdl_flags |= NFSCLDL_DELEGRET; islept = 0; while (!igotlock) { igotlock = nfsv4_lock(&clp->nfsc_lock, 1, &islept, NFSCLSTATEMUTEXPTR, NULL); if (islept) break; } if (islept) continue; NFSUNLOCKCLSTATE(); cred = newnfs_getcred(); newnfs_copycred(&dp->nfsdl_cred, cred); (void) nfscl_recalldeleg(clp, nmp, dp, fvp, cred, p, 0); NFSFREECRED(cred); triedrecall = 1; NFSLOCKCLSTATE(); nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; continue; } *fstp = dp->nfsdl_stateid; retcnt++; *gotfdp = 1; nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } if (igotlock) { nfsv4_unlock(&clp->nfsc_lock, 0); igotlock = 0; } if (tvp != NULL) { np = VTONFS(tvp); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && *gottdp == 0) { /* * Wait for outstanding I/O ops to be done. 
*/ if (dp->nfsdl_rwlock.nfslock_usecnt > 0) { dp->nfsdl_rwlock.nfslock_lock |= NFSV4LOCK_WANTED; (void) nfsmsleep(&dp->nfsdl_rwlock, NFSCLSTATEMUTEXPTR, PZERO, "nfscld", NULL); continue; } LIST_FOREACH(owp, &dp->nfsdl_owner, nfsow_list) { if (!LIST_EMPTY(&owp->nfsow_open)) { NFSUNLOCKCLSTATE(); return (retcnt); } } LIST_FOREACH(lp, &dp->nfsdl_lock, nfsl_list) { if (!LIST_EMPTY(&lp->nfsl_lock)) { NFSUNLOCKCLSTATE(); return (retcnt); } } *tstp = dp->nfsdl_stateid; retcnt++; *gottdp = 1; nfscl_cleandeleg(dp); nfscl_freedeleg(&clp->nfsc_deleg, dp); } } NFSUNLOCKCLSTATE(); return (retcnt); } } /* * Get a reference on the clientid associated with the mount point. * Return 1 if success, 0 otherwise. */ APPLESTATIC int nfscl_getref(struct nfsmount *nmp) { struct nfsclclient *clp; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return (0); } nfsv4_getref(&clp->nfsc_lock, NULL, NFSCLSTATEMUTEXPTR, NULL); NFSUNLOCKCLSTATE(); return (1); } /* * Release a reference on a clientid acquired with the above call. */ APPLESTATIC void nfscl_relref(struct nfsmount *nmp) { struct nfsclclient *clp; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } nfsv4_relref(&clp->nfsc_lock); NFSUNLOCKCLSTATE(); } /* * Save the size attribute in the delegation, since the nfsnode * is going away. */ APPLESTATIC void nfscl_reclaimnode(vnode_t vp) { struct nfsclclient *clp; struct nfscldeleg *dp; struct nfsnode *np = VTONFS(vp); struct nfsmount *nmp; nmp = VFSTONFS(vnode_mount(vp)); if (!NFSHASNFSV4(nmp)) return; NFSLOCKCLSTATE(); clp = nfscl_findcl(nmp); if (clp == NULL) { NFSUNLOCKCLSTATE(); return; } dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) dp->nfsdl_size = np->n_size; NFSUNLOCKCLSTATE(); } /* * Get the saved size attribute in the delegation, since it is a * newly allocated nfsnode. 
 */
APPLESTATIC void
nfscl_newnode(vnode_t vp)
{
	struct nfsclclient *clp;
	struct nfscldeleg *dp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp;

	nmp = VFSTONFS(vnode_mount(vp));
	/* Delegations only exist for NFSv4 mounts. */
	if (!NFSHASNFSV4(nmp))
		return;
	NFSLOCKCLSTATE();
	clp = nfscl_findcl(nmp);
	if (clp == NULL) {
		NFSUNLOCKCLSTATE();
		return;
	}
	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
	/*
	 * Restore the size saved in the write delegation by
	 * nfscl_reclaimnode() when the previous nfsnode went away.
	 */
	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE))
		np->n_size = dp->nfsdl_size;
	NFSUNLOCKCLSTATE();
}

/*
 * If there is a valid write delegation for this file, set the modtime
 * to the local clock time.
 */
APPLESTATIC void
nfscl_delegmodtime(vnode_t vp)
{
	struct nfsclclient *clp;
	struct nfscldeleg *dp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp;

	nmp = VFSTONFS(vnode_mount(vp));
	/* Delegations only exist for NFSv4 mounts. */
	if (!NFSHASNFSV4(nmp))
		return;
	NFSLOCKCLSTATE();
	clp = nfscl_findcl(nmp);
	if (clp == NULL) {
		NFSUNLOCKCLSTATE();
		return;
	}
	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
	if (dp != NULL && (dp->nfsdl_flags & NFSCLDL_WRITE)) {
		/*
		 * Record the local time and mark it set, so that
		 * nfscl_deleggetmodtime() can hand it back later.
		 */
		nanotime(&dp->nfsdl_modtime);
		dp->nfsdl_flags |= NFSCLDL_MODTIMESET;
	}
	NFSUNLOCKCLSTATE();
}

/*
 * If there is a valid write delegation for this file with a modtime set,
 * put that modtime in mtime.
 */
APPLESTATIC void
nfscl_deleggetmodtime(vnode_t vp, struct timespec *mtime)
{
	struct nfsclclient *clp;
	struct nfscldeleg *dp;
	struct nfsnode *np = VTONFS(vp);
	struct nfsmount *nmp;

	nmp = VFSTONFS(vnode_mount(vp));
	/* Delegations only exist for NFSv4 mounts. */
	if (!NFSHASNFSV4(nmp))
		return;
	NFSLOCKCLSTATE();
	clp = nfscl_findcl(nmp);
	if (clp == NULL) {
		NFSUNLOCKCLSTATE();
		return;
	}
	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
	/*
	 * Only copy out a modtime that nfscl_delegmodtime() actually set
	 * on a write delegation; otherwise *mtime is left untouched.
	 */
	if (dp != NULL &&
	    (dp->nfsdl_flags & (NFSCLDL_WRITE | NFSCLDL_MODTIMESET)) ==
	    (NFSCLDL_WRITE | NFSCLDL_MODTIMESET))
		*mtime = dp->nfsdl_modtime;
	NFSUNLOCKCLSTATE();
}

/*
 * Map nd->nd_repstat to an XDR'd NFSv4 error suitable for a callback
 * reply, filtering it through the per-operation table nfscl_cberrmap[]
 * so that only errors permitted for that op are returned verbatim.
 */
static int
nfscl_errmap(struct nfsrv_descript *nd, u_int32_t minorvers)
{
	short *defaulterrp, *errp;

	if (!nd->nd_repstat)
		return (0);
	if (nd->nd_procnum == NFSPROC_NOOP)
		return (txdr_unsigned(nd->nd_repstat & 0xffff));
	if (nd->nd_repstat == EBADRPC)
		return (txdr_unsigned(NFSERR_BADXDR));
	/* These two are always passed through as-is. */
	if (nd->nd_repstat == NFSERR_MINORVERMISMATCH ||
	    nd->nd_repstat == NFSERR_OPILLEGAL)
		return (txdr_unsigned(nd->nd_repstat));
	if (nd->nd_repstat >= NFSERR_BADIOMODE && nd->nd_repstat < 20000 &&
	    minorvers > NFSV4_MINORVERSION) {
		/* NFSv4.n error. */
		return (txdr_unsigned(nd->nd_repstat));
	}
	if (nd->nd_procnum < NFSV4OP_CBNOPS)
		errp = defaulterrp = nfscl_cberrmap[nd->nd_procnum];
	else
		return (txdr_unsigned(nd->nd_repstat));
	/*
	 * Entry 0 of the table is the default error; the pre-increment
	 * skips it while scanning the zero-terminated allowed list.
	 */
	while (*++errp)
		if (*errp == (short)nd->nd_repstat)
			return (txdr_unsigned(nd->nd_repstat));
	return (txdr_unsigned(*defaulterrp));
}

/*
 * Called to find/add a layout to a client.
 * This function returns the layout with a refcnt (shared lock) upon
 * success (returns 0) or with no lock/refcnt on the layout when an
 * error is returned.
 * If a layout is passed in via lypp, it is locked (exclusively locked).
*/ APPLESTATIC int nfscl_layout(struct nfsmount *nmp, vnode_t vp, u_int8_t *fhp, int fhlen, nfsv4stateid_t *stateidp, int layouttype, int retonclose, struct nfsclflayouthead *fhlp, struct nfscllayout **lypp, struct ucred *cred, NFSPROC_T *p) { struct nfsclclient *clp; struct nfscllayout *lyp, *tlyp; struct nfsclflayout *flp; struct nfsnode *np = VTONFS(vp); mount_t mp; int layout_passed_in; mp = nmp->nm_mountp; layout_passed_in = 1; tlyp = NULL; lyp = *lypp; if (lyp == NULL) { layout_passed_in = 0; tlyp = malloc(sizeof(*tlyp) + fhlen - 1, M_NFSLAYOUT, M_WAITOK | M_ZERO); } NFSLOCKCLSTATE(); clp = nmp->nm_clp; if (clp == NULL) { if (layout_passed_in != 0) nfsv4_unlock(&lyp->nfsly_lock, 0); NFSUNLOCKCLSTATE(); if (tlyp != NULL) free(tlyp, M_NFSLAYOUT); return (EPERM); } if (lyp == NULL) { /* * Although no lyp was passed in, another thread might have * allocated one. If one is found, just increment it's ref * count and return it. */ lyp = nfscl_findlayout(clp, fhp, fhlen); if (lyp == NULL) { lyp = tlyp; tlyp = NULL; lyp->nfsly_stateid.seqid = stateidp->seqid; lyp->nfsly_stateid.other[0] = stateidp->other[0]; lyp->nfsly_stateid.other[1] = stateidp->other[1]; lyp->nfsly_stateid.other[2] = stateidp->other[2]; lyp->nfsly_lastbyte = 0; LIST_INIT(&lyp->nfsly_flayread); LIST_INIT(&lyp->nfsly_flayrw); LIST_INIT(&lyp->nfsly_recall); lyp->nfsly_filesid[0] = np->n_vattr.na_filesid[0]; lyp->nfsly_filesid[1] = np->n_vattr.na_filesid[1]; lyp->nfsly_clp = clp; if (layouttype == NFSLAYOUT_FLEXFILE) lyp->nfsly_flags = NFSLY_FLEXFILE; else lyp->nfsly_flags = NFSLY_FILES; if (retonclose != 0) lyp->nfsly_flags |= NFSLY_RETONCLOSE; lyp->nfsly_fhlen = fhlen; NFSBCOPY(fhp, lyp->nfsly_fh, fhlen); TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); LIST_INSERT_HEAD(NFSCLLAYOUTHASH(clp, fhp, fhlen), lyp, nfsly_hash); lyp->nfsly_timestamp = NFSD_MONOSEC + 120; nfscl_layoutcnt++; } else { if (retonclose != 0) lyp->nfsly_flags |= NFSLY_RETONCLOSE; TAILQ_REMOVE(&clp->nfsc_layout, lyp, 
nfsly_list); TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); lyp->nfsly_timestamp = NFSD_MONOSEC + 120; } nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); if (NFSCL_FORCEDISM(mp)) { NFSUNLOCKCLSTATE(); if (tlyp != NULL) free(tlyp, M_NFSLAYOUT); return (EPERM); } *lypp = lyp; } else lyp->nfsly_stateid.seqid = stateidp->seqid; /* Merge the new list of File Layouts into the list. */ flp = LIST_FIRST(fhlp); if (flp != NULL) { if (flp->nfsfl_iomode == NFSLAYOUTIOMODE_READ) nfscl_mergeflayouts(&lyp->nfsly_flayread, fhlp); else nfscl_mergeflayouts(&lyp->nfsly_flayrw, fhlp); } if (layout_passed_in != 0) nfsv4_unlock(&lyp->nfsly_lock, 1); NFSUNLOCKCLSTATE(); if (tlyp != NULL) free(tlyp, M_NFSLAYOUT); return (0); } /* * Search for a layout by MDS file handle. * If one is found, it is returned with a refcnt (shared lock) iff * retflpp returned non-NULL and locked (exclusive locked) iff retflpp is * returned NULL. */ struct nfscllayout * nfscl_getlayout(struct nfsclclient *clp, uint8_t *fhp, int fhlen, uint64_t off, struct nfsclflayout **retflpp, int *recalledp) { struct nfscllayout *lyp; mount_t mp; int error, igotlock; mp = clp->nfsc_nmp->nm_mountp; *recalledp = 0; *retflpp = NULL; NFSLOCKCLSTATE(); lyp = nfscl_findlayout(clp, fhp, fhlen); if (lyp != NULL) { if ((lyp->nfsly_flags & NFSLY_RECALL) == 0) { TAILQ_REMOVE(&clp->nfsc_layout, lyp, nfsly_list); TAILQ_INSERT_HEAD(&clp->nfsc_layout, lyp, nfsly_list); lyp->nfsly_timestamp = NFSD_MONOSEC + 120; error = nfscl_findlayoutforio(lyp, off, NFSV4OPEN_ACCESSREAD, retflpp); if (error == 0) nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp); else { do { igotlock = nfsv4_lock(&lyp->nfsly_lock, 1, NULL, NFSCLSTATEMUTEXPTR, mp); } while (igotlock == 0 && !NFSCL_FORCEDISM(mp)); *retflpp = NULL; } if (NFSCL_FORCEDISM(mp)) { lyp = NULL; *recalledp = 1; } } else { lyp = NULL; *recalledp = 1; } } NFSUNLOCKCLSTATE(); return (lyp); } /* * Search for a layout by MDS file handle. 
If one is found, mark in to be * recalled, if it already marked "return on close". */ static void nfscl_retoncloselayout(vnode_t vp, struct nfsclclient *clp, uint8_t *fhp, int fhlen, struct nfsclrecalllayout **recallpp) { struct nfscllayout *lyp; uint32_t iomode; if (vp->v_type != VREG || !NFSHASPNFS(VFSTONFS(vnode_mount(vp))) || nfscl_enablecallb == 0 || nfs_numnfscbd == 0 || (VTONFS(vp)->n_flag & NNOLAYOUT) != 0) return; lyp = nfscl_findlayout(clp, fhp, fhlen); if (lyp != NULL && (lyp->nfsly_flags & (NFSLY_RETONCLOSE | NFSLY_RECALL)) == NFSLY_RETONCLOSE) { iomode = 0; if (!LIST_EMPTY(&lyp->nfsly_flayread)) iomode |= NFSLAYOUTIOMODE_READ; if (!LIST_EMPTY(&lyp->nfsly_flayrw)) iomode |= NFSLAYOUTIOMODE_RW; (void)nfscl_layoutrecall(NFSLAYOUTRETURN_FILE, lyp, iomode, 0, UINT64_MAX, lyp->nfsly_stateid.seqid, *recallpp); NFSCL_DEBUG(4, "retoncls recall iomode=%d\n", iomode); *recallpp = NULL; } } /* * Dereference a layout. */ void nfscl_rellayout(struct nfscllayout *lyp, int exclocked) { NFSLOCKCLSTATE(); if (exclocked != 0) nfsv4_unlock(&lyp->nfsly_lock, 0); else nfsv4_relref(&lyp->nfsly_lock); NFSUNLOCKCLSTATE(); } /* * Search for a devinfo by deviceid. If one is found, return it after * acquiring a reference count on it. */ struct nfscldevinfo * nfscl_getdevinfo(struct nfsclclient *clp, uint8_t *deviceid, struct nfscldevinfo *dip) { NFSLOCKCLSTATE(); if (dip == NULL) dip = nfscl_finddevinfo(clp, deviceid); if (dip != NULL) dip->nfsdi_refcnt++; NFSUNLOCKCLSTATE(); return (dip); } /* * Dereference a devinfo structure. */ static void nfscl_reldevinfo_locked(struct nfscldevinfo *dip) { dip->nfsdi_refcnt--; if (dip->nfsdi_refcnt == 0) wakeup(&dip->nfsdi_refcnt); } /* * Dereference a devinfo structure. */ void nfscl_reldevinfo(struct nfscldevinfo *dip) { NFSLOCKCLSTATE(); nfscl_reldevinfo_locked(dip); NFSUNLOCKCLSTATE(); } /* * Find a layout for this file handle. Return NULL upon failure. 
 */
static struct nfscllayout *
nfscl_findlayout(struct nfsclclient *clp, u_int8_t *fhp, int fhlen)
{
	struct nfscllayout *lyp;

	/* Hash on the file handle, then match both length and bytes. */
	LIST_FOREACH(lyp, NFSCLLAYOUTHASH(clp, fhp, fhlen), nfsly_hash)
		if (lyp->nfsly_fhlen == fhlen &&
		    !NFSBCMP(lyp->nfsly_fh, fhp, fhlen))
			break;
	/* NULL when the hash chain has no match. */
	return (lyp);
}

/*
 * Find a devinfo for this deviceid. Return NULL upon failure.
 */
static struct nfscldevinfo *
nfscl_finddevinfo(struct nfsclclient *clp, uint8_t *deviceid)
{
	struct nfscldevinfo *dip;

	LIST_FOREACH(dip, &clp->nfsc_devinfo, nfsdi_list)
		if (NFSBCMP(dip->nfsdi_deviceid, deviceid, NFSX_V4DEVICEID)
		    == 0)
			break;
	return (dip);
}

/*
 * Merge the new file layout list into the main one, maintaining it in
 * increasing offset order.
 *
 * Entries are moved from newfhlp into fhlp; both lists are assumed to
 * already be sorted by nfsfl_off, so this is a single-pass merge.
 */
static void
nfscl_mergeflayouts(struct nfsclflayouthead *fhlp,
    struct nfsclflayouthead *newfhlp)
{
	struct nfsclflayout *flp, *nflp, *prevflp, *tflp;

	flp = LIST_FIRST(fhlp);
	prevflp = NULL;
	/* SAFE variant: nflp is re-linked into fhlp inside the loop. */
	LIST_FOREACH_SAFE(nflp, newfhlp, nfsfl_list, tflp) {
		/* Advance to the insertion point for this new entry. */
		while (flp != NULL && flp->nfsfl_off < nflp->nfsfl_off) {
			prevflp = flp;
			flp = LIST_NEXT(flp, nfsfl_list);
		}
		if (prevflp == NULL)
			LIST_INSERT_HEAD(fhlp, nflp, nfsfl_list);
		else
			LIST_INSERT_AFTER(prevflp, nflp, nfsfl_list);
		/* The just-inserted entry becomes the new predecessor. */
		prevflp = nflp;
	}
}

/*
 * Add this nfscldevinfo to the client, if it doesn't already exist.
 * This function consumes the structure pointed at by dip, if not NULL.
*/ APPLESTATIC int nfscl_adddevinfo(struct nfsmount *nmp, struct nfscldevinfo *dip, struct nfsclflayout *flp) { struct nfsclclient *clp; struct nfscldevinfo *tdip; uint8_t *dev; NFSLOCKCLSTATE(); clp = nmp->nm_clp; if (clp == NULL) { NFSUNLOCKCLSTATE(); if (dip != NULL) free(dip, M_NFSDEVINFO); return (ENODEV); } if ((flp->nfsfl_flags & NFSFL_FILE) != 0) dev = flp->nfsfl_dev; else dev = flp->nfsfl_ffm[0].dev; tdip = nfscl_finddevinfo(clp, dev); if (tdip != NULL) { tdip->nfsdi_layoutrefs++; flp->nfsfl_devp = tdip; nfscl_reldevinfo_locked(tdip); NFSUNLOCKCLSTATE(); if (dip != NULL) free(dip, M_NFSDEVINFO); return (0); } if (dip != NULL) { LIST_INSERT_HEAD(&clp->nfsc_devinfo, dip, nfsdi_list); dip->nfsdi_layoutrefs = 1; flp->nfsfl_devp = dip; } NFSUNLOCKCLSTATE(); if (dip == NULL) return (ENODEV); return (0); } /* * Free up a layout structure and associated file layout structure(s). */ APPLESTATIC void nfscl_freelayout(struct nfscllayout *layp) { struct nfsclflayout *flp, *nflp; struct nfsclrecalllayout *rp, *nrp; LIST_FOREACH_SAFE(flp, &layp->nfsly_flayread, nfsfl_list, nflp) { LIST_REMOVE(flp, nfsfl_list); nfscl_freeflayout(flp); } LIST_FOREACH_SAFE(flp, &layp->nfsly_flayrw, nfsfl_list, nflp) { LIST_REMOVE(flp, nfsfl_list); nfscl_freeflayout(flp); } LIST_FOREACH_SAFE(rp, &layp->nfsly_recall, nfsrecly_list, nrp) { LIST_REMOVE(rp, nfsrecly_list); free(rp, M_NFSLAYRECALL); } nfscl_layoutcnt--; free(layp, M_NFSLAYOUT); } /* * Free up a file layout structure. */ APPLESTATIC void nfscl_freeflayout(struct nfsclflayout *flp) { int i, j; if ((flp->nfsfl_flags & NFSFL_FILE) != 0) for (i = 0; i < flp->nfsfl_fhcnt; i++) free(flp->nfsfl_fh[i], M_NFSFH); if ((flp->nfsfl_flags & NFSFL_FLEXFILE) != 0) for (i = 0; i < flp->nfsfl_mirrorcnt; i++) for (j = 0; j < flp->nfsfl_ffm[i].fhcnt; j++) free(flp->nfsfl_ffm[i].fh[j], M_NFSFH); if (flp->nfsfl_devp != NULL) flp->nfsfl_devp->nfsdi_layoutrefs--; free(flp, M_NFSFLAYOUT); } /* * Free up a file layout devinfo structure. 
 */
APPLESTATIC void
nfscl_freedevinfo(struct nfscldevinfo *dip)
{

    free(dip, M_NFSDEVINFO);
}

/*
 * Mark any layouts that match as recalled.  The caller supplies recallp
 * (pre-allocated) which this function fills in and links into the
 * layout's recall list; the layout is flagged NFSLY_RECALL.
 */
static int
nfscl_layoutrecall(int recalltype, struct nfscllayout *lyp, uint32_t iomode,
    uint64_t off, uint64_t len, uint32_t stateseqid,
    struct nfsclrecalllayout *recallp)
{
    struct nfsclrecalllayout *rp, *orp;

    recallp->nfsrecly_recalltype = recalltype;
    recallp->nfsrecly_iomode = iomode;
    recallp->nfsrecly_stateseqid = stateseqid;
    recallp->nfsrecly_off = off;
    recallp->nfsrecly_len = len;
    /*
     * Order the list as file returns first, followed by fsid and any
     * returns, both in increasing stateseqid order.
     * Note that the seqids wrap around, so 1 is after 0xffffffff.
     * (I'm not sure this is correct because I find RFC5661 confusing
     *  on this, but hopefully it will work ok.)
     */
    orp = NULL;
    LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
        orp = rp;
        /*
         * Insert before rp when rp sorts after recallp: either rp
         * is a non-FILE return while this is a FILE return, or both
         * are in the same class and this seqid precedes rp's.
         */
        if ((recalltype == NFSLAYOUTRETURN_FILE &&
             (rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE ||
              nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) ||
            (recalltype != NFSLAYOUTRETURN_FILE &&
             rp->nfsrecly_recalltype != NFSLAYOUTRETURN_FILE &&
             nfscl_seq(stateseqid, rp->nfsrecly_stateseqid) != 0)) {
            LIST_INSERT_BEFORE(rp, recallp, nfsrecly_list);
            break;
        }
    }
    if (rp == NULL) {
        /* Ran off the end: append (or start the list when empty). */
        if (orp == NULL)
            LIST_INSERT_HEAD(&lyp->nfsly_recall, recallp,
                nfsrecly_list);
        else
            LIST_INSERT_AFTER(orp, recallp, nfsrecly_list);
    }
    lyp->nfsly_flags |= NFSLY_RECALL;
    return (0);
}

/*
 * Compare the two seqids for ordering.  The trick is that the seqids can
 * wrap around from 0xffffffff->0, so check for the cases where one
 * has wrapped around.
 * Return 1 if seqid1 comes before seqid2, 0 otherwise.
 */
static int
nfscl_seq(uint32_t seqid1, uint32_t seqid2)
{

    if (seqid2 > seqid1 && (seqid2 - seqid1) >= 0x7fffffff)
        /* seqid2 has wrapped around. */
        return (0);
    if (seqid1 > seqid2 && (seqid1 - seqid2) >= 0x7fffffff)
        /* seqid1 has wrapped around. */
        return (1);
    if (seqid1 <= seqid2)
        return (1);
    return (0);
}

/*
 * Do a layout return for each of the recalls.
 */
static void
nfscl_layoutreturn(struct nfsmount *nmp, struct nfscllayout *lyp,
    struct ucred *cred, NFSPROC_T *p)
{
    struct nfsclrecalllayout *rp;
    nfsv4stateid_t stateid;
    int layouttype;

    /* Snapshot the layout stateid for use in the RPCs. */
    NFSBCOPY(lyp->nfsly_stateid.other, stateid.other, NFSX_STATEIDOTHER);
    stateid.seqid = lyp->nfsly_stateid.seqid;
    if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
        layouttype = NFSLAYOUT_NFSV4_1_FILES;
    else
        layouttype = NFSLAYOUT_FLEXFILE;
    /* Errors from the LayoutReturn RPCs are intentionally ignored. */
    LIST_FOREACH(rp, &lyp->nfsly_recall, nfsrecly_list) {
        (void)nfsrpc_layoutreturn(nmp, lyp->nfsly_fh,
            lyp->nfsly_fhlen, 0, layouttype,
            rp->nfsrecly_iomode, rp->nfsrecly_recalltype,
            rp->nfsrecly_off, rp->nfsrecly_len,
            &stateid, cred, p, NULL);
    }
}

/*
 * Do the layout commit for a file layout.
 */
static void
nfscl_dolayoutcommit(struct nfsmount *nmp, struct nfscllayout *lyp,
    struct ucred *cred, NFSPROC_T *p)
{
    struct nfsclflayout *flp;
    uint64_t len;
    int error, layouttype;

    if ((lyp->nfsly_flags & NFSLY_FILES) != 0)
        layouttype = NFSLAYOUT_NFSV4_1_FILES;
    else
        layouttype = NFSLAYOUT_FLEXFILE;
    LIST_FOREACH(flp, &lyp->nfsly_flayrw, nfsfl_list) {
        if (layouttype == NFSLAYOUT_FLEXFILE &&
            (flp->nfsfl_fflags & NFSFLEXFLAG_NO_LAYOUTCOMMIT) != 0) {
            NFSCL_DEBUG(4, "Flex file: no layoutcommit\n");
            /* If not supported, don't bother doing it. */
            NFSLOCKMNT(nmp);
            nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
            NFSUNLOCKMNT(nmp);
            break;
        } else if (flp->nfsfl_off <= lyp->nfsly_lastbyte) {
            /* Only commit segments that were actually written. */
            len = flp->nfsfl_end - flp->nfsfl_off;
            error = nfsrpc_layoutcommit(nmp, lyp->nfsly_fh,
                lyp->nfsly_fhlen, 0, flp->nfsfl_off, len,
                lyp->nfsly_lastbyte, &lyp->nfsly_stateid,
                layouttype, cred, p, NULL);
            NFSCL_DEBUG(4, "layoutcommit err=%d\n", error);
            if (error == NFSERR_NOTSUPP) {
                /* If not supported, don't bother doing it. */
                NFSLOCKMNT(nmp);
                nmp->nm_state |= NFSSTA_NOLAYOUTCOMMIT;
                NFSUNLOCKMNT(nmp);
                break;
            }
        }
    }
}

/*
 * Commit all layouts for a file (vnode).
 */
int
nfscl_layoutcommit(vnode_t vp, NFSPROC_T *p)
{
    struct nfsclclient *clp;
    struct nfscllayout *lyp;
    struct nfsnode *np = VTONFS(vp);
    mount_t mp;
    struct nfsmount *nmp;

    mp = vnode_mount(vp);
    nmp = VFSTONFS(mp);
    /* Server told us it does not support LayoutCommit: nothing to do. */
    if (NFSHASNOLAYOUTCOMMIT(nmp))
        return (0);
    NFSLOCKCLSTATE();
    clp = nmp->nm_clp;
    if (clp == NULL) {
        NFSUNLOCKCLSTATE();
        return (EPERM);
    }
    lyp = nfscl_findlayout(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
    if (lyp == NULL) {
        NFSUNLOCKCLSTATE();
        return (EPERM);
    }
    /* Hold a reference so the layout survives while unlocked below. */
    nfsv4_getref(&lyp->nfsly_lock, NULL, NFSCLSTATEMUTEXPTR, mp);
    if (NFSCL_FORCEDISM(mp)) {
        NFSUNLOCKCLSTATE();
        return (EPERM);
    }
tryagain:
    /*
     * The WRITTEN flag may be set again by a concurrent writer while
     * the state lock is dropped for the RPC, so loop until it stays
     * clear.
     */
    if ((lyp->nfsly_flags & NFSLY_WRITTEN) != 0) {
        lyp->nfsly_flags &= ~NFSLY_WRITTEN;
        NFSUNLOCKCLSTATE();
        NFSCL_DEBUG(4, "do layoutcommit2\n");
        nfscl_dolayoutcommit(clp->nfsc_nmp, lyp, NFSPROCCRED(p), p);
        NFSLOCKCLSTATE();
        goto tryagain;
    }
    nfsv4_relref(&lyp->nfsly_lock);
    NFSUNLOCKCLSTATE();
    return (0);
}
Index: head/sys/fs/nfsclient/nfs_kdtrace.h
===================================================================
--- head/sys/fs/nfsclient/nfs_kdtrace.h	(revision 326267)
+++ head/sys/fs/nfsclient/nfs_kdtrace.h	(revision 326268)
@@ -1,120 +1,122 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
  * Copyright (c) 2009 Robert N. M. Watson
  * All rights reserved.
  *
  * This software was developed at the University of Cambridge Computer
  * Laboratory with support from a grant from Google, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _NFSCL_NFS_KDTRACE_H_ #define _NFSCL_NFS_KDTRACE_H_ /* * Definitions for NFS access cache probes. */ extern uint32_t nfscl_accesscache_flush_done_id; extern uint32_t nfscl_accesscache_get_hit_id; extern uint32_t nfscl_accesscache_get_miss_id; extern uint32_t nfscl_accesscache_load_done_id; /* * Definitions for NFS attribute cache probes. 
 */
extern uint32_t	nfscl_attrcache_flush_done_id;
extern uint32_t	nfscl_attrcache_get_hit_id;
extern uint32_t	nfscl_attrcache_get_miss_id;
extern uint32_t	nfscl_attrcache_load_done_id;

#ifdef KDTRACE_HOOKS
/* NOTE(review): header name lost in extraction — presumably
 * <sys/dtrace_bsd.h>; confirm against the repository. */
#include

/*
 * Each macro fires the corresponding DTrace SDT probe only when the
 * provider has registered a hook (probe pointer non-NULL); otherwise
 * it is a no-op.
 */
#define	KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp)	do {			\
	if (dtrace_nfscl_accesscache_flush_done_probe != NULL)		\
		(dtrace_nfscl_accesscache_flush_done_probe)(		\
		    nfscl_accesscache_flush_done_id, (vp));		\
} while (0)

#define	KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, uid, mode)	do {		\
	if (dtrace_nfscl_accesscache_get_hit_probe != NULL)		\
		(dtrace_nfscl_accesscache_get_hit_probe)(		\
		    nfscl_accesscache_get_hit_id, (vp), (uid),		\
		    (mode));						\
} while (0)

#define	KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, uid, mode)	do {		\
	if (dtrace_nfscl_accesscache_get_miss_probe != NULL)		\
		(dtrace_nfscl_accesscache_get_miss_probe)(		\
		    nfscl_accesscache_get_miss_id, (vp), (uid),		\
		    (mode));						\
} while (0)

#define	KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, uid, rmode, error)	do { \
	if (dtrace_nfscl_accesscache_load_done_probe != NULL)		\
		(dtrace_nfscl_accesscache_load_done_probe)(		\
		    nfscl_accesscache_load_done_id, (vp), (uid),	\
		    (rmode), (error));					\
} while (0)

#define	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp)	do {			\
	if (dtrace_nfscl_attrcache_flush_done_probe != NULL)		\
		(dtrace_nfscl_attrcache_flush_done_probe)(		\
		    nfscl_attrcache_flush_done_id, (vp));		\
} while (0)

#define	KDTRACE_NFS_ATTRCACHE_GET_HIT(vp, vap)	do {			\
	if (dtrace_nfscl_attrcache_get_hit_probe != NULL)		\
		(dtrace_nfscl_attrcache_get_hit_probe)(			\
		    nfscl_attrcache_get_hit_id, (vp), (vap));		\
} while (0)

#define	KDTRACE_NFS_ATTRCACHE_GET_MISS(vp)	do {			\
	if (dtrace_nfscl_attrcache_get_miss_probe != NULL)		\
		(dtrace_nfscl_attrcache_get_miss_probe)(		\
		    nfscl_attrcache_get_miss_id, (vp));			\
} while (0)

#define	KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, error)	do {		\
	if (dtrace_nfscl_attrcache_load_done_probe != NULL)		\
		(dtrace_nfscl_attrcache_load_done_probe)(		\
		    nfscl_attrcache_load_done_id, (vp), (vap),		\
		    (error));						\
} while (0)

#else /* !KDTRACE_HOOKS */

/* Without KDTRACE_HOOKS the probe macros compile away to nothing. */
#define	KDTRACE_NFS_ACCESSCACHE_FLUSH_DONE(vp)
#define	KDTRACE_NFS_ACCESSCACHE_GET_HIT(vp, uid, mode)
#define	KDTRACE_NFS_ACCESSCACHE_GET_MISS(vp, uid, mode)
#define	KDTRACE_NFS_ACCESSCACHE_LOAD_DONE(vp, uid, rmode, error)

#define	KDTRACE_NFS_ATTRCACHE_FLUSH_DONE(vp)
#define	KDTRACE_NFS_ATTRCACHE_GET_HIT(vp, vap)
#define	KDTRACE_NFS_ATTRCACHE_GET_MISS(vp)
#define	KDTRACE_NFS_ATTRCACHE_LOAD_DONE(vp, vap, error)

#endif /* KDTRACE_HOOKS */

#endif /* !_NFSCL_NFS_KDTRACE_H_ */
Index: head/sys/fs/nfsclient/nlminfo.h
===================================================================
--- head/sys/fs/nfsclient/nlminfo.h	(revision 326267)
+++ head/sys/fs/nfsclient/nlminfo.h	(revision 326268)
@@ -1,41 +1,43 @@
 /*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
  * Copyright (c) 1998 Berkeley Software Design, Inc. All rights reserved.
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  * 3. Berkeley Software Design Inc's name may not be used to endorse or
  *    promote products derived from this software without specific prior
  *    written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY BERKELEY SOFTWARE DESIGN INC ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  * ARE DISCLAIMED.
 IN NO EVENT SHALL BERKELEY SOFTWARE DESIGN INC BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * Misc NLM information, some needed for the master lockd process, and some
 * needed by every process doing nlm based locking.
 */
struct nlminfo {
	/* these are used by any process doing nlm locking */
	int	msg_seq;		/* sequence counter for lock requests */
	int	retcode;		/* return code for lock requests */
	int	set_getlk_pid;		/* non-zero when getlk_pid is valid */
	int	getlk_pid;		/* pid returned by a getlk request */
	struct timeval pid_start;	/* process starting time */
};
Index: head/sys/fs/nfsserver/nfs_fha_new.c
===================================================================
--- head/sys/fs/nfsserver/nfs_fha_new.c	(revision 326267)
+++ head/sys/fs/nfsserver/nfs_fha_new.c	(revision 326268)
@@ -1,276 +1,278 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
  * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
  * Copyright (c) 2013 Spectra Logic Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/* NOTE(review): the header names of the following includes were lost in
 * extraction (angle-bracket contents stripped); confirm against the
 * repository before relying on this listing. */
#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include

static void		fhanew_init(void *foo);
static void		fhanew_uninit(void *foo);
rpcproc_t		fhanew_get_procnum(rpcproc_t procnum);
int			fhanew_realign(struct mbuf **mb, int malloc_flags);
int			fhanew_get_fh(uint64_t *fh, int v3, struct mbuf **md,
			    caddr_t *dpos);
int			fhanew_is_read(rpcproc_t procnum);
int			fhanew_is_write(rpcproc_t procnum);
int			fhanew_get_offset(struct mbuf **md, caddr_t *dpos,
			    int v3, struct fha_info *info);
int			fhanew_no_offset(rpcproc_t procnum);
void			fhanew_set_locktype(rpcproc_t procnum,
			    struct fha_info *info);
static int		fhenew_stats_sysctl(SYSCTL_HANDLER_ARGS);

/* Singleton state for this FHA personality. */
static struct fha_params fhanew_softc;

SYSCTL_DECL(_vfs_nfsd);

extern int newnfs_nfsv3_procid[];
extern SVCPOOL *nfsrvd_pool;

SYSINIT(nfs_fhanew, SI_SUB_ROOT_CONF, SI_ORDER_ANY, fhanew_init, NULL);
SYSUNINIT(nfs_fhanew, SI_SUB_ROOT_CONF, SI_ORDER_ANY, fhanew_uninit, NULL);

/*
 * SYSINIT hook: zero the softc, install the per-personality callbacks,
 * create the vfs.nfsd.fha sysctl subtree and hand off to fha_init().
 */
static void
fhanew_init(void *foo)
{
	struct fha_params *softc;

	softc = &fhanew_softc;

	bzero(softc, sizeof(*softc));

	/*
	 * Setup the callbacks for this FHA personality.
	 */
	softc->callbacks.get_procnum = fhanew_get_procnum;
	softc->callbacks.realign = fhanew_realign;
	softc->callbacks.get_fh = fhanew_get_fh;
	softc->callbacks.is_read = fhanew_is_read;
	softc->callbacks.is_write = fhanew_is_write;
	softc->callbacks.get_offset = fhanew_get_offset;
	softc->callbacks.no_offset = fhanew_no_offset;
	softc->callbacks.set_locktype = fhanew_set_locktype;
	softc->callbacks.fhe_stats_sysctl = fhenew_stats_sysctl;

	snprintf(softc->server_name, sizeof(softc->server_name),
	    FHANEW_SERVER_NAME);

	softc->pool = &nfsrvd_pool;

	/*
	 * Initialize the sysctl context list for the fha module.
	 */
	sysctl_ctx_init(&softc->sysctl_ctx);
	softc->sysctl_tree = SYSCTL_ADD_NODE(&softc->sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_vfs_nfsd), OID_AUTO, "fha", CTLFLAG_RD,
	    0, "NFS File Handle Affinity (FHA)");
	if (softc->sysctl_tree == NULL) {
		printf("%s: unable to allocate sysctl tree\n", __func__);
		return;
	}

	fha_init(softc);
}

/* SYSUNINIT hook: tear down the FHA state built by fhanew_init(). */
static void
fhanew_uninit(void *foo)
{
	struct fha_params *softc;

	softc = &fhanew_softc;
	fha_uninit(softc);
}

/*
 * Map an NFSv2 procedure number to the NFSv3 one used internally.
 * Returns (rpcproc_t)-1 for out-of-range procedure numbers.
 */
rpcproc_t
fhanew_get_procnum(rpcproc_t procnum)
{
	if (procnum > NFSV2PROC_STATFS)
		return (-1);

	return (newnfs_nfsv3_procid[procnum]);
}

/* Thin wrapper over newnfs_realign() to fit the FHA callback signature. */
int
fhanew_realign(struct mbuf **mb, int malloc_flags)
{
	return (newnfs_realign(mb, malloc_flags));
}

/*
 * Extract the file handle from the RPC arguments and fold it into a
 * 64-bit hash key (byte-rotating XOR).  For v3 the handle length is
 * read from the stream and validated; v2 handles are fixed-size.
 * Returns 0 or EBADRPC on a malformed v3 length.
 */
int
fhanew_get_fh(uint64_t *fh, int v3, struct mbuf **md, caddr_t *dpos)
{
	struct nfsrv_descript lnd, *nd;
	uint32_t *tl;
	uint8_t *buf;
	uint64_t t;
	int error, len, i;

	error = 0;
	len = 0;
	nd = &lnd;

	nd->nd_md = *md;
	nd->nd_dpos = *dpos;

	if (v3) {
		NFSM_DISSECT_NONBLOCK(tl, uint32_t *, NFSX_UNSIGNED);
		if ((len = fxdr_unsigned(int, *tl)) <= 0 || len > NFSX_FHMAX) {
			error = EBADRPC;
			goto nfsmout;
		}
	} else {
		len = NFSX_V2FH;
	}

	t = 0;
	if (len != 0) {
		NFSM_DISSECT_NONBLOCK(buf, uint8_t *, len);
		/* XOR each byte into the key, rotating by byte position. */
		for (i = 0; i < len; i++)
			t ^= ((uint64_t)buf[i] << (i & 7) * 8);
	}
	*fh = t;

nfsmout:
	*md = nd->nd_md;
	*dpos = nd->nd_dpos;

	return (error);
}

/* Classify the procedure: 1 iff it is an NFS READ. */
int
fhanew_is_read(rpcproc_t procnum)
{
	if (procnum == NFSPROC_READ)
		return (1);
	else
		return (0);
}
/* Classify the procedure: 1 iff it is an NFS WRITE. */
int
fhanew_is_write(rpcproc_t procnum)
{
	if (procnum == NFSPROC_WRITE)
		return (1);
	else
		return (0);
}

/*
 * Pull the I/O offset out of the RPC arguments: a 64-bit hyper for v3,
 * a 32-bit unsigned for v2.  On a dissect failure the NFSM_DISSECT_NONBLOCK
 * macro jumps to nfsmout with error set.
 */
int
fhanew_get_offset(struct mbuf **md, caddr_t *dpos, int v3,
    struct fha_info *info)
{
	struct nfsrv_descript lnd, *nd;
	uint32_t *tl;
	int error;

	error = 0;
	nd = &lnd;

	nd->nd_md = *md;
	nd->nd_dpos = *dpos;

	if (v3) {
		NFSM_DISSECT_NONBLOCK(tl, uint32_t *, 2 * NFSX_UNSIGNED);
		info->offset = fxdr_hyper(tl);
	} else {
		NFSM_DISSECT_NONBLOCK(tl, uint32_t *, NFSX_UNSIGNED);
		info->offset = fxdr_unsigned(uint32_t, *tl);
	}

nfsmout:
	*md = nd->nd_md;
	*dpos = nd->nd_dpos;

	return (error);
}

/* Return 1 for procedures that carry no file offset in their arguments. */
int
fhanew_no_offset(rpcproc_t procnum)
{
	if (procnum == NFSPROC_FSSTAT ||
	    procnum == NFSPROC_FSINFO ||
	    procnum == NFSPROC_PATHCONF ||
	    procnum == NFSPROC_NOOP ||
	    procnum == NFSPROC_NULL)
		return (1);
	else
		return (0);
}

/*
 * Choose the vnode lock type FHA should use for this procedure:
 * shared for read-mostly operations (including WRITE, which is listed
 * in the shared group here), exclusive for metadata-changing ones.
 */
void
fhanew_set_locktype(rpcproc_t procnum, struct fha_info *info)
{
	switch (procnum) {
	case NFSPROC_NULL:
	case NFSPROC_GETATTR:
	case NFSPROC_LOOKUP:
	case NFSPROC_ACCESS:
	case NFSPROC_READLINK:
	case NFSPROC_READ:
	case NFSPROC_READDIR:
	case NFSPROC_READDIRPLUS:
	case NFSPROC_WRITE:
		info->locktype = LK_SHARED;
		break;
	case NFSPROC_SETATTR:
	case NFSPROC_CREATE:
	case NFSPROC_MKDIR:
	case NFSPROC_SYMLINK:
	case NFSPROC_MKNOD:
	case NFSPROC_REMOVE:
	case NFSPROC_RMDIR:
	case NFSPROC_RENAME:
	case NFSPROC_LINK:
	case NFSPROC_FSSTAT:
	case NFSPROC_FSINFO:
	case NFSPROC_PATHCONF:
	case NFSPROC_COMMIT:
	case NFSPROC_NOOP:
		info->locktype = LK_EXCLUSIVE;
		break;
	}
}

/* sysctl handler: delegate to the shared fhe_stats_sysctl() with our softc. */
static int
fhenew_stats_sysctl(SYSCTL_HANDLER_ARGS)
{
	return (fhe_stats_sysctl(oidp, arg1, arg2, req, &fhanew_softc));
}

/* Public entry point: assign the request to a thread via fha_assign(). */
SVCTHREAD *
fhanew_assign(SVCTHREAD *this_thread, struct svc_req *req)
{
	return (fha_assign(this_thread, req, &fhanew_softc));
}
Index: head/sys/fs/nfsserver/nfs_fha_new.h
===================================================================
--- head/sys/fs/nfsserver/nfs_fha_new.h	(revision 326267)
+++ head/sys/fs/nfsserver/nfs_fha_new.h	(revision 326268)
@@ -1,39 +1,41 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+
* * Copyright (c) 2008 Isilon Inc http://www.isilon.com/ * Copyright (c) 2013 Spectra Logic Corporation * * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* $FreeBSD$ */ #ifndef _NFS_FHA_NEW_H #define _NFS_FHA_NEW_H 1 #ifdef _KERNEL #define FHANEW_SERVER_NAME "nfsd" SVCTHREAD *fhanew_assign(SVCTHREAD *this_thread, struct svc_req *req); #endif /* _KERNEL */ #endif /* _NFS_FHA_NEW_H */ Index: head/sys/fs/nfsserver/nfs_nfsdstate.c =================================================================== --- head/sys/fs/nfsserver/nfs_nfsdstate.c (revision 326267) +++ head/sys/fs/nfsserver/nfs_nfsdstate.c (revision 326268) @@ -1,6130 +1,6132 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2009 Rick Macklem, University of Guelph * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* */ #include __FBSDID("$FreeBSD$"); #ifndef APPLEKEXT #include struct nfsrv_stablefirst nfsrv_stablefirst; int nfsrv_issuedelegs = 0; int nfsrv_dolocallocks = 0; struct nfsv4lock nfsv4rootfs_lock; extern int newnfs_numnfsd; extern struct nfsstatsv1 nfsstatsv1; extern int nfsrv_lease; extern struct timeval nfsboottime; extern u_int32_t newnfs_true, newnfs_false; NFSV4ROOTLOCKMUTEX; NFSSTATESPINLOCK; SYSCTL_DECL(_vfs_nfsd); int nfsrv_statehashsize = NFSSTATEHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, statehashsize, CTLFLAG_RDTUN, &nfsrv_statehashsize, 0, "Size of state hash table set via loader.conf"); int nfsrv_clienthashsize = NFSCLIENTHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, clienthashsize, CTLFLAG_RDTUN, &nfsrv_clienthashsize, 0, "Size of client hash table set via loader.conf"); int nfsrv_lockhashsize = NFSLOCKHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, fhhashsize, CTLFLAG_RDTUN, &nfsrv_lockhashsize, 0, "Size of file handle hash table set via loader.conf"); int nfsrv_sessionhashsize = NFSSESSIONHASHSIZE; SYSCTL_INT(_vfs_nfsd, OID_AUTO, sessionhashsize, CTLFLAG_RDTUN, &nfsrv_sessionhashsize, 0, "Size of session hash table set via loader.conf"); static int nfsrv_v4statelimit = NFSRV_V4STATELIMIT; SYSCTL_INT(_vfs_nfsd, OID_AUTO, v4statelimit, CTLFLAG_RWTUN, &nfsrv_v4statelimit, 0, "High water limit for NFSv4 opens+locks+delegations"); static int nfsrv_writedelegifpos = 0; SYSCTL_INT(_vfs_nfsd, OID_AUTO, writedelegifpos, CTLFLAG_RW, &nfsrv_writedelegifpos, 0, "Issue a write delegation for read opens if possible"); static int nfsrv_allowreadforwriteopen = 1; SYSCTL_INT(_vfs_nfsd, OID_AUTO, allowreadforwriteopen, CTLFLAG_RW, &nfsrv_allowreadforwriteopen, 0, "Allow Reads to be done with Write Access StateIDs"); /* * Hash lists for nfs V4. 
*/ struct nfsclienthashhead *nfsclienthash; struct nfslockhashhead *nfslockhash; struct nfssessionhash *nfssessionhash; #endif /* !APPLEKEXT */ static u_int32_t nfsrv_openpluslock = 0, nfsrv_delegatecnt = 0; static time_t nfsrvboottime; static int nfsrv_returnoldstateid = 0, nfsrv_clients = 0; static int nfsrv_clienthighwater = NFSRV_CLIENTHIGHWATER; static int nfsrv_nogsscallback = 0; /* local functions */ static void nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp); static void nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p); static int nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p); static void nfsrv_freenfslock(struct nfslock *lop); static void nfsrv_freenfslockfile(struct nfslockfile *lfp); static void nfsrv_freedeleg(struct nfsstate *); static int nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp, u_int32_t flags, struct nfsstate **stpp); static void nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp, struct nfsstate **stpp); static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p); static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit); static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp); static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp); static int nfsrv_getipnumber(u_char *cp); static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid); static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, u_int32_t flags); 
static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p); static int nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp, uint32_t callback, int op, const char *optag, struct nfsdsession **sepp); static u_int32_t nfsrv_nextclientindex(void); static u_int32_t nfsrv_nextstateindex(struct nfsclient *clp); static void nfsrv_markstable(struct nfsclient *clp); static int nfsrv_checkstable(struct nfsclient *clp); static int nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, struct vnode *vp, NFSPROC_T *p); static int nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p, vnode_t vp); static int nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp, struct nfsclient *clp, int *haslockp, NFSPROC_T *p); static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp); static time_t nfsrv_leaseexpiry(void); static void nfsrv_delaydelegtimeout(struct nfsstate *stp); static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op); static int nfsrv_nootherstate(struct nfsstate *stp); static int nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p); static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p); static int nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags, uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p); static void nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p); static void nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first, uint64_t end); static void nfsrv_locklf(struct nfslockfile *lfp); static void nfsrv_unlocklf(struct nfslockfile *lfp); static struct nfsdsession *nfsrv_findsession(uint8_t 
		    *sessionid);
static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid);
static int nfsv4_setcbsequence(struct nfsrv_descript *nd,
    struct nfsclient *clp, int dont_replycache, struct nfsdsession **sepp);
static int nfsv4_getcbsession(struct nfsclient *clp,
    struct nfsdsession **sepp);

/*
 * Scan the client list for a match and either return the current one,
 * create a new entry or return an error.
 * If returning a non-error, the clp structure must either be linked into
 * the client list or free'd.
 *
 * Handles SETCLIENTID (NFSv4.0) and the equivalent part of
 * EXCHANGE_ID (NFSv4.1, distinguished via ND_NFSV41).  Runs with the
 * nfsv4rootfs_lock held exclusively to keep other nfsd threads out of
 * the client hash while it is modified.
 */
APPLESTATIC int
nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp,
    nfsquad_t *clientidp, nfsquad_t *confirmp, NFSPROC_T *p)
{
	struct nfsclient *clp = NULL, *new_clp = *new_clpp;
	int i, error = 0;
	struct nfsstate *stp, *tstp;
	struct sockaddr_in *sad, *rad;
	int zapit = 0, gotit, hasstate = 0, igotlock;
	static u_int64_t confirm_index = 0;

	/*
	 * Check for state resource limit exceeded.
	 */
	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
		error = NFSERR_RESOURCE;
		goto out;
	}

	if (nfsrv_issuedelegs == 0 ||
	    ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0))
		/*
		 * Don't do callbacks when delegations are disabled or
		 * for AUTH_GSS unless enabled via nfsrv_nogsscallback.
		 * If establishing a callback connection is attempted
		 * when a firewall is blocking the callback path, the
		 * server may wait too long for the connect attempt to
		 * succeed during the Open. Some clients, such as Linux,
		 * may timeout and give up on the Open before the server
		 * replies. Also, since AUTH_GSS callbacks are not
		 * yet interoperability tested, they might cause the
		 * server to crap out, if they get past the Init call to
		 * the client.
		 */
		new_clp->lc_program = 0;

	/* Lock out other nfsd threads */
	NFSLOCKV4ROOTMUTEX();
	nfsv4_relref(&nfsv4rootfs_lock);
	do {
		igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
		    NFSV4ROOTLOCKMUTEXPTR, NULL);
	} while (!igotlock);
	NFSUNLOCKV4ROOTMUTEX();

	/*
	 * Search for a match in the client list.
	 */
	gotit = i = 0;
	while (i < nfsrv_clienthashsize && !gotit) {
	    LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
		if (new_clp->lc_idlen == clp->lc_idlen &&
		    !NFSBCMP(new_clp->lc_id, clp->lc_id, clp->lc_idlen)) {
			gotit = 1;
			break;
		}
	    }
	    if (gotit == 0)
		i++;
	}
	/*
	 * Case 1: no match, or a match that is unconfirmed/admin-revoked.
	 * Throw away the stale entry (if any) and issue a fresh clientid.
	 */
	if (!gotit ||
	    (clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_ADMINREVOKED))) {
		if ((nd->nd_flag & ND_NFSV41) != 0 && confirmp->lval[1] != 0) {
			/*
			 * For NFSv4.1, if confirmp->lval[1] is non-zero, the
			 * client is trying to update a confirmed clientid.
			 */
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
			confirmp->lval[1] = 0;
			error = NFSERR_NOENT;
			goto out;
		}
		/*
		 * Get rid of the old one.
		 */
		if (i != nfsrv_clienthashsize) {
			LIST_REMOVE(clp, lc_hash);
			nfsrv_cleanclient(clp, p);
			nfsrv_freedeleglist(&clp->lc_deleg);
			nfsrv_freedeleglist(&clp->lc_olddeleg);
			zapit = 1;
		}
		/*
		 * Add it after assigning a client id to it.
		 */
		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
		if ((nd->nd_flag & ND_NFSV41) != 0)
			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
			    ++confirm_index;
		else
			confirmp->qval = new_clp->lc_confirm.qval =
			    ++confirm_index;
		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
		    (u_int32_t)nfsrvboottime;
		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
		    nfsrv_nextclientindex();
		new_clp->lc_stateindex = 0;
		new_clp->lc_statemaxindex = 0;
		new_clp->lc_cbref = 0;
		new_clp->lc_expiry = nfsrv_leaseexpiry();
		LIST_INIT(&new_clp->lc_open);
		LIST_INIT(&new_clp->lc_deleg);
		LIST_INIT(&new_clp->lc_olddeleg);
		LIST_INIT(&new_clp->lc_session);
		for (i = 0; i < nfsrv_statehashsize; i++)
			LIST_INIT(&new_clp->lc_stateid[i]);
		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
		    lc_hash);
		nfsstatsv1.srvclients++;
		nfsrv_openpluslock++;
		nfsrv_clients++;
		NFSLOCKV4ROOTMUTEX();
		nfsv4_unlock(&nfsv4rootfs_lock, 1);
		NFSUNLOCKV4ROOTMUTEX();
		/* Free the replaced entry only after the lock is dropped. */
		if (zapit)
			nfsrv_zapclient(clp, p);
		*new_clpp = NULL;
		goto out;
	}

	/*
	 * Now, handle the cases where the id is already issued.
	 */
	if (nfsrv_notsamecredname(nd, clp)) {
		/*
		 * Check to see if there is expired state that should go away.
		 */
		if (clp->lc_expiry < NFSD_MONOSEC &&
		    (!LIST_EMPTY(&clp->lc_open) ||
		     !LIST_EMPTY(&clp->lc_deleg))) {
			nfsrv_cleanclient(clp, p);
			nfsrv_freedeleglist(&clp->lc_deleg);
		}
		/*
		 * If there is outstanding state, then reply NFSERR_CLIDINUSE per
		 * RFC3530 Sec. 8.1.2 last para.
		 */
		if (!LIST_EMPTY(&clp->lc_deleg)) {
			hasstate = 1;
		} else if (LIST_EMPTY(&clp->lc_open)) {
			hasstate = 0;
		} else {
			hasstate = 0;
			/* Look for an Open on the OpenOwner */
			LIST_FOREACH(stp, &clp->lc_open, ls_list) {
				if (!LIST_EMPTY(&stp->ls_open)) {
					hasstate = 1;
					break;
				}
			}
		}
		if (hasstate) {
			/*
			 * If the uid doesn't match, return NFSERR_CLIDINUSE after
			 * filling out the correct ipaddr and portnum.
			 */
			sad = NFSSOCKADDR(new_clp->lc_req.nr_nam,
			    struct sockaddr_in *);
			rad = NFSSOCKADDR(clp->lc_req.nr_nam,
			    struct sockaddr_in *);
			sad->sin_addr.s_addr = rad->sin_addr.s_addr;
			sad->sin_port = rad->sin_port;
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
			error = NFSERR_CLIDINUSE;
			goto out;
		}
	}

	if (NFSBCMP(new_clp->lc_verf, clp->lc_verf, NFSX_VERF)) {
		/*
		 * If the verifier has changed, the client has rebooted
		 * and a new client id is issued. The old state info
		 * can be thrown away once the SETCLIENTID_CONFIRM occurs.
		 */
		LIST_REMOVE(clp, lc_hash);
		new_clp->lc_flags |= LCL_NEEDSCONFIRM;
		if ((nd->nd_flag & ND_NFSV41) != 0)
			new_clp->lc_confirm.lval[0] = confirmp->lval[0] =
			    ++confirm_index;
		else
			confirmp->qval = new_clp->lc_confirm.qval =
			    ++confirm_index;
		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
		    nfsrvboottime;
		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
		    nfsrv_nextclientindex();
		new_clp->lc_stateindex = 0;
		new_clp->lc_statemaxindex = 0;
		new_clp->lc_cbref = 0;
		new_clp->lc_expiry = nfsrv_leaseexpiry();

		/*
		 * Save the state until confirmed.
		 * Each moved state entry must be re-pointed at the new clp.
		 */
		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
			tstp->ls_clp = new_clp;
		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
			tstp->ls_clp = new_clp;
		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
		    ls_list);
		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
			tstp->ls_clp = new_clp;
		for (i = 0; i < nfsrv_statehashsize; i++) {
			LIST_NEWHEAD(&new_clp->lc_stateid[i],
			    &clp->lc_stateid[i], ls_hash);
			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
				tstp->ls_clp = new_clp;
		}
		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
		    lc_hash);
		nfsstatsv1.srvclients++;
		nfsrv_openpluslock++;
		nfsrv_clients++;
		NFSLOCKV4ROOTMUTEX();
		nfsv4_unlock(&nfsv4rootfs_lock, 1);
		NFSUNLOCKV4ROOTMUTEX();

		/*
		 * Must wait until any outstanding callback on the old clp
		 * completes.
		 */
		NFSLOCKSTATE();
		while (clp->lc_cbref) {
			clp->lc_flags |= LCL_WAKEUPWANTED;
			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
			    "nfsd clp", 10 * hz);
		}
		NFSUNLOCKSTATE();
		nfsrv_zapclient(clp, p);
		*new_clpp = NULL;
		goto out;
	}

	/* For NFSv4.1, mark that we found a confirmed clientid. */
	if ((nd->nd_flag & ND_NFSV41) != 0) {
		clientidp->lval[0] = clp->lc_clientid.lval[0];
		clientidp->lval[1] = clp->lc_clientid.lval[1];
		confirmp->lval[0] = 0;	/* Ignored by client */
		confirmp->lval[1] = 1;
	} else {
		/*
		 * id and verifier match, so update the net address info
		 * and get rid of any existing callback authentication
		 * handle, so a new one will be acquired.
		 */
		LIST_REMOVE(clp, lc_hash);
		new_clp->lc_flags |= (LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
		new_clp->lc_expiry = nfsrv_leaseexpiry();
		confirmp->qval = new_clp->lc_confirm.qval = ++confirm_index;
		clientidp->lval[0] = new_clp->lc_clientid.lval[0] =
		    clp->lc_clientid.lval[0];
		clientidp->lval[1] = new_clp->lc_clientid.lval[1] =
		    clp->lc_clientid.lval[1];
		new_clp->lc_delegtime = clp->lc_delegtime;
		new_clp->lc_stateindex = clp->lc_stateindex;
		new_clp->lc_statemaxindex = clp->lc_statemaxindex;
		new_clp->lc_cbref = 0;
		/* Move all existing state onto the replacement clp. */
		LIST_NEWHEAD(&new_clp->lc_open, &clp->lc_open, ls_list);
		LIST_FOREACH(tstp, &new_clp->lc_open, ls_list)
			tstp->ls_clp = new_clp;
		LIST_NEWHEAD(&new_clp->lc_deleg, &clp->lc_deleg, ls_list);
		LIST_FOREACH(tstp, &new_clp->lc_deleg, ls_list)
			tstp->ls_clp = new_clp;
		LIST_NEWHEAD(&new_clp->lc_olddeleg, &clp->lc_olddeleg,
		    ls_list);
		LIST_FOREACH(tstp, &new_clp->lc_olddeleg, ls_list)
			tstp->ls_clp = new_clp;
		for (i = 0; i < nfsrv_statehashsize; i++) {
			LIST_NEWHEAD(&new_clp->lc_stateid[i],
			    &clp->lc_stateid[i], ls_hash);
			LIST_FOREACH(tstp, &new_clp->lc_stateid[i], ls_hash)
				tstp->ls_clp = new_clp;
		}
		LIST_INSERT_HEAD(NFSCLIENTHASH(new_clp->lc_clientid), new_clp,
		    lc_hash);
		nfsstatsv1.srvclients++;
		nfsrv_openpluslock++;
		nfsrv_clients++;
	}
	NFSLOCKV4ROOTMUTEX();
	nfsv4_unlock(&nfsv4rootfs_lock, 1);
	NFSUNLOCKV4ROOTMUTEX();

	if ((nd->nd_flag & ND_NFSV41) == 0) {
		/*
		 * Must wait until any outstanding callback on the old clp
		 * completes.
		 */
		NFSLOCKSTATE();
		while (clp->lc_cbref) {
			clp->lc_flags |= LCL_WAKEUPWANTED;
			(void)mtx_sleep(clp, NFSSTATEMUTEXPTR, PZERO - 1,
			    "nfsdclp", 10 * hz);
		}
		NFSUNLOCKSTATE();
		nfsrv_zapclient(clp, p);
		*new_clpp = NULL;
	}
out:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Check to see if the client id exists and optionally confirm it.
*/
/*
 * Look the client structure up by clientid (NFSv4.0) or via the session
 * (NFSv4.1 Renew), then perform the operation(s) requested by opflags
 * (CLOPS_CONFIRM / CLOPS_RENEW / CLOPS_RENEWOP).
 * On success, *clpp (when non-NULL) is set to the client found.
 * Returns 0 or an NFSERR_xxx value.
 * Locking: with CLOPS_RENEW the state lock is already held by the caller;
 * CLOPS_CONFIRM upgrades to an exclusive nfsv4rootfs_lock; otherwise the
 * state mutex is taken here and dropped before return.
 */
APPLESTATIC int
nfsrv_getclient(nfsquad_t clientid, int opflags, struct nfsclient **clpp,
    struct nfsdsession *nsep, nfsquad_t confirm, uint32_t cbprogram,
    struct nfsrv_descript *nd, NFSPROC_T *p)
{
    struct nfsclient *clp;
    struct nfsstate *stp;
    int i;
    struct nfsclienthashhead *hp;
    int error = 0, igotlock, doneok;
    struct nfssessionhash *shp;
    struct nfsdsession *sep;
    uint64_t sessid[2];
    static uint64_t next_sess = 0;

    if (clpp)
        *clpp = NULL;
    /*
     * A clientid issued before the last server reboot is stale, except
     * for an NFSv4.1 Renew done via the session, which carries no
     * clientid to check.
     */
    if ((nd == NULL || (nd->nd_flag & ND_NFSV41) == 0 ||
        opflags != CLOPS_RENEW) && nfsrvboottime != clientid.lval[0]) {
        error = NFSERR_STALECLIENTID;
        goto out;
    }

    /*
     * If called with opflags == CLOPS_RENEW, the State Lock is
     * already held. Otherwise, we need to get either that or,
     * for the case of Confirm, lock out the nfsd threads.
     */
    if (opflags & CLOPS_CONFIRM) {
        NFSLOCKV4ROOTMUTEX();
        nfsv4_relref(&nfsv4rootfs_lock);
        do {
            igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
                NFSV4ROOTLOCKMUTEXPTR, NULL);
        } while (!igotlock);
        /*
         * Create a new sessionid here, since we need to do it where
         * there is a mutex held to serialize update of next_sess.
         */
        if ((nd->nd_flag & ND_NFSV41) != 0) {
            sessid[0] = ++next_sess;
            sessid[1] = clientid.qval;
        }
        NFSUNLOCKV4ROOTMUTEX();
    } else if (opflags != CLOPS_RENEW) {
        NFSLOCKSTATE();
    }

    /* For NFSv4.1, the clp is acquired from the associated session. */
    if (nd != NULL && (nd->nd_flag & ND_NFSV41) != 0 &&
        opflags == CLOPS_RENEW) {
        clp = NULL;
        if ((nd->nd_flag & ND_HASSEQUENCE) != 0) {
            shp = NFSSESSIONHASH(nd->nd_sessionid);
            NFSLOCKSESSION(shp);
            sep = nfsrv_findsession(nd->nd_sessionid);
            if (sep != NULL)
                clp = sep->sess_clp;
            NFSUNLOCKSESSION(shp);
        }
    } else {
        /* NFSv4.0: hash on the clientid and search the chain. */
        hp = NFSCLIENTHASH(clientid);
        LIST_FOREACH(clp, hp, lc_hash) {
            if (clp->lc_clientid.lval[1] == clientid.lval[1])
                break;
        }
    }
    if (clp == NULL) {
        if (opflags & CLOPS_CONFIRM)
            error = NFSERR_STALECLIENTID;
        else
            error = NFSERR_EXPIRED;
    } else if (clp->lc_flags & LCL_ADMINREVOKED) {
        /*
         * If marked admin revoked, just return the error.
         */
        error = NFSERR_ADMINREVOKED;
    }
    if (error) {
        /* Drop whichever lock was acquired above before returning. */
        if (opflags & CLOPS_CONFIRM) {
            NFSLOCKV4ROOTMUTEX();
            nfsv4_unlock(&nfsv4rootfs_lock, 1);
            NFSUNLOCKV4ROOTMUTEX();
        } else if (opflags != CLOPS_RENEW) {
            NFSUNLOCKSTATE();
        }
        goto out;
    }

    /*
     * Perform any operations specified by the opflags.
     */
    if (opflags & CLOPS_CONFIRM) {
        /*
         * The confirm verifier must match what was handed out by
         * SetClientID / ExchangeID (full 64 bits for v4.0, only
         * lval[0] for v4.1), and the principal must be the same.
         */
        if (((nd->nd_flag & ND_NFSV41) != 0 &&
             clp->lc_confirm.lval[0] != confirm.lval[0]) ||
            ((nd->nd_flag & ND_NFSV41) == 0 &&
             clp->lc_confirm.qval != confirm.qval))
            error = NFSERR_STALECLIENTID;
        else if (nfsrv_notsamecredname(nd, clp))
            error = NFSERR_CLIDINUSE;

        if (!error) {
            if ((clp->lc_flags & (LCL_NEEDSCONFIRM | LCL_DONTCLEAN)) ==
                LCL_NEEDSCONFIRM) {
                /*
                 * Hang onto the delegations (as old delegations)
                 * for an Open with CLAIM_DELEGATE_PREV unless in
                 * grace, but get rid of the rest of the state.
                 */
                nfsrv_cleanclient(clp, p);
                nfsrv_freedeleglist(&clp->lc_olddeleg);
                if (nfsrv_checkgrace(nd, clp, 0)) {
                    /* In grace, so just delete delegations */
                    nfsrv_freedeleglist(&clp->lc_deleg);
                } else {
                    LIST_FOREACH(stp, &clp->lc_deleg, ls_list)
                        stp->ls_flags |= NFSLCK_OLDDELEG;
                    clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease +
                        NFSRV_LEASEDELTA;
                    LIST_NEWHEAD(&clp->lc_olddeleg, &clp->lc_deleg,
                        ls_list);
                }
                if ((nd->nd_flag & ND_NFSV41) != 0)
                    clp->lc_program = cbprogram;
            }
            clp->lc_flags &= ~(LCL_NEEDSCONFIRM | LCL_DONTCLEAN);
            if (clp->lc_program)
                clp->lc_flags |= LCL_NEEDSCBNULL;
            /* For NFSv4.1, link the session onto the client. */
            if (nsep != NULL) {
                /* Hold a reference on the xprt for a backchannel. */
                if ((nsep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0 &&
                    clp->lc_req.nr_client == NULL) {
                    clp->lc_req.nr_client = (struct __rpc_client *)
                        clnt_bck_create(nd->nd_xprt->xp_socket,
                        cbprogram, NFSV4_CBVERS);
                    if (clp->lc_req.nr_client != NULL) {
                        SVC_ACQUIRE(nd->nd_xprt);
                        nd->nd_xprt->xp_p2 =
                            clp->lc_req.nr_client->cl_private;
                        /* Disable idle timeout. */
                        nd->nd_xprt->xp_idletimeout = 0;
                        nsep->sess_cbsess.nfsess_xprt = nd->nd_xprt;
                    } else
                        nsep->sess_crflags &= ~NFSV4CRSESS_CONNBACKCHAN;
                }
                NFSBCOPY(sessid, nsep->sess_sessionid,
                    NFSX_V4SESSIONID);
                NFSBCOPY(sessid, nsep->sess_cbsess.nfsess_sessionid,
                    NFSX_V4SESSIONID);
                shp = NFSSESSIONHASH(nsep->sess_sessionid);
                NFSLOCKSTATE();
                NFSLOCKSESSION(shp);
                LIST_INSERT_HEAD(&shp->list, nsep, sess_hash);
                LIST_INSERT_HEAD(&clp->lc_session, nsep, sess_list);
                nsep->sess_clp = clp;
                NFSUNLOCKSESSION(shp);
                NFSUNLOCKSTATE();
            }
        }
    } else if (clp->lc_flags & LCL_NEEDSCONFIRM) {
        /* Found, but never confirmed: treat as expired. */
        error = NFSERR_EXPIRED;
    }

    /*
     * If called by the Renew Op, we must check the principal.
     */
    if (!error && (opflags & CLOPS_RENEWOP)) {
        if (nfsrv_notsamecredname(nd, clp)) {
            /*
             * Different principal: only allow the renew if that uid
             * holds at least one Open on this client.
             */
            doneok = 0;
            for (i = 0; i < nfsrv_statehashsize && doneok == 0; i++) {
                LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
                    if ((stp->ls_flags & NFSLCK_OPEN) &&
                        stp->ls_uid == nd->nd_cred->cr_uid) {
                        doneok = 1;
                        break;
                    }
                }
            }
            if (!doneok)
                error = NFSERR_ACCES;
        }
        if (!error && (clp->lc_flags & LCL_CBDOWN))
            error = NFSERR_CBPATHDOWN;
    }
    /* NFSERR_CBPATHDOWN still renews the lease. */
    if ((!error || error == NFSERR_CBPATHDOWN) &&
        (opflags & CLOPS_RENEW)) {
        clp->lc_expiry = nfsrv_leaseexpiry();
    }
    if (opflags & CLOPS_CONFIRM) {
        NFSLOCKV4ROOTMUTEX();
        nfsv4_unlock(&nfsv4rootfs_lock, 1);
        NFSUNLOCKV4ROOTMUTEX();
    } else if (opflags != CLOPS_RENEW) {
        NFSUNLOCKSTATE();
    }
    if (clpp)
        *clpp = clp;

out:
    NFSEXITCODE2(error, nd);
    return (error);
}

/*
 * Perform the NFSv4.1 destroy clientid.
*/
/*
 * Implements DESTROY_CLIENTID (NFSv4.1).  Succeeds only when the clientid
 * exists and holds no state (no stateids, sessions or delegations);
 * otherwise returns NFSERR_CLIENTIDBUSY.  A clientid that is already gone
 * is reported as success.
 */
int
nfsrv_destroyclient(nfsquad_t clientid, NFSPROC_T *p)
{
    struct nfsclient *clp;
    struct nfsclienthashhead *hp;
    int error = 0, i, igotlock;

    if (nfsrvboottime != clientid.lval[0]) {
        error = NFSERR_STALECLIENTID;
        goto out;
    }

    /* Lock out other nfsd threads */
    NFSLOCKV4ROOTMUTEX();
    nfsv4_relref(&nfsv4rootfs_lock);
    do {
        igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
            NFSV4ROOTLOCKMUTEXPTR, NULL);
    } while (igotlock == 0);
    NFSUNLOCKV4ROOTMUTEX();

    hp = NFSCLIENTHASH(clientid);
    LIST_FOREACH(clp, hp, lc_hash) {
        if (clp->lc_clientid.lval[1] == clientid.lval[1])
            break;
    }
    if (clp == NULL) {
        NFSLOCKV4ROOTMUTEX();
        nfsv4_unlock(&nfsv4rootfs_lock, 1);
        NFSUNLOCKV4ROOTMUTEX();
        /* Just return ok, since it is gone. */
        goto out;
    }

    /* Scan for state on the clientid. */
    for (i = 0; i < nfsrv_statehashsize; i++)
        if (!LIST_EMPTY(&clp->lc_stateid[i])) {
            NFSLOCKV4ROOTMUTEX();
            nfsv4_unlock(&nfsv4rootfs_lock, 1);
            NFSUNLOCKV4ROOTMUTEX();
            error = NFSERR_CLIENTIDBUSY;
            goto out;
        }
    if (!LIST_EMPTY(&clp->lc_session) || !LIST_EMPTY(&clp->lc_deleg)) {
        NFSLOCKV4ROOTMUTEX();
        nfsv4_unlock(&nfsv4rootfs_lock, 1);
        NFSUNLOCKV4ROOTMUTEX();
        error = NFSERR_CLIENTIDBUSY;
        goto out;
    }

    /* Destroy the clientid and return ok. */
    nfsrv_cleanclient(clp, p);
    nfsrv_freedeleglist(&clp->lc_deleg);
    nfsrv_freedeleglist(&clp->lc_olddeleg);
    LIST_REMOVE(clp, lc_hash);
    NFSLOCKV4ROOTMUTEX();
    nfsv4_unlock(&nfsv4rootfs_lock, 1);
    NFSUNLOCKV4ROOTMUTEX();
    nfsrv_zapclient(clp, p);
out:
    /*
     * NOTE(review): "nd" is not declared in this function; this only
     * compiles because NFSEXITCODE2() presumably discards its second
     * argument in the default build — TODO confirm against nfsport.h.
     */
    NFSEXITCODE2(error, nd);
    return (error);
}

/*
 * Called from the new nfssvc syscall to admin revoke a clientid.
 * Returns 0 for success, error otherwise.
 */
APPLESTATIC int
nfsrv_adminrevoke(struct nfsd_clid *revokep, NFSPROC_T *p)
{
    struct nfsclient *clp = NULL;
    int i, error = 0;
    int gotit, igotlock;

    /*
     * First, lock out the nfsd so that state won't change while the
     * revocation record is being written to the stable storage restart
     * file.
     */
    NFSLOCKV4ROOTMUTEX();
    do {
        igotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
            NFSV4ROOTLOCKMUTEXPTR, NULL);
    } while (!igotlock);
    NFSUNLOCKV4ROOTMUTEX();

    /*
     * Search for a match in the client list.
     */
    gotit = i = 0;
    while (i < nfsrv_clienthashsize && !gotit) {
        LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) {
            if (revokep->nclid_idlen == clp->lc_idlen &&
                !NFSBCMP(revokep->nclid_id, clp->lc_id, clp->lc_idlen)) {
                gotit = 1;
                break;
            }
        }
        i++;
    }
    if (!gotit) {
        NFSLOCKV4ROOTMUTEX();
        nfsv4_unlock(&nfsv4rootfs_lock, 0);
        NFSUNLOCKV4ROOTMUTEX();
        error = EPERM;
        goto out;
    }

    /*
     * Now, write out the revocation record
     */
    nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
    nfsrv_backupstable();

    /*
     * and clear out the state, marking the clientid revoked.
     */
    clp->lc_flags &= ~LCL_CALLBACKSON;
    clp->lc_flags |= LCL_ADMINREVOKED;
    nfsrv_cleanclient(clp, p);
    nfsrv_freedeleglist(&clp->lc_deleg);
    nfsrv_freedeleglist(&clp->lc_olddeleg);
    NFSLOCKV4ROOTMUTEX();
    nfsv4_unlock(&nfsv4rootfs_lock, 0);
    NFSUNLOCKV4ROOTMUTEX();

out:
    NFSEXITCODE(error);
    return (error);
}

/*
 * Dump out stats for all clients. Called from nfssvc(2), that is used
 * nfsstatsv1.
 * Fills dumpp[0..maxcnt-1]; a zero-length client id marks end of list.
 */
APPLESTATIC void
nfsrv_dumpclients(struct nfsd_dumpclients *dumpp, int maxcnt)
{
    struct nfsclient *clp;
    int i = 0, cnt = 0;

    /*
     * First, get a reference on the nfsv4rootfs_lock so that an
     * exclusive lock cannot be acquired while dumping the clients.
     */
    NFSLOCKV4ROOTMUTEX();
    nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
    NFSUNLOCKV4ROOTMUTEX();
    NFSLOCKSTATE();

    /*
     * Rattle through the client lists until done.
     */
    while (i < nfsrv_clienthashsize && cnt < maxcnt) {
        clp = LIST_FIRST(&nfsclienthash[i]);
        while (clp != LIST_END(&nfsclienthash[i]) && cnt < maxcnt) {
            nfsrv_dumpaclient(clp, &dumpp[cnt]);
            cnt++;
            clp = LIST_NEXT(clp, lc_hash);
        }
        i++;
    }
    if (cnt < maxcnt)
        dumpp[cnt].ndcl_clid.nclid_idlen = 0;
    NFSUNLOCKSTATE();
    NFSLOCKV4ROOTMUTEX();
    nfsv4_relref(&nfsv4rootfs_lock);
    NFSUNLOCKV4ROOTMUTEX();
}

/*
 * Dump stats for a client. Must be called with the NFSSTATELOCK and spl'd.
 */
static void
nfsrv_dumpaclient(struct nfsclient *clp, struct nfsd_dumpclients *dumpp)
{
    struct nfsstate *stp, *openstp, *lckownstp;
    struct nfslock *lop;
    struct sockaddr *sad;
    struct sockaddr_in *rad;
    struct sockaddr_in6 *rad6;

    dumpp->ndcl_nopenowners = dumpp->ndcl_nlockowners = 0;
    dumpp->ndcl_nopens = dumpp->ndcl_nlocks = 0;
    dumpp->ndcl_ndelegs = dumpp->ndcl_nolddelegs = 0;
    dumpp->ndcl_flags = clp->lc_flags;
    dumpp->ndcl_clid.nclid_idlen = clp->lc_idlen;
    NFSBCOPY(clp->lc_id, dumpp->ndcl_clid.nclid_id, clp->lc_idlen);
    sad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr *);
    dumpp->ndcl_addrfam = sad->sa_family;
    if (sad->sa_family == AF_INET) {
        rad = (struct sockaddr_in *)sad;
        dumpp->ndcl_cbaddr.sin_addr = rad->sin_addr;
    } else {
        rad6 = (struct sockaddr_in6 *)sad;
        dumpp->ndcl_cbaddr.sin6_addr = rad6->sin6_addr;
    }

    /*
     * Now, scan the state lists and total up the opens and locks.
     */
    LIST_FOREACH(stp, &clp->lc_open, ls_list) {
        dumpp->ndcl_nopenowners++;
        LIST_FOREACH(openstp, &stp->ls_open, ls_list) {
            dumpp->ndcl_nopens++;
            LIST_FOREACH(lckownstp, &openstp->ls_open, ls_list) {
                dumpp->ndcl_nlockowners++;
                LIST_FOREACH(lop, &lckownstp->ls_lock, lo_lckowner) {
                    dumpp->ndcl_nlocks++;
                }
            }
        }
    }

    /*
     * and the delegation lists.
     */
    LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
        dumpp->ndcl_ndelegs++;
    }
    LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) {
        dumpp->ndcl_nolddelegs++;
    }
}

/*
 * Dump out lock stats for a file.
*/
/*
 * Fills ldumpp[0..maxcnt-1] with the open shares, byte-range locks and
 * delegations held on vnode vp; a zero-length client id marks end of list.
 */
APPLESTATIC void
nfsrv_dumplocks(vnode_t vp, struct nfsd_dumplocks *ldumpp, int maxcnt,
    NFSPROC_T *p)
{
    struct nfsstate *stp;
    struct nfslock *lop;
    int cnt = 0;
    struct nfslockfile *lfp;
    struct sockaddr *sad;
    struct sockaddr_in *rad;
    struct sockaddr_in6 *rad6;
    int ret;
    fhandle_t nfh;

    ret = nfsrv_getlockfh(vp, 0, NULL, &nfh, p);
    /*
     * First, get a reference on the nfsv4rootfs_lock so that an
     * exclusive lock on it cannot be acquired while dumping the locks.
     */
    NFSLOCKV4ROOTMUTEX();
    nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
    NFSUNLOCKV4ROOTMUTEX();
    NFSLOCKSTATE();
    if (!ret)
        ret = nfsrv_getlockfile(0, NULL, &lfp, &nfh, 0);
    if (ret) {
        /* No lock state for this file: emit an empty list. */
        ldumpp[0].ndlck_clid.nclid_idlen = 0;
        NFSUNLOCKSTATE();
        NFSLOCKV4ROOTMUTEX();
        nfsv4_relref(&nfsv4rootfs_lock);
        NFSUNLOCKV4ROOTMUTEX();
        return;
    }

    /*
     * For each open share on file, dump it out.
     */
    stp = LIST_FIRST(&lfp->lf_open);
    while (stp != LIST_END(&lfp->lf_open) && cnt < maxcnt) {
        ldumpp[cnt].ndlck_flags = stp->ls_flags;
        ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
        ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
        ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
        ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
        ldumpp[cnt].ndlck_owner.nclid_idlen =
            stp->ls_openowner->ls_ownerlen;
        NFSBCOPY(stp->ls_openowner->ls_owner,
            ldumpp[cnt].ndlck_owner.nclid_id,
            stp->ls_openowner->ls_ownerlen);
        ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
        NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
            stp->ls_clp->lc_idlen);
        sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
        ldumpp[cnt].ndlck_addrfam = sad->sa_family;
        if (sad->sa_family == AF_INET) {
            rad = (struct sockaddr_in *)sad;
            ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
        } else {
            rad6 = (struct sockaddr_in6 *)sad;
            ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
        }
        stp = LIST_NEXT(stp, ls_file);
        cnt++;
    }

    /*
     * and all locks.
     */
    lop = LIST_FIRST(&lfp->lf_lock);
    while (lop != LIST_END(&lfp->lf_lock) && cnt < maxcnt) {
        stp = lop->lo_stp;
        ldumpp[cnt].ndlck_flags = lop->lo_flags;
        ldumpp[cnt].ndlck_first = lop->lo_first;
        ldumpp[cnt].ndlck_end = lop->lo_end;
        ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
        ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
        ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
        ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
        ldumpp[cnt].ndlck_owner.nclid_idlen = stp->ls_ownerlen;
        NFSBCOPY(stp->ls_owner, ldumpp[cnt].ndlck_owner.nclid_id,
            stp->ls_ownerlen);
        ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
        NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
            stp->ls_clp->lc_idlen);
        sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
        ldumpp[cnt].ndlck_addrfam = sad->sa_family;
        if (sad->sa_family == AF_INET) {
            rad = (struct sockaddr_in *)sad;
            ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
        } else {
            rad6 = (struct sockaddr_in6 *)sad;
            ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
        }
        lop = LIST_NEXT(lop, lo_lckfile);
        cnt++;
    }

    /*
     * and the delegations.
     */
    stp = LIST_FIRST(&lfp->lf_deleg);
    while (stp != LIST_END(&lfp->lf_deleg) && cnt < maxcnt) {
        ldumpp[cnt].ndlck_flags = stp->ls_flags;
        ldumpp[cnt].ndlck_stateid.seqid = stp->ls_stateid.seqid;
        ldumpp[cnt].ndlck_stateid.other[0] = stp->ls_stateid.other[0];
        ldumpp[cnt].ndlck_stateid.other[1] = stp->ls_stateid.other[1];
        ldumpp[cnt].ndlck_stateid.other[2] = stp->ls_stateid.other[2];
        /* Delegations have no owner string. */
        ldumpp[cnt].ndlck_owner.nclid_idlen = 0;
        ldumpp[cnt].ndlck_clid.nclid_idlen = stp->ls_clp->lc_idlen;
        NFSBCOPY(stp->ls_clp->lc_id, ldumpp[cnt].ndlck_clid.nclid_id,
            stp->ls_clp->lc_idlen);
        sad=NFSSOCKADDR(stp->ls_clp->lc_req.nr_nam, struct sockaddr *);
        ldumpp[cnt].ndlck_addrfam = sad->sa_family;
        if (sad->sa_family == AF_INET) {
            rad = (struct sockaddr_in *)sad;
            ldumpp[cnt].ndlck_cbaddr.sin_addr = rad->sin_addr;
        } else {
            rad6 = (struct sockaddr_in6 *)sad;
            ldumpp[cnt].ndlck_cbaddr.sin6_addr = rad6->sin6_addr;
        }
        stp = LIST_NEXT(stp, ls_file);
        cnt++;
    }

    /*
     * If list isn't full, mark end of list by setting the client name
     * to zero length.
     */
    if (cnt < maxcnt)
        ldumpp[cnt].ndlck_clid.nclid_idlen = 0;
    NFSUNLOCKSTATE();
    NFSLOCKV4ROOTMUTEX();
    nfsv4_relref(&nfsv4rootfs_lock);
    NFSUNLOCKV4ROOTMUTEX();
}

/*
 * Server timer routine. It can scan any linked list, so long
 * as it holds the spin/mutex lock and there is no exclusive lock on
 * nfsv4rootfs_lock.
 * (For OpenBSD, a kthread is ok. For FreeBSD, I think it is ok
 *  to do this from a callout, since the spin locks work. For
 *  Darwin, I'm not sure what will work correctly yet.)
 * Should be called once per second.
 */
APPLESTATIC void
nfsrv_servertimer(void)
{
    struct nfsclient *clp, *nclp;
    struct nfsstate *stp, *nstp;
    int got_ref, i;

    /*
     * Make sure nfsboottime is set. This is used by V3 as well
     * as V4. Note that nfsboottime is not nfsrvboottime, which is
     * only used by the V4 server for leases.
     */
    if (nfsboottime.tv_sec == 0)
        NFSSETBOOTTIME(nfsboottime);

    /*
     * If server hasn't started yet, just return.
     */
    NFSLOCKSTATE();
    if (nfsrv_stablefirst.nsf_eograce == 0) {
        NFSUNLOCKSTATE();
        return;
    }
    if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE)) {
        /* Flag end-of-grace once the grace period has passed. */
        if (!(nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) &&
            NFSD_MONOSEC > nfsrv_stablefirst.nsf_eograce)
            nfsrv_stablefirst.nsf_flags |=
                (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
        NFSUNLOCKSTATE();
        return;
    }

    /*
     * Try and get a reference count on the nfsv4rootfs_lock so that
     * no nfsd thread can acquire an exclusive lock on it before this
     * call is done. If it is already exclusively locked, just return.
     */
    NFSLOCKV4ROOTMUTEX();
    got_ref = nfsv4_getref_nonblock(&nfsv4rootfs_lock);
    NFSUNLOCKV4ROOTMUTEX();
    if (got_ref == 0) {
        NFSUNLOCKSTATE();
        return;
    }

    /*
     * For each client...
     */
    for (i = 0; i < nfsrv_clienthashsize; i++) {
        clp = LIST_FIRST(&nfsclienthash[i]);
        while (clp != LIST_END(&nfsclienthash[i])) {
            nclp = LIST_NEXT(clp, lc_hash);
            if (!(clp->lc_flags & LCL_EXPIREIT)) {
                if (((clp->lc_expiry + NFSRV_STALELEASE) < NFSD_MONOSEC
                     && ((LIST_EMPTY(&clp->lc_deleg)
                          && LIST_EMPTY(&clp->lc_open)) ||
                         nfsrv_clients > nfsrv_clienthighwater)) ||
                    (clp->lc_expiry + NFSRV_MOULDYLEASE) < NFSD_MONOSEC ||
                    (clp->lc_expiry < NFSD_MONOSEC &&
                     (nfsrv_openpluslock * 10 / 9) > nfsrv_v4statelimit)) {
                    /*
                     * Lease has expired several nfsrv_lease times ago:
                     * PLUS
                     *  - no state is associated with it
                     *  OR
                     *  - above high water mark for number of clients
                     *    (nfsrv_clienthighwater should be large enough
                     *     that this only occurs when clients fail to
                     *     use the same nfs_client_id4.id. Maybe somewhat
                     *     higher that the maximum number of clients that
                     *     will mount this server?)
                     * OR
                     * Lease has expired a very long time ago
                     * OR
                     * Lease has expired PLUS the number of opens + locks
                     * has exceeded 90% of capacity
                     *
                     * --> Mark for expiry. The actual expiry will be done
                     *     by an nfsd sometime soon.
                     */
                    clp->lc_flags |= LCL_EXPIREIT;
                    nfsrv_stablefirst.nsf_flags |=
                        (NFSNSF_NEEDLOCK | NFSNSF_EXPIREDCLIENT);
                } else {
                    /*
                     * If there are no opens, increment no open tick cnt
                     * If time exceeds NFSNOOPEN, mark it to be thrown away
                     * otherwise, if there is an open, reset no open time
                     * Hopefully, this will avoid excessive re-creation
                     * of open owners and subsequent open confirms.
                     */
                    stp = LIST_FIRST(&clp->lc_open);
                    while (stp != LIST_END(&clp->lc_open)) {
                        nstp = LIST_NEXT(stp, ls_list);
                        if (LIST_EMPTY(&stp->ls_open)) {
                            stp->ls_noopens++;
                            if (stp->ls_noopens > NFSNOOPEN ||
                                (nfsrv_openpluslock * 2) >
                                nfsrv_v4statelimit)
                                nfsrv_stablefirst.nsf_flags |=
                                    NFSNSF_NOOPENS;
                        } else {
                            stp->ls_noopens = 0;
                        }
                        stp = nstp;
                    }
                }
            }
            clp = nclp;
        }
    }
    NFSUNLOCKSTATE();
    NFSLOCKV4ROOTMUTEX();
    nfsv4_relref(&nfsv4rootfs_lock);
    NFSUNLOCKV4ROOTMUTEX();
}

/*
 * The following set of functions free up the various data structures.
 */
/*
 * Clear out all open/lock state related to this nfsclient.
 * Caller must hold an exclusive lock on nfsv4rootfs_lock, so that
 * there are no other active nfsd threads.
 */
APPLESTATIC void
nfsrv_cleanclient(struct nfsclient *clp, NFSPROC_T *p)
{
    struct nfsstate *stp, *nstp;
    struct nfsdsession *sep, *nsep;

    LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp)
        nfsrv_freeopenowner(stp, 1, p);
    if ((clp->lc_flags & LCL_ADMINREVOKED) == 0)
        LIST_FOREACH_SAFE(sep, &clp->lc_session, sess_list, nsep)
            (void)nfsrv_freesession(sep, NULL);
}

/*
 * Free a client that has been cleaned. It should also already have been
 * removed from the lists.
 * (Just to be safe w.r.t. newnfs_disconnect(), call this function when
 *  softclock interrupts are enabled.)
*/
APPLESTATIC void
nfsrv_zapclient(struct nfsclient *clp, NFSPROC_T *p)
{

#ifdef notyet
    /* Tell the client its GSS callback handle is being destroyed. */
    if ((clp->lc_flags & (LCL_GSS | LCL_CALLBACKSON)) ==
         (LCL_GSS | LCL_CALLBACKSON) &&
        (clp->lc_hand.nfsh_flag & NFSG_COMPLETE) &&
        clp->lc_handlelen > 0) {
        clp->lc_hand.nfsh_flag &= ~NFSG_COMPLETE;
        clp->lc_hand.nfsh_flag |= NFSG_DESTROYED;
        (void) nfsrv_docallback(clp, NFSV4PROC_CBNULL,
            NULL, 0, NULL, NULL, NULL, p);
    }
#endif
    newnfs_disconnect(&clp->lc_req);
    NFSSOCKADDRFREE(clp->lc_req.nr_nam);
    NFSFREEMUTEX(&clp->lc_req.nr_mtx);
    free(clp->lc_stateid, M_NFSDCLIENT);
    free(clp, M_NFSDCLIENT);
    /* Update the global accounting under the state mutex. */
    NFSLOCKSTATE();
    nfsstatsv1.srvclients--;
    nfsrv_openpluslock--;
    nfsrv_clients--;
    NFSUNLOCKSTATE();
}

/*
 * Free a list of delegation state structures.
 * (This function will also free all nfslockfile structures that no
 *  longer have associated state.)
 */
APPLESTATIC void
nfsrv_freedeleglist(struct nfsstatehead *sthp)
{
    struct nfsstate *stp, *nstp;

    LIST_FOREACH_SAFE(stp, sthp, ls_list, nstp) {
        nfsrv_freedeleg(stp);
    }
    LIST_INIT(sthp);
}

/*
 * Free up a delegation.
 */
static void
nfsrv_freedeleg(struct nfsstate *stp)
{
    struct nfslockfile *lfp;

    LIST_REMOVE(stp, ls_hash);
    LIST_REMOVE(stp, ls_list);
    LIST_REMOVE(stp, ls_file);
    lfp = stp->ls_lfp;
    /* Reclaim the nfslockfile if no state of any kind remains on it. */
    if (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
        LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_locallock) &&
        LIST_EMPTY(&lfp->lf_rollback) && lfp->lf_usecount == 0 &&
        nfsv4_testlock(&lfp->lf_locallock_lck) == 0)
        nfsrv_freenfslockfile(lfp);
    FREE((caddr_t)stp, M_NFSDSTATE);
    nfsstatsv1.srvdelegates--;
    nfsrv_openpluslock--;
    nfsrv_delegatecnt--;
}

/*
 * This function frees an open owner and all associated opens.
 */
static void
nfsrv_freeopenowner(struct nfsstate *stp, int cansleep, NFSPROC_T *p)
{
    struct nfsstate *nstp, *tstp;

    LIST_REMOVE(stp, ls_list);
    /*
     * Now, free all associated opens.
     */
    nstp = LIST_FIRST(&stp->ls_open);
    while (nstp != LIST_END(&stp->ls_open)) {
        tstp = nstp;
        nstp = LIST_NEXT(nstp, ls_list);
        (void) nfsrv_freeopen(tstp, NULL, cansleep, p);
    }
    if (stp->ls_op)
        nfsrvd_derefcache(stp->ls_op);
    FREE((caddr_t)stp, M_NFSDSTATE);
    nfsstatsv1.srvopenowners--;
    nfsrv_openpluslock--;
}

/*
 * This function frees an open (nfsstate open structure) with all associated
 * lock_owners and locks. It also frees the nfslockfile structure iff there
 * are no other opens on the file.
 * Returns 1 if it free'd the nfslockfile, 0 otherwise.
 */
static int
nfsrv_freeopen(struct nfsstate *stp, vnode_t vp, int cansleep, NFSPROC_T *p)
{
    struct nfsstate *nstp, *tstp;
    struct nfslockfile *lfp;
    int ret;

    LIST_REMOVE(stp, ls_hash);
    LIST_REMOVE(stp, ls_list);
    LIST_REMOVE(stp, ls_file);

    lfp = stp->ls_lfp;
    /*
     * Now, free all lockowners associated with this open.
     */
    LIST_FOREACH_SAFE(tstp, &stp->ls_open, ls_list, nstp)
        nfsrv_freelockowner(tstp, vp, cansleep, p);

    /*
     * The nfslockfile is freed here if there are no locks
     * associated with the open.
     * If there are locks associated with the open, the
     * nfslockfile structure can be freed via nfsrv_freelockowner().
     * Acquire the state mutex to avoid races with calls to
     * nfsrv_getlockfile().
     */
    if (cansleep != 0)
        NFSLOCKSTATE();
    if (lfp != NULL && LIST_EMPTY(&lfp->lf_open) &&
        LIST_EMPTY(&lfp->lf_deleg) && LIST_EMPTY(&lfp->lf_lock) &&
        LIST_EMPTY(&lfp->lf_locallock) && LIST_EMPTY(&lfp->lf_rollback) &&
        lfp->lf_usecount == 0 &&
        (cansleep != 0 || nfsv4_testlock(&lfp->lf_locallock_lck) == 0)) {
        nfsrv_freenfslockfile(lfp);
        ret = 1;
    } else
        ret = 0;
    if (cansleep != 0)
        NFSUNLOCKSTATE();
    FREE((caddr_t)stp, M_NFSDSTATE);
    nfsstatsv1.srvopens--;
    nfsrv_openpluslock--;
    return (ret);
}

/*
 * Frees a lockowner and all associated locks.
*/
static void
nfsrv_freelockowner(struct nfsstate *stp, vnode_t vp, int cansleep,
    NFSPROC_T *p)
{

    LIST_REMOVE(stp, ls_hash);
    LIST_REMOVE(stp, ls_list);
    nfsrv_freeallnfslocks(stp, vp, cansleep, p);
    if (stp->ls_op)
        nfsrvd_derefcache(stp->ls_op);
    FREE((caddr_t)stp, M_NFSDSTATE);
    nfsstatsv1.srvlockowners--;
    nfsrv_openpluslock--;
}

/*
 * Free all the nfs locks on a lockowner.
 * When local locking is enabled, each freed NFS lock is also released
 * locally via nfsrv_localunlock() and the rollback list is discarded.
 */
static void
nfsrv_freeallnfslocks(struct nfsstate *stp, vnode_t vp, int cansleep,
    NFSPROC_T *p)
{
    struct nfslock *lop, *nlop;
    struct nfsrollback *rlp, *nrlp;
    struct nfslockfile *lfp = NULL;
    int gottvp = 0;
    vnode_t tvp = NULL;
    uint64_t first, end;

    if (vp != NULL)
        ASSERT_VOP_UNLOCKED(vp, "nfsrv_freeallnfslocks: vnode locked");
    lop = LIST_FIRST(&stp->ls_lock);
    while (lop != LIST_END(&stp->ls_lock)) {
        nlop = LIST_NEXT(lop, lo_lckowner);
        /*
         * Since all locks should be for the same file, lfp should
         * not change.
         */
        if (lfp == NULL)
            lfp = lop->lo_lfp;
        else if (lfp != lop->lo_lfp)
            panic("allnfslocks");
        /*
         * If vp is NULL and cansleep != 0, a vnode must be acquired
         * from the file handle. This only occurs when called from
         * nfsrv_cleanclient().
         */
        if (gottvp == 0) {
            if (nfsrv_dolocallocks == 0)
                tvp = NULL;
            else if (vp == NULL && cansleep != 0) {
                tvp = nfsvno_getvp(&lfp->lf_fh);
                NFSVOPUNLOCK(tvp, 0);
            } else
                tvp = vp;
            gottvp = 1;
        }

        if (tvp != NULL) {
            if (cansleep == 0)
                panic("allnfs2");
            first = lop->lo_first;
            end = lop->lo_end;
            nfsrv_freenfslock(lop);
            nfsrv_localunlock(tvp, lfp, first, end, p);
            LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list,
                nrlp)
                free(rlp, M_NFSDROLLBACK);
            LIST_INIT(&lfp->lf_rollback);
        } else
            nfsrv_freenfslock(lop);
        lop = nlop;
    }
    /* Release the vnode ref acquired via nfsvno_getvp() above. */
    if (vp == NULL && tvp != NULL)
        vrele(tvp);
}

/*
 * Free an nfslock structure.
 */
static void
nfsrv_freenfslock(struct nfslock *lop)
{

    /* Only on the per-file list (and counted) once it is granted. */
    if (lop->lo_lckfile.le_prev != NULL) {
        LIST_REMOVE(lop, lo_lckfile);
        nfsstatsv1.srvlocks--;
        nfsrv_openpluslock--;
    }
    LIST_REMOVE(lop, lo_lckowner);
    FREE((caddr_t)lop, M_NFSDLOCK);
}

/*
 * This function frees an nfslockfile structure.
 */
static void
nfsrv_freenfslockfile(struct nfslockfile *lfp)
{

    LIST_REMOVE(lfp, lf_hash);
    FREE((caddr_t)lfp, M_NFSDLOCKFILE);
}

/*
 * This function looks up an nfsstate structure via stateid.
 * Sets *stpp on success; returns NFSERR_BADSTATEID when no stateid in
 * the client's hash chain matches.
 */
static int
nfsrv_getstate(struct nfsclient *clp, nfsv4stateid_t *stateidp,
    __unused u_int32_t flags, struct nfsstate **stpp)
{
    struct nfsstate *stp;
    struct nfsstatehead *hp;
    int error = 0;

    *stpp = NULL;
    hp = NFSSTATEHASH(clp, *stateidp);
    LIST_FOREACH(stp, hp, ls_hash) {
        if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
            NFSX_STATEIDOTHER))
            break;
    }

    /*
     * If no state id in list, return NFSERR_BADSTATEID.
     */
    if (stp == LIST_END(hp)) {
        error = NFSERR_BADSTATEID;
        goto out;
    }
    *stpp = stp;

out:
    NFSEXITCODE(error);
    return (error);
}

/*
 * This function gets an nfsstate structure via owner string.
 * Sets *stpp to the matching state, or NULL when no owner matches.
 */
static void
nfsrv_getowner(struct nfsstatehead *hp, struct nfsstate *new_stp,
    struct nfsstate **stpp)
{
    struct nfsstate *stp;

    *stpp = NULL;
    LIST_FOREACH(stp, hp, ls_list) {
        if (new_stp->ls_ownerlen == stp->ls_ownerlen &&
            !NFSBCMP(new_stp->ls_owner,stp->ls_owner,stp->ls_ownerlen)) {
            *stpp = stp;
            return;
        }
    }
}

/*
 * Lock control function called to update lock status.
 * Returns 0 upon success, -1 if there is no lock and the flags indicate
 * that one isn't to be created and an NFSERR_xxx for other errors.
 * The structures new_stp and new_lop are passed in as pointers that should
 * be set to NULL if the structure is used and shouldn't be free'd.
 * For the NFSLCK_TEST and NFSLCK_CHECK cases, the structures are
 * never used and can safely be allocated on the stack. For all other
 * cases, *new_stpp and *new_lopp should be malloc'd before the call,
 * in case they are used.
*/
/*
 * Control the NFSv4 byte-range lock and stateid-checking operations:
 * Lock/LockT/LockU (NFSLCK_LOCK / NFSLCK_TEST / NFSLCK_UNLOCK) plus the
 * stateid sanity checks done for I/O (NFSLCK_CHECK) and Setattr of size
 * (NFSLCK_SETATTR).
 * For the open_to_lock_owner lock-creation path, *new_stpp and *new_lopp
 * are consumed (set NULL) once the structures are linked into the state
 * lists; otherwise the caller retains ownership of them.
 * Returns 0 or an NFSERR_* value.  The resulting stateid is copied out
 * via *stateidp and a conflicting lock, if found, is described in *cfp
 * (when cfp != NULL).
 * NOTE(review): vp is expected to be exclusively locked on entry (see the
 * ASSERT_VOP_ELOCKED calls); it may be unlocked internally and is
 * re-locked before return when that happens (vnode_unlocked tracks this).
 */
APPLESTATIC int
nfsrv_lockctrl(vnode_t vp, struct nfsstate **new_stpp,
    struct nfslock **new_lopp, struct nfslockconflict *cfp,
    nfsquad_t clientid, nfsv4stateid_t *stateidp,
    __unused struct nfsexstuff *exp,
    struct nfsrv_descript *nd, NFSPROC_T *p)
{
	struct nfslock *lop;
	struct nfsstate *new_stp = *new_stpp;
	struct nfslock *new_lop = *new_lopp;
	struct nfsstate *tstp, *mystp, *nstp;
	int specialid = 0;
	struct nfslockfile *lfp;
	struct nfslock *other_lop = NULL;
	struct nfsstate *stp, *lckstp = NULL;
	struct nfsclient *clp = NULL;
	u_int32_t bits;
	/* haslock != 0 means this thread holds nfsv4rootfs_lock (see out:). */
	int error = 0, haslock = 0, ret, reterr;
	int getlckret, delegation = 0, filestruct_locked, vnode_unlocked = 0;
	fhandle_t nfh;
	uint64_t first, end;
	uint32_t lock_flags;

	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_SETATTR)) {
		/*
		 * Note the special cases of "all 1s" or "all 0s" stateids and
		 * let reads with all 1s go ahead.
		 */
		if (new_stp->ls_stateid.seqid == 0x0 &&
		    new_stp->ls_stateid.other[0] == 0x0 &&
		    new_stp->ls_stateid.other[1] == 0x0 &&
		    new_stp->ls_stateid.other[2] == 0x0)
			specialid = 1;
		else if (new_stp->ls_stateid.seqid == 0xffffffff &&
		    new_stp->ls_stateid.other[0] == 0xffffffff &&
		    new_stp->ls_stateid.other[1] == 0xffffffff &&
		    new_stp->ls_stateid.other[2] == 0xffffffff)
			specialid = 2;
	}

	/*
	 * Check for restart conditions (client and server).
	 */
	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
	    &new_stp->ls_stateid, specialid);
	if (error)
		goto out;

	/*
	 * Check for state resource limit exceeded.
	 */
	if ((new_stp->ls_flags & NFSLCK_LOCK) &&
	    nfsrv_openpluslock > nfsrv_v4statelimit) {
		error = NFSERR_RESOURCE;
		goto out;
	}

	/*
	 * For the lock case, get another nfslock structure,
	 * just in case we need it.
	 * Malloc now, before we start sifting through the linked lists,
	 * in case we have to wait for memory.
	 */
tryagain:
	/* Re-entered whenever a conflict handler dropped the state lock. */
	if (new_stp->ls_flags & NFSLCK_LOCK)
		MALLOC(other_lop, struct nfslock *, sizeof (struct nfslock),
		    M_NFSDLOCK, M_WAITOK);
	filestruct_locked = 0;
	reterr = 0;
	lfp = NULL;
	/*
	 * Get the lockfile structure for CFH now, so we can do a sanity
	 * check against the stateid, before incrementing the seqid#, since
	 * we want to return NFSERR_BADSTATEID on failure and the seqid#
	 * shouldn't be incremented for this case.
	 * If nfsrv_getlockfile() returns -1, it means "not found", which
	 * will be handled later.
	 * If we are doing Lock/LockU and local locking is enabled, sleep
	 * lock the nfslockfile structure.
	 */
	getlckret = nfsrv_getlockfh(vp, new_stp->ls_flags, NULL, &nfh, p);
	NFSLOCKSTATE();
	if (getlckret == 0) {
		if ((new_stp->ls_flags & (NFSLCK_LOCK | NFSLCK_UNLOCK)) != 0 &&
		    nfsrv_dolocallocks != 0 && nd->nd_repstat == 0) {
			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
			    &lfp, &nfh, 1);
			if (getlckret == 0)
				filestruct_locked = 1;
		} else
			getlckret = nfsrv_getlockfile(new_stp->ls_flags, NULL,
			    &lfp, &nfh, 0);
	}
	if (getlckret != 0 && getlckret != -1)
		reterr = getlckret;

	if (filestruct_locked != 0) {
		LIST_INIT(&lfp->lf_rollback);
		if ((new_stp->ls_flags & NFSLCK_LOCK)) {
			/*
			 * For local locking, do the advisory locking now, so
			 * that any conflict can be detected. A failure later
			 * can be rolled back locally. If an error is returned,
			 * struct nfslockfile has been unlocked and any local
			 * locking rolled back.
			 */
			NFSUNLOCKSTATE();
			if (vnode_unlocked == 0) {
				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl1");
				vnode_unlocked = 1;
				NFSVOPUNLOCK(vp, 0);
			}
			reterr = nfsrv_locallock(vp, lfp,
			    (new_lop->lo_flags & (NFSLCK_READ |
			    NFSLCK_WRITE)), new_lop->lo_first,
			    new_lop->lo_end, cfp, p);
			NFSLOCKSTATE();
		}
	}

	if (specialid == 0) {
	    if (new_stp->ls_flags & NFSLCK_TEST) {
		/*
		 * RFC 3530 does not list LockT as an op that renews a
		 * lease, but the consensus seems to be that it is ok
		 * for a server to do so.
		 */
		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
		    (nfsquad_t)((u_quad_t)0), 0, nd, p);

		/*
		 * Since NFSERR_EXPIRED, NFSERR_ADMINREVOKED are not valid
		 * error returns for LockT, just go ahead and test for a lock,
		 * since there are no locks for this client, but other locks
		 * can conflict. (ie. same client will always be false)
		 */
		if (error == NFSERR_EXPIRED || error == NFSERR_ADMINREVOKED)
		    error = 0;
		lckstp = new_stp;
	    } else {
		error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
		    (nfsquad_t)((u_quad_t)0), 0, nd, p);
		if (error == 0)
		    /*
		     * Look up the stateid
		     */
		    error = nfsrv_getstate(clp, &new_stp->ls_stateid,
		        new_stp->ls_flags, &stp);
		/*
		 * do some sanity checks for an unconfirmed open or a
		 * stateid that refers to the wrong file, for an open stateid
		 */
		if (error == 0 && (stp->ls_flags & NFSLCK_OPEN) &&
		    ((stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM) ||
		     (getlckret == 0 && stp->ls_lfp != lfp))){
			/*
			 * NFSLCK_SETATTR should return OK rather than
			 * NFSERR_BADSTATEID.
			 * The only exception is using SETATTR with SIZE.
			 */
			if ((new_stp->ls_flags &
			     (NFSLCK_SETATTR | NFSLCK_CHECK)) != NFSLCK_SETATTR)
				error = NFSERR_BADSTATEID;
		}
		if (error == 0 &&
		    (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) &&
		    getlckret == 0 && stp->ls_lfp != lfp)
			error = NFSERR_BADSTATEID;

		/*
		 * If the lockowner stateid doesn't refer to the same file,
		 * I believe that is considered ok, since some clients will
		 * only create a single lockowner and use that for all locks
		 * on all files.
		 * For now, log it as a diagnostic, instead of considering it
		 * a BadStateid.
		 */
		if (error == 0 && (stp->ls_flags & (NFSLCK_OPEN |
		    NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) == 0 &&
		    getlckret == 0 && stp->ls_lfp != lfp) {
#ifdef DIAGNOSTIC
			printf("Got a lock statid for different file open\n");
#endif
			/*
			error = NFSERR_BADSTATEID;
			*/
		}

		if (error == 0) {
		    if (new_stp->ls_flags & NFSLCK_OPENTOLOCK) {
			/*
			 * If haslock set, we've already checked the seqid.
			 */
			if (!haslock) {
			    if (stp->ls_flags & NFSLCK_OPEN)
				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
				    stp->ls_openowner, new_stp->ls_op);
			    else
				error = NFSERR_BADSTATEID;
			}
			if (!error)
			    nfsrv_getowner(&stp->ls_open, new_stp, &lckstp);
			if (lckstp)
			    /*
			     * I believe this should be an error, but it
			     * isn't obvious what NFSERR_xxx would be
			     * appropriate, so I'll use NFSERR_INVAL for now.
			     */
			    error = NFSERR_INVAL;
			else
			    lckstp = new_stp;
		    } else if (new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK)) {
			/*
			 * If haslock set, ditto above.
			 */
			if (!haslock) {
			    if (stp->ls_flags & NFSLCK_OPEN)
				error = NFSERR_BADSTATEID;
			    else
				error = nfsrv_checkseqid(nd, new_stp->ls_seq,
				    stp, new_stp->ls_op);
			}
			lckstp = stp;
		    } else {
			lckstp = stp;
		    }
		}
		/*
		 * If the seqid part of the stateid isn't the same, return
		 * NFSERR_OLDSTATEID for cases other than I/O Ops.
		 * For I/O Ops, only return NFSERR_OLDSTATEID if
		 * nfsrv_returnoldstateid is set. (The consensus on the email
		 * list was that most clients would prefer to not receive
		 * NFSERR_OLDSTATEID for I/O Ops, but the RFC suggests that that
		 * is what will happen, so I use the nfsrv_returnoldstateid to
		 * allow for either server configuration.)
		 */
		if (!error && stp->ls_stateid.seqid!=new_stp->ls_stateid.seqid &&
		    (((nd->nd_flag & ND_NFSV41) == 0 &&
		      (!(new_stp->ls_flags & NFSLCK_CHECK) ||
		       nfsrv_returnoldstateid)) ||
		     ((nd->nd_flag & ND_NFSV41) != 0 &&
		      new_stp->ls_stateid.seqid != 0)))
		    error = NFSERR_OLDSTATEID;
	    }
	}

	/*
	 * Now we can check for grace.
	 */
	if (!error)
		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
		nfsrv_checkstable(clp))
		error = NFSERR_NOGRACE;
	/*
	 * If we successfully Reclaimed state, note that.
	 */
	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error)
		nfsrv_markstable(clp);

	/*
	 * At this point, either error == NFSERR_BADSTATEID or the
	 * seqid# has been updated, so we can return any error.
	 * If error == 0, there may be an error in:
	 *    nd_repstat - Set by the calling function.
	 *    reterr - Set above, if getting the nfslockfile structure
	 *       or acquiring the local lock failed.
	 *    (If both of these are set, nd_repstat should probably be
	 *     returned, since that error was detected before this
	 *     function call.)
	 */
	if (error != 0 || nd->nd_repstat != 0 || reterr != 0) {
		if (error == 0) {
			if (nd->nd_repstat != 0)
				error = nd->nd_repstat;
			else
				error = reterr;
		}
		if (filestruct_locked != 0) {
			/* Roll back local locks. */
			NFSUNLOCKSTATE();
			if (vnode_unlocked == 0) {
				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl2");
				vnode_unlocked = 1;
				NFSVOPUNLOCK(vp, 0);
			}
			nfsrv_locallock_rollback(vp, lfp, p);
			NFSLOCKSTATE();
			nfsrv_unlocklf(lfp);
		}
		NFSUNLOCKSTATE();
		goto out;
	}

	/*
	 * Check the nfsrv_getlockfile return.
	 * Returned -1 if no structure found.
	 */
	if (getlckret == -1) {
		error = NFSERR_EXPIRED;
		/*
		 * Called from lockt, so no lock is OK.
		 */
		if (new_stp->ls_flags & NFSLCK_TEST) {
			error = 0;
		} else if (new_stp->ls_flags &
		    (NFSLCK_CHECK | NFSLCK_SETATTR)) {
			/*
			 * Called to check for a lock, OK if the stateid is all
			 * 1s or all 0s, but there should be an nfsstate
			 * otherwise.
			 * (ie. If there is no open, I'll assume no share
			 *  deny bits.)
			 */
			if (specialid)
				error = 0;
			else
				error = NFSERR_BADSTATEID;
		}
		NFSUNLOCKSTATE();
		goto out;
	}

	/*
	 * For NFSLCK_CHECK and NFSLCK_LOCK, test for a share conflict.
	 * For NFSLCK_CHECK, allow a read if write access is granted,
	 * but check for a deny. For NFSLCK_LOCK, require correct access,
	 * which implies a conflicting deny can't exist.
	 */
	if (new_stp->ls_flags & (NFSLCK_CHECK | NFSLCK_LOCK)) {
	    /*
	     * Four kinds of state id:
	     * - specialid (all 0s or all 1s), only for NFSLCK_CHECK
	     * - stateid for an open
	     * - stateid for a delegation
	     * - stateid for a lock owner
	     */
	    if (!specialid) {
		if (stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) {
		    delegation = 1;
		    mystp = stp;
		    nfsrv_delaydelegtimeout(stp);
		} else if (stp->ls_flags & NFSLCK_OPEN) {
		    mystp = stp;
		} else {
		    mystp = stp->ls_openstp;
		}
		/*
		 * If locking or checking, require correct access
		 * bit set.
		 */
		if (((new_stp->ls_flags & NFSLCK_LOCK) &&
		     !((new_lop->lo_flags >> NFSLCK_LOCKSHIFT) &
		       mystp->ls_flags & NFSLCK_ACCESSBITS)) ||
		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_READACCESS)) ==
		      (NFSLCK_CHECK | NFSLCK_READACCESS) &&
		     !(mystp->ls_flags & NFSLCK_READACCESS) &&
		     nfsrv_allowreadforwriteopen == 0) ||
		    ((new_stp->ls_flags & (NFSLCK_CHECK|NFSLCK_WRITEACCESS)) ==
		      (NFSLCK_CHECK | NFSLCK_WRITEACCESS) &&
		     !(mystp->ls_flags & NFSLCK_WRITEACCESS))) {
			if (filestruct_locked != 0) {
				/* Roll back local locks. */
				NFSUNLOCKSTATE();
				if (vnode_unlocked == 0) {
					ASSERT_VOP_ELOCKED(vp,
					    "nfsrv_lockctrl3");
					vnode_unlocked = 1;
					NFSVOPUNLOCK(vp, 0);
				}
				nfsrv_locallock_rollback(vp, lfp, p);
				NFSLOCKSTATE();
				nfsrv_unlocklf(lfp);
			}
			NFSUNLOCKSTATE();
			error = NFSERR_OPENMODE;
			goto out;
		}
	    } else
		mystp = NULL;
	    if ((new_stp->ls_flags & NFSLCK_CHECK) && !delegation) {
		/*
		 * Check for a conflicting deny bit.
		 */
		LIST_FOREACH(tstp, &lfp->lf_open, ls_file) {
		    if (tstp != mystp) {
			bits = tstp->ls_flags;
			bits >>= NFSLCK_SHIFT;
			if (new_stp->ls_flags & bits & NFSLCK_ACCESSBITS) {
			    KASSERT(vnode_unlocked == 0,
				("nfsrv_lockctrl: vnode unlocked1"));
			    ret = nfsrv_clientconflict(tstp->ls_clp, &haslock,
				vp, p);
			    if (ret == 1) {
				/*
				* nfsrv_clientconflict unlocks state
				* when it returns non-zero.
				*/
				lckstp = NULL;
				goto tryagain;
			    }
			    if (ret == 0)
				NFSUNLOCKSTATE();
			    if (ret == 2)
				error = NFSERR_PERM;
			    else
				error = NFSERR_OPENMODE;
			    goto out;
			}
		    }
		}
		/* We're outta here */
		NFSUNLOCKSTATE();
		goto out;
	    }
	}

	/*
	 * For setattr, just get rid of all the Delegations for other clients.
	 */
	if (new_stp->ls_flags & NFSLCK_SETATTR) {
		KASSERT(vnode_unlocked == 0,
		    ("nfsrv_lockctrl: vnode unlocked2"));
		ret = nfsrv_cleandeleg(vp, lfp, clp, &haslock, p);
		if (ret) {
			/*
			 * nfsrv_cleandeleg() unlocks state when it
			 * returns non-zero.
			 */
			if (ret == -1) {
				lckstp = NULL;
				goto tryagain;
			}
			error = ret;
			goto out;
		}
		if (!(new_stp->ls_flags & NFSLCK_CHECK) ||
		    (LIST_EMPTY(&lfp->lf_open) && LIST_EMPTY(&lfp->lf_lock) &&
		     LIST_EMPTY(&lfp->lf_deleg))) {
			NFSUNLOCKSTATE();
			goto out;
		}
	}

	/*
	 * Check for a conflicting delegation. If one is found, call
	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
	 * been set yet, it will get the lock. Otherwise, it will recall
	 * the delegation. Then, we try try again...
	 * I currently believe the conflict algorithm to be:
	 * For Lock Ops (Lock/LockT/LockU)
	 * - there is a conflict iff a different client has a write delegation
	 * For Reading (Read Op)
	 * - there is a conflict iff a different client has a write delegation
	 *   (the specialids are always a different client)
	 * For Writing (Write/Setattr of size)
	 * - there is a conflict if a different client has any delegation
	 * - there is a conflict if the same client has a read delegation
	 *   (I don't understand why this isn't allowed, but that seems to be
	 *    the current consensus?)
	 */
	tstp = LIST_FIRST(&lfp->lf_deleg);
	while (tstp != LIST_END(&lfp->lf_deleg)) {
	    nstp = LIST_NEXT(tstp, ls_file);
	    if ((((new_stp->ls_flags&(NFSLCK_LOCK|NFSLCK_UNLOCK|NFSLCK_TEST))||
		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
		  (new_lop->lo_flags & NFSLCK_READ))) &&
		  clp != tstp->ls_clp &&
		 (tstp->ls_flags & NFSLCK_DELEGWRITE)) ||
		 ((new_stp->ls_flags & NFSLCK_CHECK) &&
		   (new_lop->lo_flags & NFSLCK_WRITE) &&
		  (clp != tstp->ls_clp ||
		   (tstp->ls_flags & NFSLCK_DELEGREAD)))) {
		ret = 0;
		if (filestruct_locked != 0) {
			/* Roll back local locks. */
			NFSUNLOCKSTATE();
			if (vnode_unlocked == 0) {
				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl4");
				NFSVOPUNLOCK(vp, 0);
			}
			nfsrv_locallock_rollback(vp, lfp, p);
			NFSLOCKSTATE();
			nfsrv_unlocklf(lfp);
			NFSUNLOCKSTATE();
			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
			vnode_unlocked = 0;
			if ((vp->v_iflag & VI_DOOMED) != 0)
				ret = NFSERR_SERVERFAULT;
			NFSLOCKSTATE();
		}
		if (ret == 0)
			ret = nfsrv_delegconflict(tstp, &haslock, p, vp);
		if (ret) {
		    /*
		     * nfsrv_delegconflict unlocks state when it
		     * returns non-zero, which it always does.
		     */
		    if (other_lop) {
			FREE((caddr_t)other_lop, M_NFSDLOCK);
			other_lop = NULL;
		    }
		    if (ret == -1) {
			lckstp = NULL;
			goto tryagain;
		    }
		    error = ret;
		    goto out;
		}
		/* Never gets here. */
	    }
	    tstp = nstp;
	}

	/*
	 * Handle the unlock case by calling nfsrv_updatelock().
	 * (Should I have done some access checking above for unlock? For now,
	 *  just let it happen.)
	 */
	if (new_stp->ls_flags & NFSLCK_UNLOCK) {
		first = new_lop->lo_first;
		end = new_lop->lo_end;
		nfsrv_updatelock(stp, new_lopp, &other_lop, lfp);
		stateidp->seqid = ++(stp->ls_stateid.seqid);
		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
			stateidp->seqid = stp->ls_stateid.seqid = 1;
		stateidp->other[0] = stp->ls_stateid.other[0];
		stateidp->other[1] = stp->ls_stateid.other[1];
		stateidp->other[2] = stp->ls_stateid.other[2];
		if (filestruct_locked != 0) {
			NFSUNLOCKSTATE();
			if (vnode_unlocked == 0) {
				ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl5");
				vnode_unlocked = 1;
				NFSVOPUNLOCK(vp, 0);
			}
			/* Update the local locks. */
			nfsrv_localunlock(vp, lfp, first, end, p);
			NFSLOCKSTATE();
			nfsrv_unlocklf(lfp);
		}
		NFSUNLOCKSTATE();
		goto out;
	}

	/*
	 * Search for a conflicting lock. A lock conflicts if:
	 * - the lock range overlaps and
	 * - at least one lock is a write lock and
	 * - it is not owned by the same lock owner
	 */
	if (!delegation) {
	  LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) {
	    if (new_lop->lo_end > lop->lo_first &&
		new_lop->lo_first < lop->lo_end &&
		(new_lop->lo_flags == NFSLCK_WRITE ||
		 lop->lo_flags == NFSLCK_WRITE) &&
		lckstp != lop->lo_stp &&
		(clp != lop->lo_stp->ls_clp ||
		 lckstp->ls_ownerlen != lop->lo_stp->ls_ownerlen ||
		 NFSBCMP(lckstp->ls_owner, lop->lo_stp->ls_owner,
		    lckstp->ls_ownerlen))) {
		if (other_lop) {
		    FREE((caddr_t)other_lop, M_NFSDLOCK);
		    other_lop = NULL;
		}
		if (vnode_unlocked != 0)
		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
			NULL, p);
		else
		    ret = nfsrv_clientconflict(lop->lo_stp->ls_clp, &haslock,
			vp, p);
		if (ret == 1) {
		    if (filestruct_locked != 0) {
			if (vnode_unlocked == 0) {
			    ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl6");
			    NFSVOPUNLOCK(vp, 0);
			}
			/* Roll back local locks. */
			nfsrv_locallock_rollback(vp, lfp, p);
			NFSLOCKSTATE();
			nfsrv_unlocklf(lfp);
			NFSUNLOCKSTATE();
			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
			vnode_unlocked = 0;
			if ((vp->v_iflag & VI_DOOMED) != 0) {
			    error = NFSERR_SERVERFAULT;
			    goto out;
			}
		    }
		    /*
		     * nfsrv_clientconflict() unlocks state when it
		     * returns non-zero.
		     */
		    lckstp = NULL;
		    goto tryagain;
		}
		/*
		 * Found a conflicting lock, so record the conflict and
		 * return the error.
		 */
		if (cfp != NULL && ret == 0) {
		    cfp->cl_clientid.lval[0]=lop->lo_stp->ls_stateid.other[0];
		    cfp->cl_clientid.lval[1]=lop->lo_stp->ls_stateid.other[1];
		    cfp->cl_first = lop->lo_first;
		    cfp->cl_end = lop->lo_end;
		    cfp->cl_flags = lop->lo_flags;
		    cfp->cl_ownerlen = lop->lo_stp->ls_ownerlen;
		    NFSBCOPY(lop->lo_stp->ls_owner, cfp->cl_owner,
			cfp->cl_ownerlen);
		}
		if (ret == 2)
		    error = NFSERR_PERM;
		else if (new_stp->ls_flags & NFSLCK_RECLAIM)
		    error = NFSERR_RECLAIMCONFLICT;
		else if (new_stp->ls_flags & NFSLCK_CHECK)
		    error = NFSERR_LOCKED;
		else
		    error = NFSERR_DENIED;
		if (filestruct_locked != 0 && ret == 0) {
		    /* Roll back local locks. */
		    NFSUNLOCKSTATE();
		    if (vnode_unlocked == 0) {
			ASSERT_VOP_ELOCKED(vp, "nfsrv_lockctrl7");
			vnode_unlocked = 1;
			NFSVOPUNLOCK(vp, 0);
		    }
		    nfsrv_locallock_rollback(vp, lfp, p);
		    NFSLOCKSTATE();
		    nfsrv_unlocklf(lfp);
		}
		if (ret == 0)
		    NFSUNLOCKSTATE();
		goto out;
	    }
	  }
	}

	/*
	 * We only get here if there was no lock that conflicted.
	 */
	if (new_stp->ls_flags & (NFSLCK_TEST | NFSLCK_CHECK)) {
		NFSUNLOCKSTATE();
		goto out;
	}

	/*
	 * We only get here when we are creating or modifying a lock.
	 * There are two variants:
	 * - exist_lock_owner where lock_owner exists
	 * - open_to_lock_owner with new lock_owner
	 */
	first = new_lop->lo_first;
	end = new_lop->lo_end;
	lock_flags = new_lop->lo_flags;
	if (!(new_stp->ls_flags & NFSLCK_OPENTOLOCK)) {
		nfsrv_updatelock(lckstp, new_lopp, &other_lop, lfp);
		stateidp->seqid = ++(lckstp->ls_stateid.seqid);
		if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
			stateidp->seqid = lckstp->ls_stateid.seqid = 1;
		stateidp->other[0] = lckstp->ls_stateid.other[0];
		stateidp->other[1] = lckstp->ls_stateid.other[1];
		stateidp->other[2] = lckstp->ls_stateid.other[2];
	} else {
		/*
		 * The new open_to_lock_owner case.
		 * Link the new nfsstate into the lists.
		 */
		new_stp->ls_seq = new_stp->ls_opentolockseq;
		nfsrvd_refcache(new_stp->ls_op);
		stateidp->seqid = new_stp->ls_stateid.seqid = 1;
		stateidp->other[0] = new_stp->ls_stateid.other[0] =
		    clp->lc_clientid.lval[0];
		stateidp->other[1] = new_stp->ls_stateid.other[1] =
		    clp->lc_clientid.lval[1];
		stateidp->other[2] = new_stp->ls_stateid.other[2] =
		    nfsrv_nextstateindex(clp);
		new_stp->ls_clp = clp;
		LIST_INIT(&new_stp->ls_lock);
		new_stp->ls_openstp = stp;
		new_stp->ls_lfp = lfp;
		nfsrv_insertlock(new_lop, (struct nfslock *)new_stp, new_stp,
		    lfp);
		LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_stp->ls_stateid),
		    new_stp, ls_hash);
		LIST_INSERT_HEAD(&stp->ls_open, new_stp, ls_list);
		/* The state lists now own these structures. */
		*new_lopp = NULL;
		*new_stpp = NULL;
		nfsstatsv1.srvlockowners++;
		nfsrv_openpluslock++;
	}
	if (filestruct_locked != 0) {
		NFSUNLOCKSTATE();
		nfsrv_locallock_commit(lfp, lock_flags, first, end);
		NFSLOCKSTATE();
		nfsrv_unlocklf(lfp);
	}
	NFSUNLOCKSTATE();
out:
	if (haslock) {
		NFSLOCKV4ROOTMUTEX();
		nfsv4_unlock(&nfsv4rootfs_lock, 1);
		NFSUNLOCKV4ROOTMUTEX();
	}
	if (vnode_unlocked != 0) {
		/* Restore the vnode lock dropped earlier. */
		NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
		if (error == 0 && (vp->v_iflag & VI_DOOMED) != 0)
			error = NFSERR_SERVERFAULT;
	}
	if (other_lop)
		FREE((caddr_t)other_lop, M_NFSDLOCK);
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Check for state errors for Open.
 * repstat is passed back out as an error if more critical errors
 * are not detected.
 */
APPLESTATIC int
nfsrv_opencheck(nfsquad_t clientid, nfsv4stateid_t *stateidp,
    struct nfsstate *new_stp, vnode_t vp, struct nfsrv_descript *nd,
    NFSPROC_T *p, int repstat)
{
	struct nfsstate *stp, *nstp;
	struct nfsclient *clp;
	struct nfsstate *ownerstp;
	struct nfslockfile *lfp, *new_lfp;
	int error = 0, haslock = 0, ret, readonly = 0, getfhret = 0;

	if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS)
		readonly = 1;
	/*
	 * Check for restart conditions (client and server).
	 */
	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
	    &new_stp->ls_stateid, 0);
	if (error)
		goto out;

	/*
	 * Check for state resource limit exceeded.
	 * Technically this should be SMP protected, but the worst
	 * case error is "out by one or two" on the count when it
	 * returns NFSERR_RESOURCE and the limit is just a rather
	 * arbitrary high water mark, so no harm is done.
	 */
	if (nfsrv_openpluslock > nfsrv_v4statelimit) {
		error = NFSERR_RESOURCE;
		goto out;
	}

tryagain:
	/*
	 * Re-entered after nfsrv_clientconflict()/nfsrv_delegconflict()
	 * dropped the state lock; all lookups are redone from scratch.
	 */
	MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile),
	    M_NFSDLOCKFILE, M_WAITOK);
	if (vp)
		getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp,
		    NULL, p);
	NFSLOCKSTATE();
	/*
	 * Get the nfsclient structure.
	 */
	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
	    (nfsquad_t)((u_quad_t)0), 0, nd, p);

	/*
	 * Look up the open owner. See if it needs confirmation and
	 * check the seq#, as required.
	 */
	if (!error)
		nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp);
	if (!error && ownerstp) {
		error = nfsrv_checkseqid(nd, new_stp->ls_seq, ownerstp,
		    new_stp->ls_op);
		/*
		 * If the OpenOwner hasn't been confirmed, assume the
		 * old one was a replay and this one is ok.
		 * See: RFC3530 Sec. 14.2.18.
		 */
		if (error == NFSERR_BADSEQID &&
		    (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM))
			error = 0;
	}

	/*
	 * Check for grace.
	 */
	if (!error)
		error = nfsrv_checkgrace(nd, clp, new_stp->ls_flags);
	if ((new_stp->ls_flags & NFSLCK_RECLAIM) && !error &&
		nfsrv_checkstable(clp))
		error = NFSERR_NOGRACE;

	/*
	 * If none of the above errors occurred, let repstat be
	 * returned.
	 */
	if (repstat && !error)
		error = repstat;
	if (error) {
		NFSUNLOCKSTATE();
		if (haslock) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		free((caddr_t)new_lfp, M_NFSDLOCKFILE);
		goto out;
	}

	/*
	 * If vp == NULL, the file doesn't exist yet, so return ok.
	 * (This always happens on the first pass, so haslock must be 0.)
	 */
	if (vp == NULL) {
		NFSUNLOCKSTATE();
		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
		goto out;
	}

	/*
	 * Get the structure for the underlying file.
	 */
	if (getfhret)
		error = getfhret;
	else
		error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp,
		    NULL, 0);
	/* nfsrv_getlockfile() NULLs new_lfp when it consumes it. */
	if (new_lfp)
		FREE((caddr_t)new_lfp, M_NFSDLOCKFILE);
	if (error) {
		NFSUNLOCKSTATE();
		if (haslock) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		goto out;
	}

	/*
	 * Search for a conflicting open/share.
	 */
	if (new_stp->ls_flags & NFSLCK_DELEGCUR) {
		/*
		 * For Delegate_Cur, search for the matching Delegation,
		 * which indicates no conflict.
		 * An old delegation should have been recovered by the
		 * client doing a Claim_DELEGATE_Prev, so I won't let
		 * it match and return NFSERR_EXPIRED. Should I let it
		 * match?
		 */
		LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
			if (!(stp->ls_flags & NFSLCK_OLDDELEG) &&
			    (((nd->nd_flag & ND_NFSV41) != 0 &&
			    stateidp->seqid == 0) ||
			    stateidp->seqid == stp->ls_stateid.seqid) &&
			    !NFSBCMP(stateidp->other, stp->ls_stateid.other,
				  NFSX_STATEIDOTHER))
				break;
		}
		if (stp == LIST_END(&lfp->lf_deleg) ||
		    ((new_stp->ls_flags & NFSLCK_WRITEACCESS) &&
		     (stp->ls_flags & NFSLCK_DELEGREAD))) {
			NFSUNLOCKSTATE();
			if (haslock) {
				NFSLOCKV4ROOTMUTEX();
				nfsv4_unlock(&nfsv4rootfs_lock, 1);
				NFSUNLOCKV4ROOTMUTEX();
			}
			error = NFSERR_EXPIRED;
			goto out;
		}
	}

	/*
	 * Check for access/deny bit conflicts. I check for the same
	 * owner as well, in case the client didn't bother.
	 */
	LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
		if (!(new_stp->ls_flags & NFSLCK_DELEGCUR) &&
		    (((new_stp->ls_flags & NFSLCK_ACCESSBITS) &
		      ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))||
		     ((stp->ls_flags & NFSLCK_ACCESSBITS) &
		      ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS)))){
			ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p);
			if (ret == 1) {
				/*
				 * nfsrv_clientconflict() unlocks
				 * state when it returns non-zero.
				 */
				goto tryagain;
			}
			if (ret == 2)
				error = NFSERR_PERM;
			else if (new_stp->ls_flags & NFSLCK_RECLAIM)
				error = NFSERR_RECLAIMCONFLICT;
			else
				error = NFSERR_SHAREDENIED;
			if (ret == 0)
				NFSUNLOCKSTATE();
			if (haslock) {
				NFSLOCKV4ROOTMUTEX();
				nfsv4_unlock(&nfsv4rootfs_lock, 1);
				NFSUNLOCKV4ROOTMUTEX();
			}
			goto out;
		}
	}

	/*
	 * Check for a conflicting delegation. If one is found, call
	 * nfsrv_delegconflict() to handle it. If the v4root lock hasn't
	 * been set yet, it will get the lock. Otherwise, it will recall
	 * the delegation. Then, we try try again...
	 * (If NFSLCK_DELEGCUR is set, it has a delegation, so there
	 *  isn't a conflict.)
	 * I currently believe the conflict algorithm to be:
	 * For Open with Read Access and Deny None
	 * - there is a conflict iff a different client has a write delegation
	 * For Open with other Write Access or any Deny except None
	 * - there is a conflict if a different client has any delegation
	 * - there is a conflict if the same client has a read delegation
	 *   (The current consensus is that this last case should be
	 *    considered a conflict since the client with a read delegation
	 *    could have done an Open with ReadAccess and WriteDeny
	 *    locally and then not have checked for the WriteDeny.)
	 * Don't check for a Reclaim, since that will be dealt with
	 * by nfsrv_openctrl().
	 */
	if (!(new_stp->ls_flags &
		(NFSLCK_DELEGPREV | NFSLCK_DELEGCUR | NFSLCK_RECLAIM))) {
	    stp = LIST_FIRST(&lfp->lf_deleg);
	    while (stp != LIST_END(&lfp->lf_deleg)) {
		nstp = LIST_NEXT(stp, ls_file);
		if ((readonly && stp->ls_clp != clp &&
		       (stp->ls_flags & NFSLCK_DELEGWRITE)) ||
		    (!readonly && (stp->ls_clp != clp ||
		       (stp->ls_flags & NFSLCK_DELEGREAD)))) {
			ret = nfsrv_delegconflict(stp, &haslock, p, vp);
			if (ret) {
			    /*
			     * nfsrv_delegconflict() unlocks state
			     * when it returns non-zero.
*/ if (ret == -1) goto tryagain; error = ret; goto out; } } stp = nstp; } } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } out: NFSEXITCODE2(error, nd); return (error); } /* * Open control function to create/update open state for an open. */ APPLESTATIC int nfsrv_openctrl(struct nfsrv_descript *nd, vnode_t vp, struct nfsstate **new_stpp, nfsquad_t clientid, nfsv4stateid_t *stateidp, nfsv4stateid_t *delegstateidp, u_int32_t *rflagsp, struct nfsexstuff *exp, NFSPROC_T *p, u_quad_t filerev) { struct nfsstate *new_stp = *new_stpp; struct nfsstate *stp, *nstp; struct nfsstate *openstp = NULL, *new_open, *ownerstp, *new_deleg; struct nfslockfile *lfp, *new_lfp; struct nfsclient *clp; int error = 0, haslock = 0, ret, delegate = 1, writedeleg = 1; int readonly = 0, cbret = 1, getfhret = 0; int gotstate = 0, len = 0; u_char *clidp = NULL; if ((new_stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) readonly = 1; /* * Check for restart conditions (client and server). * (Paranoia, should have been detected by nfsrv_opencheck().) * If an error does show up, return NFSERR_EXPIRED, since the * the seqid# has already been incremented. */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) { printf("Nfsd: openctrl unexpected restart err=%d\n", error); error = NFSERR_EXPIRED; goto out; } clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK); tryagain: MALLOC(new_lfp, struct nfslockfile *, sizeof (struct nfslockfile), M_NFSDLOCKFILE, M_WAITOK); MALLOC(new_open, struct nfsstate *, sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); MALLOC(new_deleg, struct nfsstate *, sizeof (struct nfsstate), M_NFSDSTATE, M_WAITOK); getfhret = nfsrv_getlockfh(vp, new_stp->ls_flags, new_lfp, NULL, p); NFSLOCKSTATE(); /* * Get the client structure. Since the linked lists could be changed * by other nfsd processes if this process does a tsleep(), one of * two things must be done. 
* 1 - don't tsleep() * or * 2 - get the nfsv4_lock() { indicated by haslock == 1 } * before using the lists, since this lock stops the other * nfsd. This should only be used for rare cases, since it * essentially single threads the nfsd. * At this time, it is only done for cases where the stable * storage file must be written prior to completion of state * expiration. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (!error && (clp->lc_flags & LCL_NEEDSCBNULL) && clp->lc_program) { /* * This happens on the first open for a client * that supports callbacks. */ NFSUNLOCKSTATE(); /* * Although nfsrv_docallback() will sleep, clp won't * go away, since they are only removed when the * nfsv4_lock() has blocked the nfsd threads. The * fields in clp can change, but having multiple * threads do this Null callback RPC should be * harmless. */ cbret = nfsrv_docallback(clp, NFSV4PROC_CBNULL, NULL, 0, NULL, NULL, NULL, p); NFSLOCKSTATE(); clp->lc_flags &= ~LCL_NEEDSCBNULL; if (!cbret) clp->lc_flags |= LCL_CALLBACKSON; } /* * Look up the open owner. See if it needs confirmation and * check the seq#, as required. */ if (!error) nfsrv_getowner(&clp->lc_open, new_stp, &ownerstp); if (error) { NFSUNLOCKSTATE(); printf("Nfsd: openctrl unexpected state err=%d\n", error); free((caddr_t)new_lfp, M_NFSDLOCKFILE); free((caddr_t)new_open, M_NFSDSTATE); free((caddr_t)new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } if (new_stp->ls_flags & NFSLCK_RECLAIM) nfsrv_markstable(clp); /* * Get the structure for the underlying file. 
*/ if (getfhret) error = getfhret; else error = nfsrv_getlockfile(new_stp->ls_flags, &new_lfp, &lfp, NULL, 0); if (new_lfp) FREE((caddr_t)new_lfp, M_NFSDLOCKFILE); if (error) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected getlockfile err=%d\n", error); free((caddr_t)new_open, M_NFSDSTATE); free((caddr_t)new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } goto out; } /* * Search for a conflicting open/share. */ if (new_stp->ls_flags & NFSLCK_DELEGCUR) { /* * For Delegate_Cur, search for the matching Delegation, * which indicates no conflict. * An old delegation should have been recovered by the * client doing a Claim_DELEGATE_Prev, so I won't let * it match and return NFSERR_EXPIRED. Should I let it * match? */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (!(stp->ls_flags & NFSLCK_OLDDELEG) && (((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0) || stateidp->seqid == stp->ls_stateid.seqid) && !NFSBCMP(stateidp->other, stp->ls_stateid.other, NFSX_STATEIDOTHER)) break; } if (stp == LIST_END(&lfp->lf_deleg) || ((new_stp->ls_flags & NFSLCK_WRITEACCESS) && (stp->ls_flags & NFSLCK_DELEGREAD))) { NFSUNLOCKSTATE(); printf("Nfsd openctrl unexpected expiry\n"); free((caddr_t)new_open, M_NFSDSTATE); free((caddr_t)new_deleg, M_NFSDSTATE); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } error = NFSERR_EXPIRED; goto out; } /* * Don't issue a Delegation, since one already exists and * delay delegation timeout, as required. */ delegate = 0; nfsrv_delaydelegtimeout(stp); } /* * Check for access/deny bit conflicts. I also check for the * same owner, since the client might not have bothered to check. * Also, note an open for the same file and owner, if found, * which is all we do here for Delegate_Cur, since conflict * checking is already done. 
*/ LIST_FOREACH(stp, &lfp->lf_open, ls_file) { if (ownerstp && stp->ls_openowner == ownerstp) openstp = stp; if (!(new_stp->ls_flags & NFSLCK_DELEGCUR)) { /* * If another client has the file open, the only * delegation that can be issued is a Read delegation * and only if it is a Read open with Deny none. */ if (clp != stp->ls_clp) { if ((stp->ls_flags & NFSLCK_SHAREBITS) == NFSLCK_READACCESS) writedeleg = 0; else delegate = 0; } if(((new_stp->ls_flags & NFSLCK_ACCESSBITS) & ((stp->ls_flags>>NFSLCK_SHIFT) & NFSLCK_ACCESSBITS))|| ((stp->ls_flags & NFSLCK_ACCESSBITS) & ((new_stp->ls_flags>>NFSLCK_SHIFT)&NFSLCK_ACCESSBITS))){ ret = nfsrv_clientconflict(stp->ls_clp,&haslock,vp,p); if (ret == 1) { /* * nfsrv_clientconflict() unlocks state * when it returns non-zero. */ free((caddr_t)new_open, M_NFSDSTATE); free((caddr_t)new_deleg, M_NFSDSTATE); openstp = NULL; goto tryagain; } if (ret == 2) error = NFSERR_PERM; else if (new_stp->ls_flags & NFSLCK_RECLAIM) error = NFSERR_RECLAIMCONFLICT; else error = NFSERR_SHAREDENIED; if (ret == 0) NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } free((caddr_t)new_open, M_NFSDSTATE); free((caddr_t)new_deleg, M_NFSDSTATE); printf("nfsd openctrl unexpected client cnfl\n"); goto out; } } } /* * Check for a conflicting delegation. If one is found, call * nfsrv_delegconflict() to handle it. If the v4root lock hasn't * been set yet, it will get the lock. Otherwise, it will recall * the delegation. Then, we try try again... * (If NFSLCK_DELEGCUR is set, it has a delegation, so there * isn't a conflict.) 
* I currently believe the conflict algorithm to be: * For Open with Read Access and Deny None * - there is a conflict iff a different client has a write delegation * For Open with other Write Access or any Deny except None * - there is a conflict if a different client has any delegation * - there is a conflict if the same client has a read delegation * (The current consensus is that this last case should be * considered a conflict since the client with a read delegation * could have done an Open with ReadAccess and WriteDeny * locally and then not have checked for the WriteDeny.) */ if (!(new_stp->ls_flags & (NFSLCK_DELEGPREV | NFSLCK_DELEGCUR))) { stp = LIST_FIRST(&lfp->lf_deleg); while (stp != LIST_END(&lfp->lf_deleg)) { nstp = LIST_NEXT(stp, ls_file); if (stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGREAD)) writedeleg = 0; else delegate = 0; if ((readonly && stp->ls_clp != clp && (stp->ls_flags & NFSLCK_DELEGWRITE)) || (!readonly && (stp->ls_clp != clp || (stp->ls_flags & NFSLCK_DELEGREAD)))) { if (new_stp->ls_flags & NFSLCK_RECLAIM) { delegate = 2; } else { ret = nfsrv_delegconflict(stp, &haslock, p, vp); if (ret) { /* * nfsrv_delegconflict() unlocks state * when it returns non-zero. */ printf("Nfsd openctrl unexpected deleg cnfl\n"); free((caddr_t)new_open, M_NFSDSTATE); free((caddr_t)new_deleg, M_NFSDSTATE); if (ret == -1) { openstp = NULL; goto tryagain; } error = ret; goto out; } } } stp = nstp; } } /* * We only get here if there was no open that conflicted. * If an open for the owner exists, or in the access/deny bits. * Otherwise it is a new open. If the open_owner hasn't been * confirmed, replace the open with the new one needing confirmation, * otherwise add the open. */ if (new_stp->ls_flags & NFSLCK_DELEGPREV) { /* * Handle NFSLCK_DELEGPREV by searching the old delegations for * a match. If found, just move the old delegation to the current * delegation list and issue open. If not found, return * NFSERR_EXPIRED. 
*/ LIST_FOREACH(stp, &clp->lc_olddeleg, ls_list) { if (stp->ls_lfp == lfp) { /* Found it */ if (stp->ls_clp != clp) panic("olddeleg clp"); LIST_REMOVE(stp, ls_list); LIST_REMOVE(stp, ls_hash); stp->ls_flags &= ~NFSLCK_OLDDELEG; stp->ls_stateid.seqid = delegstateidp->seqid = 1; stp->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; stp->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; stp->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); stp->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&clp->lc_deleg, stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, stp->ls_stateid), stp, ls_hash); if (stp->ls_flags & NFSLCK_DELEGWRITE) *rflagsp |= NFSV4OPEN_WRITEDELEGATE; else *rflagsp |= NFSV4OPEN_READDELEGATE; clp->lc_delegtime = NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA; /* * Now, do the associated open. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags&NFSLCK_DENYBITS)| NFSLCK_OPEN; if (stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open,new_open,ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; 
nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; break; } } if (stp == LIST_END(&clp->lc_olddeleg)) error = NFSERR_EXPIRED; } else if (new_stp->ls_flags & (NFSLCK_DELEGREAD | NFSLCK_DELEGWRITE)) { /* * Scan to see that no delegation for this client and file * doesn't already exist. * There also shouldn't yet be an Open for this file and * openowner. */ LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) { if (stp->ls_clp == clp) break; } if (stp == LIST_END(&lfp->lf_deleg) && openstp == NULL) { /* * This is the Claim_Previous case with a delegation * type != Delegate_None. */ /* * First, add the delegation. (Although we must issue the * delegation, we can also ask for an immediate return.) */ new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (new_stp->ls_flags & NFSLCK_DELEGWRITE) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; if (delegate == 2 || nfsrv_issuedelegs == 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) || !NFSVNO_DELEGOK(vp)) *rflagsp |= NFSV4OPEN_RECALL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; 
nfsrv_delegatecnt++; /* * Now, do the associated open. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_DENYBITS) | NFSLCK_OPEN; if (new_stp->ls_flags & NFSLCK_DELEGWRITE) new_open->ls_flags |= (NFSLCK_READACCESS | NFSLCK_WRITEACCESS); else new_open->ls_flags |= NFSLCK_READACCESS; new_open->ls_uid = new_stp->ls_uid; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); /* * and handle the open owner */ if (ownerstp) { new_open->ls_openowner = ownerstp; LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); } else { new_open->ls_openowner = new_stp; new_stp->ls_flags = 0; nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); *new_stpp = NULL; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; } else { error = NFSERR_RECLAIMCONFLICT; } } else if (ownerstp) { if (ownerstp->ls_flags & NFSLCK_NEEDSCONFIRM) { /* Replace the open */ if (ownerstp->ls_op) nfsrvd_derefcache(ownerstp->ls_op); ownerstp->ls_op = new_stp->ls_op; nfsrvd_refcache(ownerstp->ls_op); ownerstp->ls_seq = new_stp->ls_seq; *rflagsp |= NFSV4OPEN_RESULTCONFIRM; stp = LIST_FIRST(&ownerstp->ls_open); stp->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; stp->ls_stateid.seqid = 1; stp->ls_uid = new_stp->ls_uid; if (lfp != stp->ls_lfp) { LIST_REMOVE(stp, ls_file); LIST_INSERT_HEAD(&lfp->lf_open, stp, ls_file); stp->ls_lfp = lfp; } openstp = stp; } else if (openstp) { openstp->ls_flags |= (new_stp->ls_flags & 
NFSLCK_SHAREBITS); openstp->ls_stateid.seqid++; if ((nd->nd_flag & ND_NFSV41) != 0 && openstp->ls_stateid.seqid == 0) openstp->ls_stateid.seqid = 1; /* * This is where we can choose to issue a delegation. */ if (delegate == 0 || writedeleg == 0 || NFSVNO_EXRDONLY(exp) || (readonly != 0 && nfsrv_writedelegifpos == 0) || !NFSVNO_DELEGOK(vp) || (new_stp->ls_flags & NFSLCK_WANTRDELEG) != 0 || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON) *rflagsp |= NFSV4OPEN_WDCONTENTION; else if (nfsrv_issuedelegs == 0 || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt)) *rflagsp |= NFSV4OPEN_WDRESOURCE; else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0) *rflagsp |= NFSV4OPEN_WDNOTWANTED; else { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } else { new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS)| NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; new_open->ls_openowner = ownerstp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; 
LIST_INIT(&new_open->ls_open); LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); LIST_INSERT_HEAD(&ownerstp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; /* * This is where we can choose to issue a delegation. */ if (delegate == 0 || (writedeleg == 0 && readonly == 0) || !NFSVNO_DELEGOK(vp) || (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) != LCL_CALLBACKSON) *rflagsp |= NFSV4OPEN_WDCONTENTION; else if (nfsrv_issuedelegs == 0 || NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt)) *rflagsp |= NFSV4OPEN_WDRESOURCE; else if ((new_stp->ls_flags & NFSLCK_WANTNODELEG) != 0) *rflagsp |= NFSV4OPEN_WDNOTWANTED; else { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } } } else { /* * New owner case. 
Start the open_owner sequence with a * Needs confirmation (unless a reclaim) and hang the * new open off it. */ new_open->ls_stateid.seqid = 1; new_open->ls_stateid.other[0] = clp->lc_clientid.lval[0]; new_open->ls_stateid.other[1] = clp->lc_clientid.lval[1]; new_open->ls_stateid.other[2] = nfsrv_nextstateindex(clp); new_open->ls_flags = (new_stp->ls_flags & NFSLCK_SHAREBITS) | NFSLCK_OPEN; new_open->ls_uid = new_stp->ls_uid; LIST_INIT(&new_open->ls_open); new_open->ls_openowner = new_stp; new_open->ls_lfp = lfp; new_open->ls_clp = clp; LIST_INSERT_HEAD(&lfp->lf_open, new_open, ls_file); if (new_stp->ls_flags & NFSLCK_RECLAIM) { new_stp->ls_flags = 0; } else if ((nd->nd_flag & ND_NFSV41) != 0) { /* NFSv4.1 never needs confirmation. */ new_stp->ls_flags = 0; /* * This is where we can choose to issue a delegation. */ if (delegate && nfsrv_issuedelegs && (writedeleg || readonly) && (clp->lc_flags & (LCL_CALLBACKSON | LCL_CBDOWN)) == LCL_CALLBACKSON && !NFSRV_V4DELEGLIMIT(nfsrv_delegatecnt) && NFSVNO_DELEGOK(vp) && ((nd->nd_flag & ND_NFSV41) == 0 || (new_stp->ls_flags & NFSLCK_WANTNODELEG) == 0)) { new_deleg->ls_stateid.seqid = delegstateidp->seqid = 1; new_deleg->ls_stateid.other[0] = delegstateidp->other[0] = clp->lc_clientid.lval[0]; new_deleg->ls_stateid.other[1] = delegstateidp->other[1] = clp->lc_clientid.lval[1]; new_deleg->ls_stateid.other[2] = delegstateidp->other[2] = nfsrv_nextstateindex(clp); if (writedeleg && !NFSVNO_EXRDONLY(exp) && (nfsrv_writedelegifpos || !readonly) && ((nd->nd_flag & ND_NFSV41) == 0 || (new_stp->ls_flags & NFSLCK_WANTRDELEG) == 0)) { new_deleg->ls_flags = (NFSLCK_DELEGWRITE | NFSLCK_READACCESS | NFSLCK_WRITEACCESS); *rflagsp |= NFSV4OPEN_WRITEDELEGATE; } else { new_deleg->ls_flags = (NFSLCK_DELEGREAD | NFSLCK_READACCESS); *rflagsp |= NFSV4OPEN_READDELEGATE; } new_deleg->ls_uid = new_stp->ls_uid; new_deleg->ls_lfp = lfp; new_deleg->ls_clp = clp; new_deleg->ls_filerev = filerev; new_deleg->ls_compref = nd->nd_compref; 
LIST_INSERT_HEAD(&lfp->lf_deleg, new_deleg, ls_file); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_deleg->ls_stateid), new_deleg, ls_hash); LIST_INSERT_HEAD(&clp->lc_deleg, new_deleg, ls_list); new_deleg = NULL; nfsstatsv1.srvdelegates++; nfsrv_openpluslock++; nfsrv_delegatecnt++; } /* * Since NFSv4.1 never does an OpenConfirm, the first * open state will be acquired here. */ if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) { clp->lc_flags |= LCL_STAMPEDSTABLE; len = clp->lc_idlen; NFSBCOPY(clp->lc_id, clidp, len); gotstate = 1; } } else { *rflagsp |= NFSV4OPEN_RESULTCONFIRM; new_stp->ls_flags = NFSLCK_NEEDSCONFIRM; } nfsrvd_refcache(new_stp->ls_op); new_stp->ls_noopens = 0; LIST_INIT(&new_stp->ls_open); LIST_INSERT_HEAD(&new_stp->ls_open, new_open, ls_list); LIST_INSERT_HEAD(&clp->lc_open, new_stp, ls_list); LIST_INSERT_HEAD(NFSSTATEHASH(clp, new_open->ls_stateid), new_open, ls_hash); openstp = new_open; new_open = NULL; *new_stpp = NULL; nfsstatsv1.srvopens++; nfsrv_openpluslock++; nfsstatsv1.srvopenowners++; nfsrv_openpluslock++; } if (!error) { stateidp->seqid = openstp->ls_stateid.seqid; stateidp->other[0] = openstp->ls_stateid.other[0]; stateidp->other[1] = openstp->ls_stateid.other[1]; stateidp->other[2] = openstp->ls_stateid.other[2]; } NFSUNLOCKSTATE(); if (haslock) { NFSLOCKV4ROOTMUTEX(); nfsv4_unlock(&nfsv4rootfs_lock, 1); NFSUNLOCKV4ROOTMUTEX(); } if (new_open) FREE((caddr_t)new_open, M_NFSDSTATE); if (new_deleg) FREE((caddr_t)new_deleg, M_NFSDSTATE); /* * If the NFSv4.1 client just acquired its first open, write a timestamp * to the stable storage file. */ if (gotstate != 0) { nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p); nfsrv_backupstable(); } out: free(clidp, M_TEMP); NFSEXITCODE2(error, nd); return (error); } /* * Open update. Does the confirm, downgrade and close. 
*/
/*
 * Handle the three "open update" operations against an existing open:
 * OpenConfirm (NFSLCK_CONFIRM), Close (NFSLCK_CLOSE) and OpenDowngrade
 * (neither flag set).  Returns 0 or an NFSERR_* value.  On success,
 * *stateidp is filled in with the updated open stateid.
 * Acquires NFSLOCKSTATE() internally and releases it on every path
 * before returning.
 */
APPLESTATIC int
nfsrv_openupdate(vnode_t vp, struct nfsstate *new_stp, nfsquad_t clientid,
    nfsv4stateid_t *stateidp, struct nfsrv_descript *nd, NFSPROC_T *p)
{
	struct nfsstate *stp, *ownerstp;
	struct nfsclient *clp;
	struct nfslockfile *lfp;
	u_int32_t bits;
	int error = 0, gotstate = 0, len = 0;
	u_char *clidp = NULL;

	/*
	 * Check for restart conditions (client and server).
	 */
	error = nfsrv_checkrestart(clientid, new_stp->ls_flags,
	    &new_stp->ls_stateid, 0);
	if (error)
		goto out;

	clidp = malloc(NFSV4_OPAQUELIMIT, M_TEMP, M_WAITOK);
	NFSLOCKSTATE();
	/*
	 * Get the open structure via clientid and stateid.
	 */
	error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
	    (nfsquad_t)((u_quad_t)0), 0, nd, p);
	if (!error)
		error = nfsrv_getstate(clp, &new_stp->ls_stateid,
		    new_stp->ls_flags, &stp);

	/*
	 * Sanity check the open: it must be an open stateid, and the
	 * "needs confirmation" status of the openowner must agree with
	 * whether or not this is an OpenConfirm.
	 */
	if (!error && (!(stp->ls_flags & NFSLCK_OPEN) ||
	    (!(new_stp->ls_flags & NFSLCK_CONFIRM) &&
	     (stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)) ||
	    ((new_stp->ls_flags & NFSLCK_CONFIRM) &&
	     (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM)))))
		error = NFSERR_BADSTATEID;

	if (!error)
		error = nfsrv_checkseqid(nd, new_stp->ls_seq,
		    stp->ls_openowner, new_stp->ls_op);
	/*
	 * The stateid seqid must match, except for an NFSv4.0 OpenConfirm
	 * or the NFSv4.1 "special" seqid of 0, which matches anything.
	 */
	if (!error && stp->ls_stateid.seqid != new_stp->ls_stateid.seqid &&
	    (((nd->nd_flag & ND_NFSV41) == 0 &&
	      !(new_stp->ls_flags & NFSLCK_CONFIRM)) ||
	     ((nd->nd_flag & ND_NFSV41) != 0 &&
	      new_stp->ls_stateid.seqid != 0)))
		error = NFSERR_OLDSTATEID;
	if (!error && vnode_vtype(vp) != VREG) {
		if (vnode_vtype(vp) == VDIR)
			error = NFSERR_ISDIR;
		else
			error = NFSERR_INVAL;
	}

	if (error) {
		/*
		 * If a client tries to confirm an Open with a bad
		 * seqid# and there are no byte range locks or other Opens
		 * on the openowner, just throw it away, so the next use of the
		 * openowner will start a fresh seq#.
		 */
		if (error == NFSERR_BADSEQID &&
		    (new_stp->ls_flags & NFSLCK_CONFIRM) &&
		    nfsrv_nootherstate(stp))
			nfsrv_freeopenowner(stp->ls_openowner, 0, p);
		NFSUNLOCKSTATE();
		goto out;
	}

	/*
	 * Set the return stateid.
	 */
	stateidp->seqid = stp->ls_stateid.seqid + 1;
	/* NFSv4.1 stateid seqids wrap from 0 back to 1, never 0. */
	if ((nd->nd_flag & ND_NFSV41) != 0 && stateidp->seqid == 0)
		stateidp->seqid = 1;
	stateidp->other[0] = stp->ls_stateid.other[0];
	stateidp->other[1] = stp->ls_stateid.other[1];
	stateidp->other[2] = stp->ls_stateid.other[2];
	/*
	 * Now, handle the three cases.
	 */
	if (new_stp->ls_flags & NFSLCK_CONFIRM) {
		/*
		 * If the open doesn't need confirmation, it seems to me that
		 * there is a client error, but I'll just log it and keep going?
		 */
		if (!(stp->ls_openowner->ls_flags & NFSLCK_NEEDSCONFIRM))
			printf("Nfsv4d: stray open confirm\n");
		stp->ls_openowner->ls_flags = 0;
		stp->ls_stateid.seqid++;
		if ((nd->nd_flag & ND_NFSV41) != 0 &&
		    stp->ls_stateid.seqid == 0)
			stp->ls_stateid.seqid = 1;
		/*
		 * Remember the client id the first time this client
		 * confirms an open, so a timestamp can be written to
		 * stable storage after the state lock is dropped.
		 */
		if (!(clp->lc_flags & LCL_STAMPEDSTABLE)) {
			clp->lc_flags |= LCL_STAMPEDSTABLE;
			len = clp->lc_idlen;
			NFSBCOPY(clp->lc_id, clidp, len);
			gotstate = 1;
		}
		NFSUNLOCKSTATE();
	} else if (new_stp->ls_flags & NFSLCK_CLOSE) {
		ownerstp = stp->ls_openowner;
		lfp = stp->ls_lfp;
		if (nfsrv_dolocallocks != 0 && !LIST_EMPTY(&stp->ls_open)) {
			/* Get the lf lock */
			nfsrv_locklf(lfp);
			NFSUNLOCKSTATE();
			/*
			 * The vnode lock is dropped around nfsrv_freeopen()
			 * here; a non-zero return means it released the lf
			 * lock itself — TODO confirm against nfsrv_freeopen().
			 */
			ASSERT_VOP_ELOCKED(vp, "nfsrv_openupdate");
			NFSVOPUNLOCK(vp, 0);
			if (nfsrv_freeopen(stp, vp, 1, p) == 0) {
				NFSLOCKSTATE();
				nfsrv_unlocklf(lfp);
				NFSUNLOCKSTATE();
			}
			NFSVOPLOCK(vp, LK_EXCLUSIVE | LK_RETRY);
		} else {
			(void) nfsrv_freeopen(stp, NULL, 0, p);
			NFSUNLOCKSTATE();
		}
	} else {
		/*
		 * Update the share bits, making sure that the new set are a
		 * subset of the old ones.
		 */
		bits = (new_stp->ls_flags & NFSLCK_SHAREBITS);
		if (~(stp->ls_flags) & bits) {
			NFSUNLOCKSTATE();
			error = NFSERR_INVAL;
			goto out;
		}
		stp->ls_flags = (bits | NFSLCK_OPEN);
		stp->ls_stateid.seqid++;
		if ((nd->nd_flag & ND_NFSV41) != 0 &&
		    stp->ls_stateid.seqid == 0)
			stp->ls_stateid.seqid = 1;
		NFSUNLOCKSTATE();
	}

	/*
	 * If the client just confirmed its first open, write a timestamp
	 * to the stable storage file.
	 */
	if (gotstate != 0) {
		nfsrv_writestable(clidp, len, NFSNST_NEWSTATE, p);
		nfsrv_backupstable();
	}

out:
	free(clidp, M_TEMP);
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Delegation update. Does the purge and return.
 */
/*
 * Handle DelegReturn (op == NFSV4OP_DELEGRETURN) and DelegPurge.
 * Returns 0 or an NFSERR_* value.
 */
APPLESTATIC int
nfsrv_delegupdate(struct nfsrv_descript *nd, nfsquad_t clientid,
    nfsv4stateid_t *stateidp, vnode_t vp, int op, struct ucred *cred,
    NFSPROC_T *p)
{
	struct nfsstate *stp;
	struct nfsclient *clp;
	int error = 0;
	fhandle_t fh;

	/*
	 * Do a sanity check against the file handle for DelegReturn.
	 * NOTE(review): fh is only filled in when vp != NULL; the
	 * DelegReturn path below compares fh, so callers presumably
	 * always pass a vnode for DelegReturn — confirm at call sites.
	 */
	if (vp) {
		error = nfsvno_getfh(vp, &fh, p);
		if (error)
			goto out;
	}
	/*
	 * Check for restart conditions (client and server).
	 */
	if (op == NFSV4OP_DELEGRETURN)
		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGRETURN,
			stateidp, 0);
	else
		error = nfsrv_checkrestart(clientid, NFSLCK_DELEGPURGE,
			stateidp, 0);

	NFSLOCKSTATE();
	/*
	 * Get the open structure via clientid and stateid.
	 */
	if (!error)
	    error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL,
		(nfsquad_t)((u_quad_t)0), 0, nd, p);
	if (error) {
		/* A down callback path doesn't prevent delegation ops. */
		if (error == NFSERR_CBPATHDOWN)
			error = 0;
		if (error == NFSERR_STALECLIENTID && op == NFSV4OP_DELEGRETURN)
			error = NFSERR_STALESTATEID;
	}
	if (!error && op == NFSV4OP_DELEGRETURN) {
		error = nfsrv_getstate(clp, stateidp, NFSLCK_DELEGRETURN, &stp);
		/* NFSv4.1 special seqid 0 matches any seqid. */
		if (!error && stp->ls_stateid.seqid != stateidp->seqid &&
		    ((nd->nd_flag & ND_NFSV41) == 0 || stateidp->seqid != 0))
			error = NFSERR_OLDSTATEID;
	}
	/*
	 * NFSERR_EXPIRED means that the state has gone away,
	 * so Delegations have been purged. Just return ok.
*/ if (error == NFSERR_EXPIRED && op == NFSV4OP_DELEGPURGE) { NFSUNLOCKSTATE(); error = 0; goto out; } if (error) { NFSUNLOCKSTATE(); goto out; } if (op == NFSV4OP_DELEGRETURN) { if (NFSBCMP((caddr_t)&fh, (caddr_t)&stp->ls_lfp->lf_fh, sizeof (fhandle_t))) { NFSUNLOCKSTATE(); error = NFSERR_BADSTATEID; goto out; } nfsrv_freedeleg(stp); } else { nfsrv_freedeleglist(&clp->lc_olddeleg); } NFSUNLOCKSTATE(); error = 0; out: NFSEXITCODE(error); return (error); } /* * Release lock owner. */ APPLESTATIC int nfsrv_releaselckown(struct nfsstate *new_stp, nfsquad_t clientid, NFSPROC_T *p) { struct nfsstate *stp, *nstp, *openstp, *ownstp; struct nfsclient *clp; int error = 0; /* * Check for restart conditions (client and server). */ error = nfsrv_checkrestart(clientid, new_stp->ls_flags, &new_stp->ls_stateid, 0); if (error) goto out; NFSLOCKSTATE(); /* * Get the lock owner by name. */ error = nfsrv_getclient(clientid, CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, NULL, p); if (error) { NFSUNLOCKSTATE(); goto out; } LIST_FOREACH(ownstp, &clp->lc_open, ls_list) { LIST_FOREACH(openstp, &ownstp->ls_open, ls_list) { stp = LIST_FIRST(&openstp->ls_open); while (stp != LIST_END(&openstp->ls_open)) { nstp = LIST_NEXT(stp, ls_list); /* * If the owner matches, check for locks and * then free or return an error. */ if (stp->ls_ownerlen == new_stp->ls_ownerlen && !NFSBCMP(stp->ls_owner, new_stp->ls_owner, stp->ls_ownerlen)){ if (LIST_EMPTY(&stp->ls_lock)) { nfsrv_freelockowner(stp, NULL, 0, p); } else { NFSUNLOCKSTATE(); error = NFSERR_LOCKSHELD; goto out; } } stp = nstp; } } } NFSUNLOCKSTATE(); out: NFSEXITCODE(error); return (error); } /* * Get the file handle for a lock structure. */ static int nfsrv_getlockfh(vnode_t vp, u_short flags, struct nfslockfile *new_lfp, fhandle_t *nfhp, NFSPROC_T *p) { fhandle_t *fhp = NULL; int error; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. 
*/ if (flags & NFSLCK_OPEN) { KASSERT(new_lfp != NULL, ("nfsrv_getlockfh: new_lfp NULL")); fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfh"); } error = nfsvno_getfh(vp, fhp, p); NFSEXITCODE(error); return (error); } /* * Get an nfs lock structure. Allocate one, as required, and return a * pointer to it. * Returns an NFSERR_xxx upon failure or -1 to indicate no current lock. */ static int nfsrv_getlockfile(u_short flags, struct nfslockfile **new_lfpp, struct nfslockfile **lfpp, fhandle_t *nfhp, int lockit) { struct nfslockfile *lfp; fhandle_t *fhp = NULL, *tfhp; struct nfslockhashhead *hp; struct nfslockfile *new_lfp = NULL; /* * For lock, use the new nfslock structure, otherwise just * a fhandle_t on the stack. */ if (flags & NFSLCK_OPEN) { new_lfp = *new_lfpp; fhp = &new_lfp->lf_fh; } else if (nfhp) { fhp = nfhp; } else { panic("nfsrv_getlockfile"); } hp = NFSLOCKHASH(fhp); LIST_FOREACH(lfp, hp, lf_hash) { tfhp = &lfp->lf_fh; if (NFSVNO_CMPFH(fhp, tfhp)) { if (lockit) nfsrv_locklf(lfp); *lfpp = lfp; return (0); } } if (!(flags & NFSLCK_OPEN)) return (-1); /* * No match, so chain the new one into the list. */ LIST_INIT(&new_lfp->lf_open); LIST_INIT(&new_lfp->lf_lock); LIST_INIT(&new_lfp->lf_deleg); LIST_INIT(&new_lfp->lf_locallock); LIST_INIT(&new_lfp->lf_rollback); new_lfp->lf_locallock_lck.nfslock_usecnt = 0; new_lfp->lf_locallock_lck.nfslock_lock = 0; new_lfp->lf_usecount = 0; LIST_INSERT_HEAD(hp, new_lfp, lf_hash); *lfpp = new_lfp; *new_lfpp = NULL; return (0); } /* * This function adds a nfslock lock structure to the list for the associated * nfsstate and nfslockfile structures. It will be inserted after the * entry pointed at by insert_lop. 
*/ static void nfsrv_insertlock(struct nfslock *new_lop, struct nfslock *insert_lop, struct nfsstate *stp, struct nfslockfile *lfp) { struct nfslock *lop, *nlop; new_lop->lo_stp = stp; new_lop->lo_lfp = lfp; if (stp != NULL) { /* Insert in increasing lo_first order */ lop = LIST_FIRST(&lfp->lf_lock); if (lop == LIST_END(&lfp->lf_lock) || new_lop->lo_first <= lop->lo_first) { LIST_INSERT_HEAD(&lfp->lf_lock, new_lop, lo_lckfile); } else { nlop = LIST_NEXT(lop, lo_lckfile); while (nlop != LIST_END(&lfp->lf_lock) && nlop->lo_first < new_lop->lo_first) { lop = nlop; nlop = LIST_NEXT(lop, lo_lckfile); } LIST_INSERT_AFTER(lop, new_lop, lo_lckfile); } } else { new_lop->lo_lckfile.le_prev = NULL; /* list not used */ } /* * Insert after insert_lop, which is overloaded as stp or lfp for * an empty list. */ if (stp == NULL && (struct nfslockfile *)insert_lop == lfp) LIST_INSERT_HEAD(&lfp->lf_locallock, new_lop, lo_lckowner); else if ((struct nfsstate *)insert_lop == stp) LIST_INSERT_HEAD(&stp->ls_lock, new_lop, lo_lckowner); else LIST_INSERT_AFTER(insert_lop, new_lop, lo_lckowner); if (stp != NULL) { nfsstatsv1.srvlocks++; nfsrv_openpluslock++; } } /* * This function updates the locking for a lock owner and given file. It * maintains a list of lock ranges ordered on increasing file offset that * are NFSLCK_READ or NFSLCK_WRITE and non-overlapping (aka POSIX style). * It always adds new_lop to the list and sometimes uses the one pointed * at by other_lopp. */ static void nfsrv_updatelock(struct nfsstate *stp, struct nfslock **new_lopp, struct nfslock **other_lopp, struct nfslockfile *lfp) { struct nfslock *new_lop = *new_lopp; struct nfslock *lop, *tlop, *ilop; struct nfslock *other_lop = *other_lopp; int unlock = 0, myfile = 0; u_int64_t tmp; /* * Work down the list until the lock is merged. 
*/ if (new_lop->lo_flags & NFSLCK_UNLOCK) unlock = 1; if (stp != NULL) { ilop = (struct nfslock *)stp; lop = LIST_FIRST(&stp->ls_lock); } else { ilop = (struct nfslock *)lfp; lop = LIST_FIRST(&lfp->lf_locallock); } while (lop != NULL) { /* * Only check locks for this file that aren't before the start of * new lock's range. */ if (lop->lo_lfp == lfp) { myfile = 1; if (lop->lo_end >= new_lop->lo_first) { if (new_lop->lo_end < lop->lo_first) { /* * If the new lock ends before the start of the * current lock's range, no merge, just insert * the new lock. */ break; } if (new_lop->lo_flags == lop->lo_flags || (new_lop->lo_first <= lop->lo_first && new_lop->lo_end >= lop->lo_end)) { /* * This lock can be absorbed by the new lock/unlock. * This happens when it covers the entire range * of the old lock or is contiguous * with the old lock and is of the same type or an * unlock. */ if (lop->lo_first < new_lop->lo_first) new_lop->lo_first = lop->lo_first; if (lop->lo_end > new_lop->lo_end) new_lop->lo_end = lop->lo_end; tlop = lop; lop = LIST_NEXT(lop, lo_lckowner); nfsrv_freenfslock(tlop); continue; } /* * All these cases are for contiguous locks that are not the * same type, so they can't be merged. */ if (new_lop->lo_first <= lop->lo_first) { /* * This case is where the new lock overlaps with the * first part of the old lock. Move the start of the * old lock to just past the end of the new lock. The * new lock will be inserted in front of the old, since * ilop hasn't been updated. (We are done now.) */ lop->lo_first = new_lop->lo_end; break; } if (new_lop->lo_end >= lop->lo_end) { /* * This case is where the new lock overlaps with the * end of the old lock's range. Move the old lock's * end to just before the new lock's first and insert * the new lock after the old lock. * Might not be done yet, since the new lock could * overlap further locks with higher ranges. 
*/ lop->lo_end = new_lop->lo_first; ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); continue; } /* * The final case is where the new lock's range is in the * middle of the current lock's and splits the current lock * up. Use *other_lopp to handle the second part of the * split old lock range. (We are done now.) * For unlock, we use new_lop as other_lop and tmp, since * other_lop and new_lop are the same for this case. * We noted the unlock case above, so we don't need * new_lop->lo_flags any longer. */ tmp = new_lop->lo_first; if (other_lop == NULL) { if (!unlock) panic("nfsd srv update unlock"); other_lop = new_lop; *new_lopp = NULL; } other_lop->lo_first = new_lop->lo_end; other_lop->lo_end = lop->lo_end; other_lop->lo_flags = lop->lo_flags; other_lop->lo_stp = stp; other_lop->lo_lfp = lfp; lop->lo_end = tmp; nfsrv_insertlock(other_lop, lop, stp, lfp); *other_lopp = NULL; ilop = lop; break; } } ilop = lop; lop = LIST_NEXT(lop, lo_lckowner); if (myfile && (lop == NULL || lop->lo_lfp != lfp)) break; } /* * Insert the new lock in the list at the appropriate place. */ if (!unlock) { nfsrv_insertlock(new_lop, ilop, stp, lfp); *new_lopp = NULL; } } /* * This function handles sequencing of locks, etc. * It returns an error that indicates what the caller should do. */ static int nfsrv_checkseqid(struct nfsrv_descript *nd, u_int32_t seqid, struct nfsstate *stp, struct nfsrvcache *op) { int error = 0; if ((nd->nd_flag & ND_NFSV41) != 0) /* NFSv4.1 ignores the open_seqid and lock_seqid. 
*/ goto out; if (op != nd->nd_rp) panic("nfsrvstate checkseqid"); if (!(op->rc_flag & RC_INPROG)) panic("nfsrvstate not inprog"); if (stp->ls_op && stp->ls_op->rc_refcnt <= 0) { printf("refcnt=%d\n", stp->ls_op->rc_refcnt); panic("nfsrvstate op refcnt"); } if ((stp->ls_seq + 1) == seqid) { if (stp->ls_op) nfsrvd_derefcache(stp->ls_op); stp->ls_op = op; nfsrvd_refcache(op); stp->ls_seq = seqid; goto out; } else if (stp->ls_seq == seqid && stp->ls_op && op->rc_xid == stp->ls_op->rc_xid && op->rc_refcnt == 0 && op->rc_reqlen == stp->ls_op->rc_reqlen && op->rc_cksum == stp->ls_op->rc_cksum) { if (stp->ls_op->rc_flag & RC_INPROG) { error = NFSERR_DONTREPLY; goto out; } nd->nd_rp = stp->ls_op; nd->nd_rp->rc_flag |= RC_INPROG; nfsrvd_delcache(op); error = NFSERR_REPLYFROMCACHE; goto out; } error = NFSERR_BADSEQID; out: NFSEXITCODE2(error, nd); return (error); } /* * Get the client ip address for callbacks. If the strings can't be parsed, * just set lc_program to 0 to indicate no callbacks are possible. * (For cases where the address can't be parsed or is 0.0.0.0.0.0, set * the address to the client's transport address. This won't be used * for callbacks, but can be printed out by nfsstats for info.) * Return error if the xdr can't be parsed, 0 otherwise. 
*/ APPLESTATIC int nfsrv_getclientipaddr(struct nfsrv_descript *nd, struct nfsclient *clp) { u_int32_t *tl; u_char *cp, *cp2; int i, j; struct sockaddr_in *rad, *sad; u_char protocol[5], addr[24]; int error = 0, cantparse = 0; union { in_addr_t ival; u_char cval[4]; } ip; union { in_port_t sval; u_char cval[2]; } port; rad = NFSSOCKADDR(clp->lc_req.nr_nam, struct sockaddr_in *); rad->sin_family = AF_INET; rad->sin_len = sizeof (struct sockaddr_in); rad->sin_addr.s_addr = 0; rad->sin_port = 0; clp->lc_req.nr_client = NULL; clp->lc_req.nr_lock = 0; NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i >= 3 && i <= 4) { error = nfsrv_mtostr(nd, protocol, i); if (error) goto nfsmout; if (!strcmp(protocol, "tcp")) { clp->lc_flags |= LCL_TCPCALLBACK; clp->lc_req.nr_sotype = SOCK_STREAM; clp->lc_req.nr_soproto = IPPROTO_TCP; } else if (!strcmp(protocol, "udp")) { clp->lc_req.nr_sotype = SOCK_DGRAM; clp->lc_req.nr_soproto = IPPROTO_UDP; } else { cantparse = 1; } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } NFSM_DISSECT(tl, u_int32_t *, NFSX_UNSIGNED); i = fxdr_unsigned(int, *tl); if (i < 0) { error = NFSERR_BADXDR; goto nfsmout; } else if (i == 0) { cantparse = 1; } else if (!cantparse && i <= 23 && i >= 11) { error = nfsrv_mtostr(nd, addr, i); if (error) goto nfsmout; /* * Parse out the address fields. We expect 6 decimal numbers * separated by '.'s. 
*/ cp = addr; i = 0; while (*cp && i < 6) { cp2 = cp; while (*cp2 && *cp2 != '.') cp2++; if (*cp2) *cp2++ = '\0'; else if (i != 5) { cantparse = 1; break; } j = nfsrv_getipnumber(cp); if (j >= 0) { if (i < 4) ip.cval[3 - i] = j; else port.cval[5 - i] = j; } else { cantparse = 1; break; } cp = cp2; i++; } if (!cantparse) { if (ip.ival != 0x0) { rad->sin_addr.s_addr = htonl(ip.ival); rad->sin_port = htons(port.sval); } else { cantparse = 1; } } } else { cantparse = 1; if (i > 0) { error = nfsm_advance(nd, NFSM_RNDUP(i), -1); if (error) goto nfsmout; } } if (cantparse) { sad = NFSSOCKADDR(nd->nd_nam, struct sockaddr_in *); if (sad->sin_family == AF_INET) { rad->sin_addr.s_addr = sad->sin_addr.s_addr; rad->sin_port = 0x0; } clp->lc_program = 0; } nfsmout: NFSEXITCODE2(error, nd); return (error); } /* * Turn a string of up to three decimal digits into a number. Return -1 upon * error. */ static int nfsrv_getipnumber(u_char *cp) { int i = 0, j = 0; while (*cp) { if (j > 2 || *cp < '0' || *cp > '9') return (-1); i *= 10; i += (*cp - '0'); cp++; j++; } if (i < 256) return (i); return (-1); } /* * This function checks for restart conditions. */ static int nfsrv_checkrestart(nfsquad_t clientid, u_int32_t flags, nfsv4stateid_t *stateidp, int specialid) { int ret = 0; /* * First check for a server restart. Open, LockT, ReleaseLockOwner * and DelegPurge have a clientid, the rest a stateid. */ if (flags & (NFSLCK_OPEN | NFSLCK_TEST | NFSLCK_RELEASE | NFSLCK_DELEGPURGE)) { if (clientid.lval[0] != nfsrvboottime) { ret = NFSERR_STALECLIENTID; goto out; } } else if (stateidp->other[0] != nfsrvboottime && specialid == 0) { ret = NFSERR_STALESTATEID; goto out; } /* * Read, Write, Setattr and LockT can return NFSERR_GRACE and do * not use a lock/open owner seqid#, so the check can be done now. * (The others will be checked, as required, later.) 
*/ if (!(flags & (NFSLCK_CHECK | NFSLCK_TEST))) goto out; NFSLOCKSTATE(); ret = nfsrv_checkgrace(NULL, NULL, flags); NFSUNLOCKSTATE(); out: NFSEXITCODE(ret); return (ret); } /* * Check for grace. */ static int nfsrv_checkgrace(struct nfsrv_descript *nd, struct nfsclient *clp, u_int32_t flags) { int error = 0; if ((nfsrv_stablefirst.nsf_flags & NFSNSF_GRACEOVER) != 0) { if (flags & NFSLCK_RECLAIM) { error = NFSERR_NOGRACE; goto out; } } else { if (!(flags & NFSLCK_RECLAIM)) { error = NFSERR_GRACE; goto out; } if (nd != NULL && clp != NULL && (nd->nd_flag & ND_NFSV41) != 0 && (clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) { error = NFSERR_NOGRACE; goto out; } /* * If grace is almost over and we are still getting Reclaims, * extend grace a bit. */ if ((NFSD_MONOSEC + NFSRV_LEASEDELTA) > nfsrv_stablefirst.nsf_eograce) nfsrv_stablefirst.nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA; } out: NFSEXITCODE(error); return (error); } /* * Do a server callback. */ static int nfsrv_docallback(struct nfsclient *clp, int procnum, nfsv4stateid_t *stateidp, int trunc, fhandle_t *fhp, struct nfsvattr *nap, nfsattrbit_t *attrbitp, NFSPROC_T *p) { mbuf_t m; u_int32_t *tl; struct nfsrv_descript nfsd, *nd = &nfsd; struct ucred *cred; int error = 0; u_int32_t callback; struct nfsdsession *sep = NULL; cred = newnfs_getcred(); NFSLOCKSTATE(); /* mostly for lc_cbref++ */ if (clp->lc_flags & LCL_NEEDSCONFIRM) { NFSUNLOCKSTATE(); panic("docallb"); } clp->lc_cbref++; /* * Fill the callback program# and version into the request * structure for newnfs_connect() to use. */ clp->lc_req.nr_prog = clp->lc_program; #ifdef notnow if ((clp->lc_flags & LCL_NFSV41) != 0) clp->lc_req.nr_vers = NFSV41_CBVERS; else #endif clp->lc_req.nr_vers = NFSV4_CBVERS; /* * First, fill in some of the fields of nd and cr. 
	 */
	nd->nd_flag = ND_NFSV4;
	if (clp->lc_flags & LCL_GSS)
		nd->nd_flag |= ND_KERBV;
	if ((clp->lc_flags & LCL_NFSV41) != 0)
		nd->nd_flag |= ND_NFSV41;
	nd->nd_repstat = 0;
	cred->cr_uid = clp->lc_uid;
	cred->cr_gid = clp->lc_gid;
	callback = clp->lc_callback;
	NFSUNLOCKSTATE();
	cred->cr_ngroups = 1;

	/*
	 * Get the first mbuf for the request.
	 */
	MGET(m, M_WAITOK, MT_DATA);
	mbuf_setlen(m, 0);
	nd->nd_mreq = nd->nd_mb = m;
	nd->nd_bpos = NFSMTOD(m, caddr_t);

	/*
	 * and build the callback request.
	 */
	if (procnum == NFSV4OP_CBGETATTR) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBGETATTR,
		    "CB Getattr", &sep);
		if (error != 0) {
			mbuf_freem(nd->nd_mreq);
			goto errout;
		}
		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
		(void)nfsrv_putattrbit(nd, attrbitp);
	} else if (procnum == NFSV4OP_CBRECALL) {
		nd->nd_procnum = NFSV4PROC_CBCOMPOUND;
		error = nfsrv_cbcallargs(nd, clp, callback, NFSV4OP_CBRECALL,
		    "CB Recall", &sep);
		if (error != 0) {
			mbuf_freem(nd->nd_mreq);
			goto errout;
		}
		/* CB_RECALL args: stateid, truncate flag, file handle. */
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED + NFSX_STATEID);
		*tl++ = txdr_unsigned(stateidp->seqid);
		NFSBCOPY((caddr_t)stateidp->other, (caddr_t)tl,
		    NFSX_STATEIDOTHER);
		tl += (NFSX_STATEIDOTHER / NFSX_UNSIGNED);
		if (trunc)
			*tl = newnfs_true;
		else
			*tl = newnfs_false;
		(void)nfsm_fhtom(nd, (u_int8_t *)fhp, NFSX_MYFH, 0);
	} else if (procnum == NFSV4PROC_CBNULL) {
		nd->nd_procnum = NFSV4PROC_CBNULL;
		if ((clp->lc_flags & LCL_NFSV41) != 0) {
			error = nfsv4_getcbsession(clp, &sep);
			if (error != 0) {
				mbuf_freem(nd->nd_mreq);
				goto errout;
			}
		}
	} else {
		error = NFSERR_SERVERFAULT;
		mbuf_freem(nd->nd_mreq);
		goto errout;
	}

	/*
	 * Call newnfs_connect(), as required, and then newnfs_request().
	 */
	(void) newnfs_sndlock(&clp->lc_req.nr_lock);
	if (clp->lc_req.nr_client == NULL) {
		if ((clp->lc_flags & LCL_NFSV41) != 0)
			/* NFSv4.1 uses the backchannel, not a new connect. */
			error = ECONNREFUSED;
		else if (nd->nd_procnum == NFSV4PROC_CBNULL)
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 1);
		else
			error = newnfs_connect(NULL, &clp->lc_req, cred,
			    NULL, 3);
	}
	newnfs_sndunlock(&clp->lc_req.nr_lock);
	if (!error) {
		if ((nd->nd_flag & ND_NFSV41) != 0) {
			KASSERT(sep != NULL, ("sep NULL"));
			if (sep->sess_cbsess.nfsess_xprt != NULL)
				error = newnfs_request(nd, NULL, clp,
				    &clp->lc_req, NULL, NULL, cred,
				    clp->lc_program, clp->lc_req.nr_vers, NULL,
				    1, NULL, &sep->sess_cbsess);
			else {
				/*
				 * This should probably never occur, but if a
				 * client somehow does an RPC without a
				 * SequenceID Op that causes a callback just
				 * after the nfsd threads have been terminated
				 * and restarted we could conceivably get here
				 * without a backchannel xprt.
				 */
				printf("nfsrv_docallback: no xprt\n");
				error = ECONNREFUSED;
			}
			nfsrv_freesession(sep, NULL);
		} else
			error = newnfs_request(nd, NULL, clp, &clp->lc_req,
			    NULL, NULL, cred, clp->lc_program,
			    clp->lc_req.nr_vers, NULL, 1, NULL, NULL);
	}
errout:
	NFSFREECRED(cred);

	/*
	 * If error is set here, the Callback path isn't working
	 * properly, so twiddle the appropriate LCL_ flags.
	 * (nd_repstat != 0 indicates the Callback path is working,
	 * but the callback failed on the client.)
	 */
	if (error) {
		/*
		 * Mark the callback pathway down, which disables issuing
		 * of delegations and gets Renew to return NFSERR_CBPATHDOWN.
		 */
		NFSLOCKSTATE();
		clp->lc_flags |= LCL_CBDOWN;
		NFSUNLOCKSTATE();
	} else {
		/*
		 * Callback worked. If the callback path was down, disable
		 * callbacks, so no more delegations will be issued. (This
		 * is done on the assumption that the callback pathway is
		 * flakey.)
	 */
		NFSLOCKSTATE();
		if (clp->lc_flags & LCL_CBDOWN)
			clp->lc_flags &= ~(LCL_CBDOWN | LCL_CALLBACKSON);
		NFSUNLOCKSTATE();
		if (nd->nd_repstat)
			error = nd->nd_repstat;
		else if (error == 0 && procnum == NFSV4OP_CBGETATTR)
			/* Parse the attributes returned by CB_GETATTR. */
			error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0,
			    NULL, NULL, NULL, NULL, NULL, 0, NULL, NULL, NULL,
			    p, NULL);
		mbuf_freem(nd->nd_mrep);
	}
	NFSLOCKSTATE();
	clp->lc_cbref--;
	/* Wake up anyone waiting for the last callback to drain. */
	if ((clp->lc_flags & LCL_WAKEUPWANTED) && clp->lc_cbref == 0) {
		clp->lc_flags &= ~LCL_WAKEUPWANTED;
		wakeup(clp);
	}
	NFSUNLOCKSTATE();

	NFSEXITCODE(error);
	return (error);
}

/*
 * Set up the compound RPC for the callback: tag, minor version, callback
 * ident and op count.  For NFSv4.1 a CB_SEQUENCE op is inserted first and
 * *sepp is set to the referenced backchannel session.
 */
static int
nfsrv_cbcallargs(struct nfsrv_descript *nd, struct nfsclient *clp,
    uint32_t callback, int op, const char *optag, struct nfsdsession **sepp)
{
	uint32_t *tl;
	int error, len;

	len = strlen(optag);
	(void)nfsm_strtom(nd, optag, len);
	NFSM_BUILD(tl, uint32_t *, 4 * NFSX_UNSIGNED);
	if ((nd->nd_flag & ND_NFSV41) != 0) {
		*tl++ = txdr_unsigned(NFSV41_MINORVERSION);
		*tl++ = txdr_unsigned(callback);
		*tl++ = txdr_unsigned(2);
		*tl = txdr_unsigned(NFSV4OP_CBSEQUENCE);
		error = nfsv4_setcbsequence(nd, clp, 1, sepp);
		if (error != 0)
			return (error);
		NFSM_BUILD(tl, u_int32_t *, NFSX_UNSIGNED);
		*tl = txdr_unsigned(op);
	} else {
		*tl++ = txdr_unsigned(NFSV4_MINORVERSION);
		*tl++ = txdr_unsigned(callback);
		*tl++ = txdr_unsigned(1);
		*tl = txdr_unsigned(op);
	}
	return (0);
}

/*
 * Return the next index# for a clientid. Mostly just increment and return
 * the next one, but... if the 32bit unsigned does actually wrap around,
 * it should be rebooted.
 * At an average rate of one new client per second, it will wrap around in
 * approximately 136 years. (I think the server will have been shut
 * down or rebooted before then.)
 */
static u_int32_t
nfsrv_nextclientindex(void)
{
	static u_int32_t client_index = 0;

	client_index++;
	if (client_index != 0)
		return (client_index);

	printf("%s: out of clientids\n", __func__);
	return (client_index);
}

/*
 * Return the next index# for a stateid. Mostly just increment and return
 * the next one, but... if the 32bit unsigned does actually wrap around
 * (will a BSD server stay up that long?), find
 * new start and end values by scanning the client's stateid hash lists.
 */
static u_int32_t
nfsrv_nextstateindex(struct nfsclient *clp)
{
	struct nfsstate *stp;
	int i;
	u_int32_t canuse, min_index, max_index;

	if (!(clp->lc_flags & LCL_INDEXNOTOK)) {
		clp->lc_stateindex++;
		if (clp->lc_stateindex != clp->lc_statemaxindex)
			return (clp->lc_stateindex);
	}

	/*
	 * Yuck, we've hit the end.
	 * Look for a new min and max.
	 */
	min_index = 0;
	max_index = 0xffffffff;
	for (i = 0; i < nfsrv_statehashsize; i++) {
		LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
			if (stp->ls_stateid.other[2] > 0x80000000) {
				if (stp->ls_stateid.other[2] < max_index)
					max_index = stp->ls_stateid.other[2];
			} else {
				if (stp->ls_stateid.other[2] > min_index)
					min_index = stp->ls_stateid.other[2];
			}
		}
	}

	/*
	 * Yikes, highly unlikely, but I'll handle it anyhow.
	 */
	if (min_index == 0x80000000 && max_index == 0x80000001) {
		canuse = 0;
		/*
		 * Loop around until we find an unused entry. Return that
		 * and set LCL_INDEXNOTOK, so the search will continue next
		 * time.
		 * (This is one of those rare cases where a goto is the
		 * cleanest way to code the loop.)
		 */
tryagain:
		for (i = 0; i < nfsrv_statehashsize; i++) {
			LIST_FOREACH(stp, &clp->lc_stateid[i], ls_hash) {
				if (stp->ls_stateid.other[2] == canuse) {
					canuse++;
					goto tryagain;
				}
			}
		}
		clp->lc_flags |= LCL_INDEXNOTOK;
		return (canuse);
	}

	/*
	 * Ok to start again from min + 1.
	 */
	clp->lc_stateindex = min_index + 1;
	clp->lc_statemaxindex = max_index;
	clp->lc_flags &= ~LCL_INDEXNOTOK;
	return (clp->lc_stateindex);
}

/*
 * The following functions handle the stable storage file that deals with
 * the edge conditions described in RFC3530 Sec. 8.6.3.
 * The file is as follows:
 * - a single record at the beginning that has the lease time of the
 *   previous server instance (before the last reboot) and the nfsrvboottime
 *   values for the previous server boots.
 *   These previous boot times are used to ensure that the current
 *   nfsrvboottime does not, somehow, get set to a previous one.
 *   (This is important so that Stale ClientIDs and StateIDs can
 *    be recognized.)
 *   The number of previous nfsvrboottime values precedes the list.
 * - followed by some number of appended records with:
 *   - client id string
 *   - flag that indicates it is a record revoking state via lease
 *     expiration or similar
 *     OR has successfully acquired state.
 *   These structures vary in length, with the client string at the end, up
 *   to NFSV4_OPAQUELIMIT in size.
 *
 * At the end of the grace period, the file is truncated, the first
 * record is rewritten with updated information and any acquired state
 * records for successful reclaims of state are written.
 *
 * Subsequent records are appended when the first state is issued to
 * a client and when state is revoked for a client.
 *
 * When reading the file in, state issued records that come later in
 * the file override older ones, since the append log is in chronological
 * order.
 * If, for some reason, the file can't be read, the grace period is
 * immediately terminated and all reclaims get NFSERR_NOGRACE.
 */

/*
 * Read in the stable storage file. Called by nfssvc() before the nfsd
 * processes start servicing requests.
 */
APPLESTATIC void
nfsrv_setupstable(NFSPROC_T *p)
{
	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
	struct nfsrv_stable *sp, *nsp;
	struct nfst_rec *tsp;
	int error, i, tryagain;
	off_t off = 0;
	ssize_t aresid, len;

	/*
	 * If NFSNSF_UPDATEDONE is set, this is a restart of the nfsds without
	 * a reboot, so state has not been lost.
	 */
	if (sf->nsf_flags & NFSNSF_UPDATEDONE)
		return;
	/*
	 * Set Grace over just until the file reads successfully.
	 */
	nfsrvboottime = time_second;
	LIST_INIT(&sf->nsf_head);
	sf->nsf_flags = (NFSNSF_GRACEOVER | NFSNSF_NEEDLOCK);
	sf->nsf_eograce = NFSD_MONOSEC + NFSRV_LEASEDELTA;
	if (sf->nsf_fp == NULL)
		return;
	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
	    (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), off, UIO_SYSSPACE,
	    0, NFSFPCRED(sf->nsf_fp), &aresid, p);
	if (error || aresid || sf->nsf_numboots == 0 ||
	    sf->nsf_numboots > NFSNSF_MAXNUMBOOTS)
		return;

	/*
	 * Now, read in the boottimes.
	 * (One extra slot is allocated so that nfsrv_updatestable() can
	 *  later prepend the current boot time.)
	 */
	sf->nsf_bootvals = (time_t *)malloc((sf->nsf_numboots + 1) *
	    sizeof (time_t), M_TEMP, M_WAITOK);
	off = sizeof (struct nfsf_rec);
	error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
	    (caddr_t)sf->nsf_bootvals, sf->nsf_numboots * sizeof (time_t), off,
	    UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
	if (error || aresid) {
		free((caddr_t)sf->nsf_bootvals, M_TEMP);
		sf->nsf_bootvals = NULL;
		return;
	}

	/*
	 * Make sure this nfsrvboottime is different from all recorded
	 * previous ones.
	 */
	do {
		tryagain = 0;
		for (i = 0; i < sf->nsf_numboots; i++) {
			if (nfsrvboottime == sf->nsf_bootvals[i]) {
				nfsrvboottime++;
				tryagain = 1;
				break;
			}
		}
	} while (tryagain);

	sf->nsf_flags |= NFSNSF_OK;
	off += (sf->nsf_numboots * sizeof (time_t));

	/*
	 * Read through the file, building a list of records for grace
	 * checking.
	 * Each record is between sizeof (struct nfst_rec) and
	 * sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1
	 * and is actually sizeof (struct nfst_rec) + nst_len - 1.
	 */
	tsp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
	    NFSV4_OPAQUELIMIT - 1, M_TEMP, M_WAITOK);
	do {
	    error = NFSD_RDWR(UIO_READ, NFSFPVNODE(sf->nsf_fp),
		(caddr_t)tsp, sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1,
		off, UIO_SYSSPACE, 0, NFSFPCRED(sf->nsf_fp), &aresid, p);
	    len = (sizeof (struct nfst_rec) + NFSV4_OPAQUELIMIT - 1) - aresid;
	    if (error || (len > 0 && (len < sizeof (struct nfst_rec) ||
		len < (sizeof (struct nfst_rec) + tsp->len - 1)))) {
		/*
		 * Yuck, the file has been corrupted, so just return
		 * after clearing out any restart state, so the grace period
		 * is over.
		 */
		LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
			LIST_REMOVE(sp, nst_list);
			free((caddr_t)sp, M_TEMP);
		}
		free((caddr_t)tsp, M_TEMP);
		sf->nsf_flags &= ~NFSNSF_OK;
		free((caddr_t)sf->nsf_bootvals, M_TEMP);
		sf->nsf_bootvals = NULL;
		return;
	    }
	    if (len > 0) {
		off += sizeof (struct nfst_rec) + tsp->len - 1;
		/*
		 * Search the list for a matching client.
		 */
		LIST_FOREACH(sp, &sf->nsf_head, nst_list) {
			if (tsp->len == sp->nst_len &&
			    !NFSBCMP(tsp->client, sp->nst_client, tsp->len))
				break;
		}
		if (sp == LIST_END(&sf->nsf_head)) {
			sp = (struct nfsrv_stable *)malloc(tsp->len +
			    sizeof (struct nfsrv_stable) - 1, M_TEMP,
			    M_WAITOK);
			NFSBCOPY((caddr_t)tsp, (caddr_t)&sp->nst_rec,
			    sizeof (struct nfst_rec) + tsp->len - 1);
			LIST_INSERT_HEAD(&sf->nsf_head, sp, nst_list);
		} else {
			if (tsp->flag == NFSNST_REVOKE)
				sp->nst_flag |= NFSNST_REVOKE;
			else
				/*
				 * A subsequent timestamp indicates the client
				 * did a setclientid/confirm and any previous
				 * revoke is no longer relevant.
				 */
				sp->nst_flag &= ~NFSNST_REVOKE;
		}
	    }
	} while (len > 0);
	free((caddr_t)tsp, M_TEMP);
	sf->nsf_flags = NFSNSF_OK;
	sf->nsf_eograce = NFSD_MONOSEC + sf->nsf_lease +
	    NFSRV_LEASEDELTA;
}

/*
 * Update the stable storage file, now that the grace period is over.
 */
APPLESTATIC void
nfsrv_updatestable(NFSPROC_T *p)
{
	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
	struct nfsrv_stable *sp, *nsp;
	int i;
	struct nfsvattr nva;
	vnode_t vp;
#if defined(__FreeBSD_version) && (__FreeBSD_version >= 500000)
	mount_t mp = NULL;
#endif
	int error;

	if (sf->nsf_fp == NULL || (sf->nsf_flags & NFSNSF_UPDATEDONE))
		return;
	sf->nsf_flags |= NFSNSF_UPDATEDONE;
	/*
	 * Ok, we need to rewrite the stable storage file.
	 * - truncate to 0 length
	 * - write the new first structure
	 * - loop through the data structures, writing out any that
	 *   have timestamps older than the old boot
	 */
	if (sf->nsf_bootvals) {
		/* Shift the previous boot times down and prepend ours. */
		sf->nsf_numboots++;
		for (i = sf->nsf_numboots - 2; i >= 0; i--)
			sf->nsf_bootvals[i + 1] = sf->nsf_bootvals[i];
	} else {
		sf->nsf_numboots = 1;
		sf->nsf_bootvals = (time_t *)malloc(sizeof (time_t),
		    M_TEMP, M_WAITOK);
	}
	sf->nsf_bootvals[0] = nfsrvboottime;
	sf->nsf_lease = nfsrv_lease;
	NFSVNO_ATTRINIT(&nva);
	NFSVNO_SETATTRVAL(&nva, size, 0);
	vp = NFSFPVNODE(sf->nsf_fp);
	vn_start_write(vp, &mp, V_WAIT);
	if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
		/* Truncate the file to 0 length before rewriting it. */
		error = nfsvno_setattr(vp, &nva, NFSFPCRED(sf->nsf_fp), p,
		    NULL);
		NFSVOPUNLOCK(vp, 0);
	} else
		error = EPERM;
	vn_finished_write(mp);
	if (!error)
		error = NFSD_RDWR(UIO_WRITE, vp,
		    (caddr_t)&sf->nsf_rec, sizeof (struct nfsf_rec), (off_t)0,
		    UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
	if (!error)
		error = NFSD_RDWR(UIO_WRITE, vp,
		    (caddr_t)sf->nsf_bootvals,
		    sf->nsf_numboots * sizeof (time_t),
		    (off_t)(sizeof (struct nfsf_rec)),
		    UIO_SYSSPACE, IO_SYNC, NFSFPCRED(sf->nsf_fp), NULL, p);
	free((caddr_t)sf->nsf_bootvals, M_TEMP);
	sf->nsf_bootvals = NULL;
	if (error) {
		sf->nsf_flags &= ~NFSNSF_OK;
		printf("EEK! Can't write NfsV4 stable storage file\n");
		return;
	}
	sf->nsf_flags |= NFSNSF_OK;

	/*
	 * Loop through the list and write out timestamp records for
	 * any clients that successfully reclaimed state.
	 */
	LIST_FOREACH_SAFE(sp, &sf->nsf_head, nst_list, nsp) {
		if (sp->nst_flag & NFSNST_GOTSTATE) {
			nfsrv_writestable(sp->nst_client, sp->nst_len,
			    NFSNST_NEWSTATE, p);
			sp->nst_clp->lc_flags |= LCL_STAMPEDSTABLE;
		}
		LIST_REMOVE(sp, nst_list);
		free((caddr_t)sp, M_TEMP);
	}
	nfsrv_backupstable();
}

/*
 * Append a record to the stable storage file.
 */
APPLESTATIC void
nfsrv_writestable(u_char *client, int len, int flag, NFSPROC_T *p)
{
	struct nfsrv_stablefirst *sf = &nfsrv_stablefirst;
	struct nfst_rec *sp;
	int error;

	if (!(sf->nsf_flags & NFSNSF_OK) || sf->nsf_fp == NULL)
		return;
	sp = (struct nfst_rec *)malloc(sizeof (struct nfst_rec) +
	    len - 1, M_TEMP, M_WAITOK);
	sp->len = len;
	NFSBCOPY(client, sp->client, len);
	sp->flag = flag;
	/* IO_APPEND: the stable storage file is an append-only log. */
	error = NFSD_RDWR(UIO_WRITE, NFSFPVNODE(sf->nsf_fp),
	    (caddr_t)sp, sizeof (struct nfst_rec) + len - 1, (off_t)0,
	    UIO_SYSSPACE, (IO_SYNC | IO_APPEND), NFSFPCRED(sf->nsf_fp), NULL,
	    p);
	free((caddr_t)sp, M_TEMP);
	if (error) {
		sf->nsf_flags &= ~NFSNSF_OK;
		printf("EEK! Can't write NfsV4 stable storage file\n");
	}
}

/*
 * This function is called during the grace period to mark a client
 * that successfully reclaimed state.
 */
static void
nfsrv_markstable(struct nfsclient *clp)
{
	struct nfsrv_stable *sp;

	/*
	 * First find the client structure.
	 */
	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
		if (sp->nst_len == clp->lc_idlen &&
		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
			break;
	}
	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head))
		return;

	/*
	 * Now, just mark it and set the nfsclient back pointer.
	 */
	sp->nst_flag |= NFSNST_GOTSTATE;
	sp->nst_clp = clp;
}

/*
 * This function is called for a reclaim, to see if it gets grace.
 * It returns 0 if a reclaim is allowed, 1 otherwise.
 */
static int
nfsrv_checkstable(struct nfsclient *clp)
{
	struct nfsrv_stable *sp;

	/*
	 * First, find the entry for the client.
	 */
	LIST_FOREACH(sp, &nfsrv_stablefirst.nsf_head, nst_list) {
		if (sp->nst_len == clp->lc_idlen &&
		    !NFSBCMP(sp->nst_client, clp->lc_id, sp->nst_len))
			break;
	}

	/*
	 * If not in the list, state was revoked or no state was issued
	 * since the previous reboot, a reclaim is denied.
	 */
	if (sp == LIST_END(&nfsrv_stablefirst.nsf_head) ||
	    (sp->nst_flag & NFSNST_REVOKE) ||
	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_OK))
		return (1);
	return (0);
}

/*
 * Test for and try to clear out a conflicting client. This is called by
 * nfsrv_lockctrl() and nfsrv_openctrl() when conflicts with other clients
 * are found.
 * The trick here is that it can't revoke a conflicting client with an
 * expired lease unless it holds the v4root lock, so...
 * If no v4root lock, get the lock and return 1 to indicate "try again".
 * Return 0 to indicate the conflict can't be revoked and 1 to indicate
 * the revocation worked and the conflicting client is "bye, bye", so it
 * can be tried again.
 * Return 2 to indicate that the vnode is VI_DOOMED after NFSVOPLOCK().
 * Unlocks State before a non-zero value is returned.
 */
static int
nfsrv_clientconflict(struct nfsclient *clp, int *haslockp, vnode_t vp,
    NFSPROC_T *p)
{
	int gotlock, lktype = 0;

	/*
	 * If lease hasn't expired, we can't fix it.
	 */
	if (clp->lc_expiry >= NFSD_MONOSEC ||
	    !(nfsrv_stablefirst.nsf_flags & NFSNSF_UPDATEDONE))
		return (0);
	if (*haslockp == 0) {
		/*
		 * Drop the state lock (and the vnode lock, remembering its
		 * type) before blocking for the exclusive v4root lock.
		 */
		NFSUNLOCKSTATE();
		if (vp != NULL) {
			lktype = NFSVOPISLOCKED(vp);
			NFSVOPUNLOCK(vp, 0);
		}
		NFSLOCKV4ROOTMUTEX();
		nfsv4_relref(&nfsv4rootfs_lock);
		do {
			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
			    NFSV4ROOTLOCKMUTEXPTR, NULL);
		} while (!gotlock);
		NFSUNLOCKV4ROOTMUTEX();
		*haslockp = 1;
		if (vp != NULL) {
			NFSVOPLOCK(vp, lktype | LK_RETRY);
			if ((vp->v_iflag & VI_DOOMED) != 0)
				return (2);
		}
		return (1);
	}
	NFSUNLOCKSTATE();

	/*
	 * Ok, we can expire the conflicting client.
	 */
	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
	nfsrv_backupstable();
	nfsrv_cleanclient(clp, p);
	nfsrv_freedeleglist(&clp->lc_deleg);
	nfsrv_freedeleglist(&clp->lc_olddeleg);
	LIST_REMOVE(clp, lc_hash);
	nfsrv_zapclient(clp, p);
	return (1);
}

/*
 * Resolve a delegation conflict.
 * Returns 0 to indicate the conflict was resolved without sleeping.
 * Return -1 to indicate that the caller should check for conflicts again.
 * Return > 0 for an error that should be returned, normally NFSERR_DELAY.
 *
 * Also, manipulate the nfsv4root_lock, as required. It isn't changed
 * for a return of 0, since there was no sleep and it could be required
 * later. It is released for a return of NFSERR_DELAY, since the caller
 * will return that error. It is released when a sleep was done waiting
 * for the delegation to be returned or expire (so that other nfsds can
 * handle ops). Then, it must be acquired for the write to stable storage.
 * (This function is somewhat similar to nfsrv_clientconflict(), but
 *  the semantics differ in a couple of subtle ways. The return of 0
 *  indicates the conflict was resolved without sleeping here, not
 *  that the conflict can't be resolved and the handling of nfsv4root_lock
 *  differs, as noted above.)
 * Unlocks State before returning a non-zero value.
 */
static int
nfsrv_delegconflict(struct nfsstate *stp, int *haslockp, NFSPROC_T *p,
    vnode_t vp)
{
	struct nfsclient *clp = stp->ls_clp;
	int gotlock, error, lktype = 0, retrycnt, zapped_clp;
	nfsv4stateid_t tstateid;
	fhandle_t tfh;

	/*
	 * If the conflict is with an old delegation...
	 */
	if (stp->ls_flags & NFSLCK_OLDDELEG) {
		/*
		 * You can delete it, if it has expired.
		 */
		if (clp->lc_delegtime < NFSD_MONOSEC) {
			nfsrv_freedeleg(stp);
			NFSUNLOCKSTATE();
			error = -1;
			goto out;
		}
		NFSUNLOCKSTATE();
		/*
		 * During this delay, the old delegation could expire or it
		 * could be recovered by the client via an Open with
		 * CLAIM_DELEGATE_PREV.
		 * Release the nfsv4root_lock, if held.
		 */
		if (*haslockp) {
			*haslockp = 0;
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		error = NFSERR_DELAY;
		goto out;
	}

	/*
	 * It's a current delegation, so:
	 * - check to see if the delegation has expired
	 *   - if so, get the v4root lock and then expire it
	 */
	if (!(stp->ls_flags & NFSLCK_DELEGRECALL)) {
		/*
		 * - do a recall callback, since not yet done
		 * For now, never allow truncate to be set. To use
		 * truncate safely, it must be guaranteed that the
		 * Remove, Rename or Setattr with size of 0 will
		 * succeed and that would require major changes to
		 * the VFS/Vnode OPs.
		 * Set the expiry time large enough so that it won't expire
		 * until after the callback, then set it correctly, once
		 * the callback is done. (The delegation will now time
		 * out whether or not the Recall worked ok. The timeout
		 * will be extended when ops are done on the delegation
		 * stateid, up to the timelimit.)
		 */
		stp->ls_delegtime = NFSD_MONOSEC + (2 * nfsrv_lease) +
		    NFSRV_LEASEDELTA;
		stp->ls_delegtimelimit = NFSD_MONOSEC + (6 * nfsrv_lease) +
		    NFSRV_LEASEDELTA;
		stp->ls_flags |= NFSLCK_DELEGRECALL;

		/*
		 * Loop NFSV4_CBRETRYCNT times while the CBRecall replies
		 * NFSERR_BADSTATEID or NFSERR_BADHANDLE. This is done
		 * in order to try and avoid a race that could happen
		 * when a CBRecall request passed the Open reply with
		 * the delegation in it when transiting the network.
		 * Since nfsrv_docallback will sleep, don't use stp after
		 * the call.
		 */
		NFSBCOPY((caddr_t)&stp->ls_stateid, (caddr_t)&tstateid,
		    sizeof (tstateid));
		NFSBCOPY((caddr_t)&stp->ls_lfp->lf_fh, (caddr_t)&tfh,
		    sizeof (tfh));
		NFSUNLOCKSTATE();
		if (*haslockp) {
			*haslockp = 0;
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		retrycnt = 0;
		do {
		    error = nfsrv_docallback(clp, NFSV4OP_CBRECALL,
			&tstateid, 0, &tfh, NULL, NULL, p);
		    retrycnt++;
		} while ((error == NFSERR_BADSTATEID ||
		    error == NFSERR_BADHANDLE) && retrycnt < NFSV4_CBRETRYCNT);
		error = NFSERR_DELAY;
		goto out;
	}

	if (clp->lc_expiry >= NFSD_MONOSEC &&
	    stp->ls_delegtime >= NFSD_MONOSEC) {
		NFSUNLOCKSTATE();
		/*
		 * A recall has been done, but it has not yet expired.
		 * So, RETURN_DELAY.
		 */
		if (*haslockp) {
			*haslockp = 0;
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		error = NFSERR_DELAY;
		goto out;
	}

	/*
	 * If we don't yet have the lock, just get it and then return,
	 * since we need that before deleting expired state, such as
	 * this delegation.
	 * When getting the lock, unlock the vnode, so other nfsds that
	 * are in progress, won't get stuck waiting for the vnode lock.
	 */
	if (*haslockp == 0) {
		NFSUNLOCKSTATE();
		if (vp != NULL) {
			lktype = NFSVOPISLOCKED(vp);
			NFSVOPUNLOCK(vp, 0);
		}
		NFSLOCKV4ROOTMUTEX();
		nfsv4_relref(&nfsv4rootfs_lock);
		do {
			gotlock = nfsv4_lock(&nfsv4rootfs_lock, 1, NULL,
			    NFSV4ROOTLOCKMUTEXPTR, NULL);
		} while (!gotlock);
		NFSUNLOCKV4ROOTMUTEX();
		*haslockp = 1;
		if (vp != NULL) {
			NFSVOPLOCK(vp, lktype | LK_RETRY);
			if ((vp->v_iflag & VI_DOOMED) != 0) {
				/* Vnode was recycled while unlocked. */
				*haslockp = 0;
				NFSLOCKV4ROOTMUTEX();
				nfsv4_unlock(&nfsv4rootfs_lock, 1);
				NFSUNLOCKV4ROOTMUTEX();
				error = NFSERR_PERM;
				goto out;
			}
		}
		error = -1;
		goto out;
	}

	NFSUNLOCKSTATE();
	/*
	 * Ok, we can delete the expired delegation.
	 * First, write the Revoke record to stable storage and then
	 * clear out the conflict.
	 * Since all other nfsd threads are now blocked, we can safely
	 * sleep without the state changing.
	 */
	nfsrv_writestable(clp->lc_id, clp->lc_idlen, NFSNST_REVOKE, p);
	nfsrv_backupstable();
	if (clp->lc_expiry < NFSD_MONOSEC) {
		/* Whole client lease expired: throw the client away. */
		nfsrv_cleanclient(clp, p);
		nfsrv_freedeleglist(&clp->lc_deleg);
		nfsrv_freedeleglist(&clp->lc_olddeleg);
		LIST_REMOVE(clp, lc_hash);
		zapped_clp = 1;
	} else {
		/* Only the delegation expired: free just that. */
		nfsrv_freedeleg(stp);
		zapped_clp = 0;
	}
	if (zapped_clp)
		nfsrv_zapclient(clp, p);
	error = -1;

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Check for a remove allowed, if remove is set to 1 and get rid of
 * delegations.
 */
APPLESTATIC int
nfsrv_checkremove(vnode_t vp, int remove, NFSPROC_T *p)
{
	struct nfsstate *stp;
	struct nfslockfile *lfp;
	int error, haslock = 0;
	fhandle_t nfh;

	/*
	 * First, get the lock file structure.
	 * (A return of -1 means no associated state, so remove ok.)
	 */
	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
tryagain:
	NFSLOCKSTATE();
	if (!error)
		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
	if (error) {
		NFSUNLOCKSTATE();
		if (haslock) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		if (error == -1)
			error = 0;
		goto out;
	}

	/*
	 * Now, we must Recall any delegations.
	 */
	error = nfsrv_cleandeleg(vp, lfp, NULL, &haslock, p);
	if (error) {
		/*
		 * nfsrv_cleandeleg() unlocks state for non-zero
		 * return.
		 */
		if (error == -1)
			goto tryagain;
		if (haslock) {
			NFSLOCKV4ROOTMUTEX();
			nfsv4_unlock(&nfsv4rootfs_lock, 1);
			NFSUNLOCKV4ROOTMUTEX();
		}
		goto out;
	}

	/*
	 * Now, look for a conflicting open share.
	 */
	if (remove) {
		/*
		 * If the entry in the directory was the last reference to the
		 * corresponding filesystem object, the object can be
		 * destroyed, so only check for WRITEDENY opens when other
		 * links remain.
		 */
		if (lfp->lf_usecount > 1)
			LIST_FOREACH(stp, &lfp->lf_open, ls_file) {
				if (stp->ls_flags & NFSLCK_WRITEDENY) {
					error = NFSERR_FILEOPEN;
					break;
				}
			}
	}

	NFSUNLOCKSTATE();
	if (haslock) {
		NFSLOCKV4ROOTMUTEX();
		nfsv4_unlock(&nfsv4rootfs_lock, 1);
		NFSUNLOCKV4ROOTMUTEX();
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Clear out all delegations for the file referred to by lfp.
 * May return NFSERR_DELAY, if there will be a delay waiting for
 * delegations to expire.
 * Returns -1 to indicate it slept while recalling a delegation.
 * This function has the side effect of deleting the nfslockfile structure,
 * if it no longer has associated state and didn't have to sleep.
 * Unlocks State before a non-zero value is returned.
 */
static int
nfsrv_cleandeleg(vnode_t vp, struct nfslockfile *lfp,
    struct nfsclient *clp, int *haslockp, NFSPROC_T *p)
{
	struct nfsstate *stp, *nstp;
	int ret = 0;

	stp = LIST_FIRST(&lfp->lf_deleg);
	while (stp != LIST_END(&lfp->lf_deleg)) {
		nstp = LIST_NEXT(stp, ls_file);
		/* Delegations held by clp itself are left alone. */
		if (stp->ls_clp != clp) {
			ret = nfsrv_delegconflict(stp, haslockp, p, vp);
			if (ret) {
				/*
				 * nfsrv_delegconflict() unlocks state
				 * when it returns non-zero.
				 */
				goto out;
			}
		}
		stp = nstp;
	}
out:
	NFSEXITCODE(ret);
	return (ret);
}

/*
 * There are certain operations that, when being done outside of NFSv4,
 * require that any NFSv4 delegation for the file be recalled.
 * This function is to be called for those cases:
 * VOP_RENAME() - When a delegation is being recalled for any reason,
 *	the client may have to do Opens against the server, using the file's
 *	final component name. If the file has been renamed on the server,
 *	that component name will be incorrect and the Open will fail.
 * VOP_REMOVE() - Theoretically, a client could Open a file after it has
 *	been removed on the server, if there is a delegation issued to
 *	that client for the file. I say "theoretically" since clients
 *	normally do an Access Op before the Open and that Access Op will
 *	fail with ESTALE. Note that NFSv2 and 3 don't even do Opens, so
 *	they will detect the file's removal in the same manner. (There is
 *	one case where RFC3530 allows a client to do an Open without first
 *	doing an Access Op, which is passage of a check against the ACE
 *	returned with a Write delegation, but current practice is to ignore
 *	the ACE and always do an Access Op.)
 *	Since the functions can only be called with an unlocked vnode, this
 *	can't be done at this time.
 * VOP_ADVLOCK() - When a client holds a delegation, it can issue byte range
 *	locks locally in the client, which are not visible to the server. To
 *	deal with this, issuing of delegations for a vnode must be disabled
 *	and all delegations for the vnode recalled. This is done via the
 *	second function, using the VV_DISABLEDELEG vflag on the vnode.
 */
APPLESTATIC void
nfsd_recalldelegation(vnode_t vp, NFSPROC_T *p)
{
	time_t starttime;
	int error;

	/*
	 * First, check to see if the server is currently running and it has
	 * been called for a regular file when issuing delegations.
	 */
	if (newnfs_numnfsd == 0 || vp->v_type != VREG ||
	    nfsrv_issuedelegs == 0)
		return;

	KASSERT((NFSVOPISLOCKED(vp) != LK_EXCLUSIVE), ("vp %p is locked", vp));

	/*
	 * First, get a reference on the nfsv4rootfs_lock so that an
	 * exclusive lock cannot be acquired by another thread.
	 */
	NFSLOCKV4ROOTMUTEX();
	nfsv4_getref(&nfsv4rootfs_lock, NULL, NFSV4ROOTLOCKMUTEXPTR, NULL);
	NFSUNLOCKV4ROOTMUTEX();

	/*
	 * Now, call nfsrv_checkremove() in a loop while it returns
	 * NFSERR_DELAY. Return upon any other error or when timed out.
	 */
	starttime = NFSD_MONOSEC;
	do {
		if (NFSVOPLOCK(vp, LK_EXCLUSIVE) == 0) {
			error = nfsrv_checkremove(vp, 0, p);
			NFSVOPUNLOCK(vp, 0);
		} else
			error = EPERM;
		if (error == NFSERR_DELAY) {
			if (NFSD_MONOSEC - starttime > NFS_REMOVETIMEO)
				break;
			/* Sleep for a short period of time */
			(void) nfs_catnap(PZERO, 0, "nfsremove");
		}
	} while (error == NFSERR_DELAY);

	NFSLOCKV4ROOTMUTEX();
	nfsv4_relref(&nfsv4rootfs_lock);
	NFSUNLOCKV4ROOTMUTEX();
}

/*
 * Disable future delegations for the vnode and recall all extant ones.
 */
APPLESTATIC void
nfsd_disabledelegation(vnode_t vp, NFSPROC_T *p)
{

#ifdef VV_DISABLEDELEG
	/*
	 * First, flag issuance of delegations disabled.
	 */
	atomic_set_long(&vp->v_vflag, VV_DISABLEDELEG);
#endif

	/*
	 * Then call nfsd_recalldelegation() to get rid of all extant
	 * delegations.
	 */
	nfsd_recalldelegation(vp, p);
}

/*
 * Check for conflicting locks, etc.
 * and then get rid of delegations.
 * (At one point I thought that I should get rid of delegations for any
 *  Setattr, since it could potentially disallow the I/O op (read or write)
 *  allowed by the delegation. However, Setattr Ops that aren't changing
 *  the size get a stateid of all 0s, so you can't tell if it is a delegation
 *  for the same client or a different one, so I decided to only get rid
 *  of delegations for other clients when the size is being changed.)
 * In general, a Setattr can disable NFS I/O Ops that are outstanding, such
 * as Write backs, even if there is no delegation, so it really isn't any
 * different?)
 */
APPLESTATIC int
nfsrv_checksetattr(vnode_t vp, struct nfsrv_descript *nd,
    nfsv4stateid_t *stateidp, struct nfsvattr *nvap, nfsattrbit_t *attrbitp,
    struct nfsexstuff *exp, NFSPROC_T *p)
{
	struct nfsstate st, *stp = &st;
	struct nfslock lo, *lop = &lo;
	int error = 0;
	nfsquad_t clientid;

	/* A size change is treated like a Write, starting at the new size. */
	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_SIZE)) {
		stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS);
		lop->lo_first = nvap->na_size;
	} else {
		stp->ls_flags = 0;
		lop->lo_first = 0;
	}
	if (NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNER) ||
	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_OWNERGROUP) ||
	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_MODE) ||
	    NFSISSET_ATTRBIT(attrbitp, NFSATTRBIT_ACL))
		stp->ls_flags |= NFSLCK_SETATTR;
	if (stp->ls_flags == 0)
		goto out;
	lop->lo_end = NFS64BITSSET;
	lop->lo_flags = NFSLCK_WRITE;
	stp->ls_ownerlen = 0;
	stp->ls_op = NULL;
	stp->ls_uid = nd->nd_cred->cr_uid;
	stp->ls_stateid.seqid = stateidp->seqid;
	clientid.lval[0] = stp->ls_stateid.other[0] = stateidp->other[0];
	clientid.lval[1] = stp->ls_stateid.other[1] = stateidp->other[1];
	stp->ls_stateid.other[2] = stateidp->other[2];
	error = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid,
	    stateidp, exp, nd, p);

out:
	NFSEXITCODE2(error, nd);
	return (error);
}

/*
 * Check for a write delegation and do a CBGETATTR if there is one, updating
 * the attributes, as required.
 * Should I return an error if I can't get the attributes? (For now, I'll
 * just return ok.)
 */
APPLESTATIC int
nfsrv_checkgetattr(struct nfsrv_descript *nd, vnode_t vp,
    struct nfsvattr *nvap, nfsattrbit_t *attrbitp, struct ucred *cred,
    NFSPROC_T *p)
{
	struct nfsstate *stp;
	struct nfslockfile *lfp;
	struct nfsclient *clp;
	struct nfsvattr nva;
	fhandle_t nfh;
	int error = 0;
	nfsattrbit_t cbbits;
	u_quad_t delegfilerev;

	NFSCBGETATTR_ATTRBIT(attrbitp, &cbbits);
	if (!NFSNONZERO_ATTRBIT(&cbbits))
		goto out;

	/*
	 * Get the lock file structure.
	 * (A return of -1 means no associated state, so return ok.)
	 */
	error = nfsrv_getlockfh(vp, NFSLCK_CHECK, NULL, &nfh, p);
	NFSLOCKSTATE();
	if (!error)
		error = nfsrv_getlockfile(NFSLCK_CHECK, NULL, &lfp, &nfh, 0);
	if (error) {
		NFSUNLOCKSTATE();
		if (error == -1)
			error = 0;
		goto out;
	}

	/*
	 * Now, look for a write delegation.
	 */
	LIST_FOREACH(stp, &lfp->lf_deleg, ls_file) {
		if (stp->ls_flags & NFSLCK_DELEGWRITE)
			break;
	}
	if (stp == LIST_END(&lfp->lf_deleg)) {
		NFSUNLOCKSTATE();
		goto out;
	}
	clp = stp->ls_clp;
	delegfilerev = stp->ls_filerev;

	/*
	 * If the Write delegation was issued as a part of this Compound RPC
	 * or if we have an Implied Clientid (used in a previous Op in this
	 * compound) and it is the client the delegation was issued to,
	 * just return ok.
	 * I also assume that it is from the same client iff the network
	 * host IP address is the same as the callback address. (Not
	 * exactly correct by the RFC, but avoids a lot of Getattr
	 * callbacks.)
	 */
	if (nd->nd_compref == stp->ls_compref ||
	    ((nd->nd_flag & ND_IMPLIEDCLID) &&
	     clp->lc_clientid.qval == nd->nd_clientid.qval) ||
	     nfsaddr2_match(clp->lc_req.nr_nam, nd->nd_nam)) {
		NFSUNLOCKSTATE();
		goto out;
	}

	/*
	 * We are now done with the delegation state structure,
	 * so the statelock can be released and we can now tsleep().
	 */

	/*
	 * Now, we must do the CB Getattr callback, to see if Change or Size
	 * has changed.
*/ if (clp->lc_expiry >= NFSD_MONOSEC) { NFSUNLOCKSTATE(); NFSVNO_ATTRINIT(&nva); nva.na_filerev = NFS64BITSSET; error = nfsrv_docallback(clp, NFSV4OP_CBGETATTR, NULL, 0, &nfh, &nva, &cbbits, p); if (!error) { if ((nva.na_filerev != NFS64BITSSET && nva.na_filerev > delegfilerev) || (NFSVNO_ISSETSIZE(&nva) && nva.na_size != nvap->na_size)) { error = nfsvno_updfilerev(vp, nvap, cred, p); if (NFSVNO_ISSETSIZE(&nva)) nvap->na_size = nva.na_size; } } else error = 0; /* Ignore callback errors for now. */ } else { NFSUNLOCKSTATE(); } out: NFSEXITCODE2(error, nd); return (error); } /* * This function looks for openowners that haven't had any opens for * a while and throws them away. Called by an nfsd when NFSNSF_NOOPENS * is set. */ APPLESTATIC void nfsrv_throwawayopens(NFSPROC_T *p) { struct nfsclient *clp, *nclp; struct nfsstate *stp, *nstp; int i; NFSLOCKSTATE(); nfsrv_stablefirst.nsf_flags &= ~NFSNSF_NOOPENS; /* * For each client... */ for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) { LIST_FOREACH_SAFE(stp, &clp->lc_open, ls_list, nstp) { if (LIST_EMPTY(&stp->ls_open) && (stp->ls_noopens > NFSNOOPEN || (nfsrv_openpluslock * 2) > nfsrv_v4statelimit)) nfsrv_freeopenowner(stp, 0, p); } } } NFSUNLOCKSTATE(); } /* * This function checks to see if the credentials are the same. * Returns 1 for not same, 0 otherwise. */ static int nfsrv_notsamecredname(struct nfsrv_descript *nd, struct nfsclient *clp) { if (nd->nd_flag & ND_GSS) { if (!(clp->lc_flags & LCL_GSS)) return (1); if (clp->lc_flags & LCL_NAME) { if (nd->nd_princlen != clp->lc_namelen || NFSBCMP(nd->nd_principal, clp->lc_name, clp->lc_namelen)) return (1); else return (0); } if (nd->nd_cred->cr_uid == clp->lc_uid) return (0); else return (1); } else if (clp->lc_flags & LCL_GSS) return (1); /* * For AUTH_SYS, allow the same uid or root. (This is underspecified * in RFC3530, which talks about principals, but doesn't say anything * about uids for AUTH_SYS.) 
 */
	/* AUTH_SYS: accept a matching uid, or root. */
	if (nd->nd_cred->cr_uid == clp->lc_uid || nd->nd_cred->cr_uid == 0)
		return (0);
	else
		return (1);
}

/*
 * Calculate the lease expiry time.
 * While nsf_eograce is still in the future (presumably the end-of-grace
 * time -- TODO confirm), hand out leases twice the normal duration so
 * reclaiming clients are not expired before grace ends.
 */
static time_t
nfsrv_leaseexpiry(void)
{

	if (nfsrv_stablefirst.nsf_eograce > NFSD_MONOSEC)
		return (NFSD_MONOSEC + 2 * (nfsrv_lease + NFSRV_LEASEDELTA));
	return (NFSD_MONOSEC + nfsrv_lease + NFSRV_LEASEDELTA);
}

/*
 * Delay the delegation timeout as far as ls_delegtimelimit, as required.
 * Only applies to delegations being recalled (NFSLCK_DELEGRECALL); the
 * timeout is extended by one lease period, clamped to ls_delegtimelimit,
 * and only when the delegation is within 15 seconds of timing out.
 */
static void
nfsrv_delaydelegtimeout(struct nfsstate *stp)
{

	if ((stp->ls_flags & NFSLCK_DELEGRECALL) == 0)
		return;

	if ((stp->ls_delegtime + 15) > NFSD_MONOSEC &&
	    stp->ls_delegtime < stp->ls_delegtimelimit) {
		stp->ls_delegtime += nfsrv_lease;
		if (stp->ls_delegtime > stp->ls_delegtimelimit)
			stp->ls_delegtime = stp->ls_delegtimelimit;
	}
}

/*
 * This function checks to see if there is any other state associated
 * with the openowner for this Open.
 * It returns 1 if there is no other state, 0 otherwise.
 * (i.e. returns 1 only when stp is the openowner's sole open and stp
 * holds no byte-range locks.)
 */
static int
nfsrv_nootherstate(struct nfsstate *stp)
{
	struct nfsstate *tstp;

	LIST_FOREACH(tstp, &stp->ls_openowner->ls_open, ls_list) {
		if (tstp != stp || !LIST_EMPTY(&tstp->ls_lock))
			return (0);
	}
	return (1);
}

/*
 * Create a list of lock deltas (changes to local byte range locking
 * that can be rolled back using the list) and apply the changes via
 * nfsvno_advlock(). Optionally, lock the list. It is expected that either
 * the rollback or update function will be called after this.
 * It returns an error (and rolls back, as required), if any nfsvno_advlock()
 * call fails. If it returns an error, it will unlock the list.
 */
static int
nfsrv_locallock(vnode_t vp, struct nfslockfile *lfp, int flags,
    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
{
	struct nfslock *lop, *nlop;
	int error = 0;

	/* Loop through the list of locks.
*/ lop = LIST_FIRST(&lfp->lf_locallock); while (first < end && lop != NULL) { nlop = LIST_NEXT(lop, lo_lckowner); if (first >= lop->lo_end) { /* not there yet */ lop = nlop; } else if (first < lop->lo_first) { /* new one starts before entry in list */ if (end <= lop->lo_first) { /* no overlap between old and new */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, lop->lo_first, cfp, p); if (error != 0) break; first = lop->lo_first; } } else { /* new one overlaps this entry in list */ if (end <= lop->lo_end) { /* overlaps all of new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, end, cfp, p); if (error != 0) break; first = end; } else { /* handle fragment overlapped with new one */ error = nfsrv_dolocal(vp, lfp, flags, lop->lo_flags, first, lop->lo_end, cfp, p); if (error != 0) break; first = lop->lo_end; lop = nlop; } } } if (first < end && error == 0) /* handle fragment past end of list */ error = nfsrv_dolocal(vp, lfp, flags, NFSLCK_UNLOCK, first, end, cfp, p); NFSEXITCODE(error); return (error); } /* * Local lock unlock. Unlock all byte ranges that are no longer locked * by NFSv4. To do this, unlock any subranges of first-->end that * do not overlap with the byte ranges of any lock in the lfp->lf_lock * list. This list has all locks for the file held by other * tuples. The list is ordered by increasing * lo_first value, but may have entries that overlap each other, for * the case of read locks. 
*/ static void nfsrv_localunlock(vnode_t vp, struct nfslockfile *lfp, uint64_t init_first, uint64_t init_end, NFSPROC_T *p) { struct nfslock *lop; uint64_t first, end, prevfirst; first = init_first; end = init_end; while (first < init_end) { /* Loop through all nfs locks, adjusting first and end */ prevfirst = 0; LIST_FOREACH(lop, &lfp->lf_lock, lo_lckfile) { KASSERT(prevfirst <= lop->lo_first, ("nfsv4 locks out of order")); KASSERT(lop->lo_first < lop->lo_end, ("nfsv4 bogus lock")); prevfirst = lop->lo_first; if (first >= lop->lo_first && first < lop->lo_end) /* * Overlaps with initial part, so trim * off that initial part by moving first past * it. */ first = lop->lo_end; else if (end > lop->lo_first && lop->lo_first > first) { /* * This lock defines the end of the * segment to unlock, so set end to the * start of it and break out of the loop. */ end = lop->lo_first; break; } if (first >= end) /* * There is no segment left to do, so * break out of this loop and then exit * the outer while() since first will be set * to end, which must equal init_end here. */ break; } if (first < end) { /* Unlock this segment */ (void) nfsrv_dolocal(vp, lfp, NFSLCK_UNLOCK, NFSLCK_READ, first, end, NULL, p); nfsrv_locallock_commit(lfp, NFSLCK_UNLOCK, first, end); } /* * Now move past this segment and look for any further * segment in the range, if there is one. */ first = end; end = init_end; } } /* * Do the local lock operation and update the rollback list, as required. * Perform the rollback and return the error if nfsvno_advlock() fails. 
 */
static int
nfsrv_dolocal(vnode_t vp, struct nfslockfile *lfp, int flags, int oldflags,
    uint64_t first, uint64_t end, struct nfslockconflict *cfp, NFSPROC_T *p)
{
	struct nfsrollback *rlp;
	int error = 0, ltype, oldltype;

	/* Map the NFS lock flags to local advisory-lock types. */
	if (flags & NFSLCK_WRITE)
		ltype = F_WRLCK;
	else if (flags & NFSLCK_READ)
		ltype = F_RDLCK;
	else
		ltype = F_UNLCK;
	if (oldflags & NFSLCK_WRITE)
		oldltype = F_WRLCK;
	else if (oldflags & NFSLCK_READ)
		oldltype = F_RDLCK;
	else
		oldltype = F_UNLCK;
	/*
	 * Same type, or downgrade from write to read: the existing local
	 * lock already covers the new one.
	 */
	if (ltype == oldltype || (oldltype == F_WRLCK && ltype == F_RDLCK))
		/* nothing to do */
		goto out;
	error = nfsvno_advlock(vp, ltype, first, end, p);
	if (error != 0) {
		if (cfp != NULL) {
			/*
			 * Report a synthetic whole-file write conflict owned
			 * by "LOCAL", since the real conflicting owner is a
			 * local (non-NFS) lock.
			 */
			cfp->cl_clientid.lval[0] = 0;
			cfp->cl_clientid.lval[1] = 0;
			cfp->cl_first = 0;
			cfp->cl_end = NFS64BITSSET;
			cfp->cl_flags = NFSLCK_WRITE;
			cfp->cl_ownerlen = 5;
			NFSBCOPY("LOCAL", cfp->cl_owner, 5);
		}
		nfsrv_locallock_rollback(vp, lfp, p);
	} else if (ltype != F_UNLCK) {
		/* Record the old type so the change can be rolled back. */
		rlp = malloc(sizeof (struct nfsrollback), M_NFSDROLLBACK,
		    M_WAITOK);
		rlp->rlck_first = first;
		rlp->rlck_end = end;
		rlp->rlck_type = oldltype;
		LIST_INSERT_HEAD(&lfp->lf_rollback, rlp, rlck_list);
	}

out:
	NFSEXITCODE(error);
	return (error);
}

/*
 * Roll back local lock changes and free up the rollback list.
 * Each entry restores the pre-change lock type via nfsvno_advlock();
 * failures of the restore calls are ignored.
 */
static void
nfsrv_locallock_rollback(vnode_t vp, struct nfslockfile *lfp, NFSPROC_T *p)
{
	struct nfsrollback *rlp, *nrlp;

	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp) {
		(void) nfsvno_advlock(vp, rlp->rlck_type, rlp->rlck_first,
		    rlp->rlck_end, p);
		free(rlp, M_NFSDROLLBACK);
	}
	LIST_INIT(&lfp->lf_rollback);
}

/*
 * Update local lock list and delete rollback list (ie now committed to the
 * local locks). Most of the work is done by the internal function.
 */
static void
nfsrv_locallock_commit(struct nfslockfile *lfp, int flags, uint64_t first,
    uint64_t end)
{
	struct nfsrollback *rlp, *nrlp;
	struct nfslock *new_lop, *other_lop;

	/*
	 * other_lop is a spare entry nfsrv_updatelock() may consume when it
	 * has to split an existing range; only needed for lock (not unlock)
	 * operations.
	 */
	new_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK, M_WAITOK);
	if (flags & (NFSLCK_READ | NFSLCK_WRITE))
		other_lop = malloc(sizeof (struct nfslock), M_NFSDLOCK,
		    M_WAITOK);
	else
		other_lop = NULL;
	new_lop->lo_flags = flags;
	new_lop->lo_first = first;
	new_lop->lo_end = end;
	nfsrv_updatelock(NULL, &new_lop, &other_lop, lfp);
	/* nfsrv_updatelock() NULLs out whichever entries it consumed. */
	if (new_lop != NULL)
		free(new_lop, M_NFSDLOCK);
	if (other_lop != NULL)
		free(other_lop, M_NFSDLOCK);

	/* and get rid of the rollback list */
	LIST_FOREACH_SAFE(rlp, &lfp->lf_rollback, rlck_list, nrlp)
		free(rlp, M_NFSDROLLBACK);
	LIST_INIT(&lfp->lf_rollback);
}

/*
 * Lock the struct nfslockfile for local lock updating.
 * Spins (sleeping in nfsv4_lock()) until the lf_locallock_lck exclusive
 * lock is acquired.
 */
static void
nfsrv_locklf(struct nfslockfile *lfp)
{
	int gotlock;

	/* lf_usecount ensures *lfp won't be free'd */
	lfp->lf_usecount++;
	do {
		gotlock = nfsv4_lock(&lfp->lf_locallock_lck, 1, NULL,
		    NFSSTATEMUTEXPTR, NULL);
	} while (gotlock == 0);
	lfp->lf_usecount--;
}

/*
 * Unlock the struct nfslockfile after local lock updating.
 */
static void
nfsrv_unlocklf(struct nfslockfile *lfp)
{

	nfsv4_unlock(&lfp->lf_locallock_lck, 0);
}

/*
 * Clear out all state for the NFSv4 server.
 * Must be called by a thread that can sleep when no nfsds are running.
 */
void
nfsrv_throwawayallstate(NFSPROC_T *p)
{
	struct nfsclient *clp, *nclp;
	struct nfslockfile *lfp, *nlfp;
	int i;

	/*
	 * For each client, clean out the state and then free the structure.
	 */
	for (i = 0; i < nfsrv_clienthashsize; i++) {
		LIST_FOREACH_SAFE(clp, &nfsclienthash[i], lc_hash, nclp) {
			nfsrv_cleanclient(clp, p);
			nfsrv_freedeleglist(&clp->lc_deleg);
			nfsrv_freedeleglist(&clp->lc_olddeleg);
			free(clp->lc_stateid, M_NFSDCLIENT);
			free(clp, M_NFSDCLIENT);
		}
	}

	/*
	 * Also, free up any remaining lock file structures.
*/ for (i = 0; i < nfsrv_lockhashsize; i++) { LIST_FOREACH_SAFE(lfp, &nfslockhash[i], lf_hash, nlfp) { printf("nfsd unload: fnd a lock file struct\n"); nfsrv_freenfslockfile(lfp); } } } /* * Check the sequence# for the session and slot provided as an argument. * Also, renew the lease if the session will return NFS_OK. */ int nfsrv_checksequence(struct nfsrv_descript *nd, uint32_t sequenceid, uint32_t *highest_slotidp, uint32_t *target_highest_slotidp, int cache_this, uint32_t *sflagsp, NFSPROC_T *p) { struct nfsdsession *sep; struct nfssessionhash *shp; int error; SVCXPRT *savxprt; shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); return (NFSERR_BADSESSION); } error = nfsv4_seqsession(sequenceid, nd->nd_slotid, *highest_slotidp, sep->sess_slots, NULL, NFSV4_SLOTS - 1); if (error != 0) { NFSUNLOCKSESSION(shp); return (error); } if (cache_this != 0) nd->nd_flag |= ND_SAVEREPLY; /* Renew the lease. */ sep->sess_clp->lc_expiry = nfsrv_leaseexpiry(); nd->nd_clientid.qval = sep->sess_clp->lc_clientid.qval; nd->nd_flag |= ND_IMPLIEDCLID; /* * If this session handles the backchannel, save the nd_xprt for this * RPC, since this is the one being used. */ if (sep->sess_clp->lc_req.nr_client != NULL && (sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) { savxprt = sep->sess_cbsess.nfsess_xprt; SVC_ACQUIRE(nd->nd_xprt); nd->nd_xprt->xp_p2 = sep->sess_clp->lc_req.nr_client->cl_private; nd->nd_xprt->xp_idletimeout = 0; /* Disable timeout. 
*/ sep->sess_cbsess.nfsess_xprt = nd->nd_xprt; if (savxprt != NULL) SVC_RELEASE(savxprt); } *sflagsp = 0; if (sep->sess_clp->lc_req.nr_client == NULL) *sflagsp |= NFSV4SEQ_CBPATHDOWN; NFSUNLOCKSESSION(shp); if (error == NFSERR_EXPIRED) { *sflagsp |= NFSV4SEQ_EXPIREDALLSTATEREVOKED; error = 0; } else if (error == NFSERR_ADMINREVOKED) { *sflagsp |= NFSV4SEQ_ADMINSTATEREVOKED; error = 0; } *highest_slotidp = *target_highest_slotidp = NFSV4_SLOTS - 1; return (0); } /* * Check/set reclaim complete for this session/clientid. */ int nfsrv_checkreclaimcomplete(struct nfsrv_descript *nd) { struct nfsdsession *sep; struct nfssessionhash *shp; int error = 0; shp = NFSSESSIONHASH(nd->nd_sessionid); NFSLOCKSTATE(); NFSLOCKSESSION(shp); sep = nfsrv_findsession(nd->nd_sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (NFSERR_BADSESSION); } /* Check to see if reclaim complete has already happened. */ if ((sep->sess_clp->lc_flags & LCL_RECLAIMCOMPLETE) != 0) error = NFSERR_COMPLETEALREADY; else sep->sess_clp->lc_flags |= LCL_RECLAIMCOMPLETE; NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (error); } /* * Cache the reply in a session slot. */ void nfsrv_cache_session(uint8_t *sessionid, uint32_t slotid, int repstat, struct mbuf **m) { struct nfsdsession *sep; struct nfssessionhash *shp; shp = NFSSESSIONHASH(sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); if (sep == NULL) { NFSUNLOCKSESSION(shp); printf("nfsrv_cache_session: no session\n"); m_freem(*m); return; } nfsv4_seqsess_cacherep(slotid, sep->sess_slots, repstat, m); NFSUNLOCKSESSION(shp); } /* * Search for a session that matches the sessionid. */ static struct nfsdsession * nfsrv_findsession(uint8_t *sessionid) { struct nfsdsession *sep; struct nfssessionhash *shp; shp = NFSSESSIONHASH(sessionid); LIST_FOREACH(sep, &shp->list, sess_hash) { if (!NFSBCMP(sessionid, sep->sess_sessionid, NFSX_V4SESSIONID)) break; } return (sep); } /* * Destroy a session. 
*/ int nfsrv_destroysession(struct nfsrv_descript *nd, uint8_t *sessionid) { int error, samesess; samesess = 0; if (!NFSBCMP(sessionid, nd->nd_sessionid, NFSX_V4SESSIONID)) { samesess = 1; if ((nd->nd_flag & ND_LASTOP) == 0) return (NFSERR_BADSESSION); } error = nfsrv_freesession(NULL, sessionid); if (error == 0 && samesess != 0) nd->nd_flag &= ~ND_HASSEQUENCE; return (error); } /* * Free up a session structure. */ static int nfsrv_freesession(struct nfsdsession *sep, uint8_t *sessionid) { struct nfssessionhash *shp; int i; NFSLOCKSTATE(); if (sep == NULL) { shp = NFSSESSIONHASH(sessionid); NFSLOCKSESSION(shp); sep = nfsrv_findsession(sessionid); } else { shp = NFSSESSIONHASH(sep->sess_sessionid); NFSLOCKSESSION(shp); } if (sep != NULL) { sep->sess_refcnt--; if (sep->sess_refcnt > 0) { NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); return (0); } LIST_REMOVE(sep, sess_hash); LIST_REMOVE(sep, sess_list); } NFSUNLOCKSESSION(shp); NFSUNLOCKSTATE(); if (sep == NULL) return (NFSERR_BADSESSION); for (i = 0; i < NFSV4_SLOTS; i++) if (sep->sess_slots[i].nfssl_reply != NULL) m_freem(sep->sess_slots[i].nfssl_reply); if (sep->sess_cbsess.nfsess_xprt != NULL) SVC_RELEASE(sep->sess_cbsess.nfsess_xprt); free(sep, M_NFSDSESSION); return (0); } /* * Free a stateid. * RFC5661 says that it should fail when there are associated opens, locks * or delegations. Since stateids represent opens, I don't see how you can * free an open stateid (it will be free'd when closed), so this function * only works for lock stateids (freeing the lock_owner) or delegations. */ int nfsrv_freestateid(struct nfsrv_descript *nd, nfsv4stateid_t *stateidp, NFSPROC_T *p) { struct nfsclient *clp; struct nfsstate *stp; int error; NFSLOCKSTATE(); /* * Look up the stateid */ error = nfsrv_getclient((nfsquad_t)((u_quad_t)0), CLOPS_RENEW, &clp, NULL, (nfsquad_t)((u_quad_t)0), 0, nd, p); if (error == 0) { /* First, check for a delegation. 
 */
		LIST_FOREACH(stp, &clp->lc_deleg, ls_list) {
			if (!NFSBCMP(stp->ls_stateid.other, stateidp->other,
			    NFSX_STATEIDOTHER))
				break;
		}
		if (stp != NULL) {
			/* Found a matching delegation; free it and return. */
			nfsrv_freedeleg(stp);
			NFSUNLOCKSTATE();
			return (error);
		}
	}
	/* Not a delegation, try for a lock_owner. */
	if (error == 0)
		error = nfsrv_getstate(clp, stateidp, 0, &stp);
	if (error == 0 && ((stp->ls_flags & (NFSLCK_OPEN | NFSLCK_DELEGREAD |
	    NFSLCK_DELEGWRITE)) != 0 || (stp->ls_flags & NFSLCK_LOCK) == 0))
		/* Not a lock_owner stateid. */
		error = NFSERR_LOCKSHELD;
	/* Can't free a lock_owner that still holds byte-range locks. */
	if (error == 0 && !LIST_EMPTY(&stp->ls_lock))
		error = NFSERR_LOCKSHELD;
	if (error == 0)
		nfsrv_freelockowner(stp, NULL, 0, p);
	NFSUNLOCKSTATE();
	return (error);
}

/*
 * Generate the xdr for an NFSv4.1 CBSequence Operation.
 * Acquires a backchannel session (with a reference, via
 * nfsv4_getcbsession()) and a slot, then builds the Sequence arguments
 * into nd. Records the slot sequence pointer in nd->nd_slotseq and sets
 * ND_HASSEQUENCE. Returns 0 on success or an nfs error code.
 */
static int
nfsv4_setcbsequence(struct nfsrv_descript *nd, struct nfsclient *clp,
    int dont_replycache, struct nfsdsession **sepp)
{
	struct nfsdsession *sep;
	uint32_t *tl, slotseq = 0;
	int maxslot, slotpos;
	uint8_t sessionid[NFSX_V4SESSIONID];
	int error;

	error = nfsv4_getcbsession(clp, sepp);
	if (error != 0)
		return (error);
	sep = *sepp;
	(void)nfsv4_sequencelookup(NULL, &sep->sess_cbsess, &slotpos, &maxslot,
	    &slotseq, sessionid);
	KASSERT(maxslot >= 0, ("nfsv4_setcbsequence neg maxslot"));

	/* Build the Sequence arguments. */
	NFSM_BUILD(tl, uint32_t *, NFSX_V4SESSIONID + 5 * NFSX_UNSIGNED);
	bcopy(sessionid, tl, NFSX_V4SESSIONID);
	tl += NFSX_V4SESSIONID / NFSX_UNSIGNED;
	/* Remember where the sequenceid goes, so it can be updated later. */
	nd->nd_slotseq = tl;
	*tl++ = txdr_unsigned(slotseq);
	*tl++ = txdr_unsigned(slotpos);
	*tl++ = txdr_unsigned(maxslot);
	if (dont_replycache == 0)
		*tl++ = newnfs_true;
	else
		*tl++ = newnfs_false;
	*tl = 0;	/* No referring call list, for now. */
	nd->nd_flag |= ND_HASSEQUENCE;
	return (0);
}

/*
 * Get a session for the callback.
*/ static int nfsv4_getcbsession(struct nfsclient *clp, struct nfsdsession **sepp) { struct nfsdsession *sep; NFSLOCKSTATE(); LIST_FOREACH(sep, &clp->lc_session, sess_list) { if ((sep->sess_crflags & NFSV4CRSESS_CONNBACKCHAN) != 0) break; } if (sep == NULL) { NFSUNLOCKSTATE(); return (NFSERR_BADSESSION); } sep->sess_refcnt++; *sepp = sep; NFSUNLOCKSTATE(); return (0); } /* * Free up all backchannel xprts. This needs to be done when the nfsd threads * exit, since those transports will all be going away. * This is only called after all the nfsd threads are done performing RPCs, * so locking shouldn't be an issue. */ APPLESTATIC void nfsrv_freeallbackchannel_xprts(void) { struct nfsdsession *sep; struct nfsclient *clp; SVCXPRT *xprt; int i; for (i = 0; i < nfsrv_clienthashsize; i++) { LIST_FOREACH(clp, &nfsclienthash[i], lc_hash) { LIST_FOREACH(sep, &clp->lc_session, sess_list) { xprt = sep->sess_cbsess.nfsess_xprt; sep->sess_cbsess.nfsess_xprt = NULL; if (xprt != NULL) SVC_RELEASE(xprt); } } } } Index: head/sys/fs/procfs/procfs_ioctl.c =================================================================== --- head/sys/fs/procfs/procfs_ioctl.c (revision 326267) +++ head/sys/fs/procfs/procfs_ioctl.c (revision 326268) @@ -1,220 +1,222 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause + * * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. 
The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #ifdef COMPAT_FREEBSD32 struct procfs_status32 { int state; /* Running, stopped, something else? 
*/ int flags; /* Any flags */ unsigned int events; /* Events to stop on */ int why; /* What event, if any, proc stopped on */ unsigned int val; /* Any extra data */ }; #define PIOCWAIT32 _IOR('p', 4, struct procfs_status32) #define PIOCSTATUS32 _IOR('p', 6, struct procfs_status32) #endif /* * Process ioctls */ int procfs_ioctl(PFS_IOCTL_ARGS) { struct procfs_status *ps; #ifdef COMPAT_FREEBSD32 struct procfs_status32 *ps32; #endif int error, flags, sig; #ifdef COMPAT_FREEBSD6 int ival; #endif KASSERT(p != NULL, ("%s() called without a process", __func__)); PROC_LOCK_ASSERT(p, MA_OWNED); error = 0; switch (cmd) { #if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IOC(IOC_IN, 'p', 1, 0): #endif #ifdef COMPAT_FREEBSD6 case _IO('p', 1): ival = IOCPARM_IVAL(data); data = &ival; #endif case PIOCBIS: p->p_stops |= *(unsigned int *)data; break; #if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IOC(IOC_IN, 'p', 2, 0): #endif #ifdef COMPAT_FREEBSD6 case _IO('p', 2): ival = IOCPARM_IVAL(data); data = &ival; #endif case PIOCBIC: p->p_stops &= ~*(unsigned int *)data; break; #if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IOC(IOC_IN, 'p', 3, 0): #endif #ifdef COMPAT_FREEBSD6 case _IO('p', 3): ival = IOCPARM_IVAL(data); data = &ival; #endif case PIOCSFL: flags = *(unsigned int *)data; if (flags & PF_ISUGID) { /* * XXXRW: Is this specific check required here, as * p_candebug() should implement it, or other checks * are missing. 
*/ error = priv_check(td, PRIV_DEBUG_SUGID); if (error) break; } p->p_pfsflags = flags; break; case PIOCGFL: *(unsigned int *)data = p->p_pfsflags; break; case PIOCWAIT: while (p->p_step == 0 && (p->p_flag & P_WEXIT) == 0) { /* sleep until p stops */ _PHOLD(p); error = msleep(&p->p_stype, &p->p_mtx, PWAIT|PCATCH, "pioctl", 0); _PRELE(p); if (error != 0) break; } /* fall through to PIOCSTATUS */ case PIOCSTATUS: ps = (struct procfs_status *)data; ps->state = (p->p_step == 0); ps->flags = 0; /* nope */ ps->events = p->p_stops; ps->why = p->p_step ? p->p_stype : 0; ps->val = p->p_step ? p->p_xsig : 0; break; #ifdef COMPAT_FREEBSD32 case PIOCWAIT32: while (p->p_step == 0 && (p->p_flag & P_WEXIT) == 0) { /* sleep until p stops */ _PHOLD(p); error = msleep(&p->p_stype, &p->p_mtx, PWAIT|PCATCH, "pioctl", 0); _PRELE(p); if (error != 0) break; } /* fall through to PIOCSTATUS32 */ case PIOCSTATUS32: ps32 = (struct procfs_status32 *)data; ps32->state = (p->p_step == 0); ps32->flags = 0; /* nope */ ps32->events = p->p_stops; ps32->why = p->p_step ? p->p_stype : 0; ps32->val = p->p_step ? 
p->p_xsig : 0; break; #endif #if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43) case _IOC(IOC_IN, 'p', 5, 0): #endif #ifdef COMPAT_FREEBSD6 case _IO('p', 5): ival = IOCPARM_IVAL(data); data = &ival; #endif case PIOCCONT: if (p->p_step == 0) break; sig = *(unsigned int *)data; if (sig != 0 && !_SIG_VALID(sig)) { error = EINVAL; break; } #if 0 p->p_step = 0; if (P_SHOULDSTOP(p)) { p->p_xsig = sig; p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG); PROC_SLOCK(p); thread_unsuspend(p); PROC_SUNLOCK(p); } else if (sig) kern_psignal(p, sig); #else if (sig) kern_psignal(p, sig); p->p_step = 0; wakeup(&p->p_step); #endif break; default: error = (ENOTTY); } return (error); } /* * Clean up on last close */ int procfs_close(PFS_CLOSE_ARGS) { if (p != NULL && (p->p_pfsflags & PF_LINGER) == 0) { PROC_LOCK_ASSERT(p, MA_OWNED); p->p_pfsflags = 0; p->p_stops = 0; p->p_step = 0; wakeup(&p->p_step); } return (0); } Index: head/sys/fs/pseudofs/pseudofs.c =================================================================== --- head/sys/fs/pseudofs/pseudofs.c (revision 326267) +++ head/sys/fs/pseudofs/pseudofs.c (revision 326268) @@ -1,454 +1,456 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause + * * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_pseudofs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_PFSNODES, "pfs_nodes", "pseudofs nodes"); SYSCTL_NODE(_vfs, OID_AUTO, pfs, CTLFLAG_RW, 0, "pseudofs"); #ifdef PSEUDOFS_TRACE int pfs_trace; SYSCTL_INT(_vfs_pfs, OID_AUTO, trace, CTLFLAG_RW, &pfs_trace, 0, "enable tracing of pseudofs vnode operations"); #endif #if PFS_FSNAMELEN != MFSNAMELEN #error "PFS_FSNAMELEN is not equal to MFSNAMELEN" #endif /* * Allocate and initialize a node */ static struct pfs_node * pfs_alloc_node(struct pfs_info *pi, const char *name, pfs_type_t type) { struct pfs_node *pn; KASSERT(strlen(name) < PFS_NAMELEN, ("%s(): node name is too long", __func__)); pn = malloc(sizeof *pn, M_PFSNODES, M_WAITOK|M_ZERO); mtx_init(&pn->pn_mutex, "pfs_node", NULL, MTX_DEF | MTX_DUPOK); strlcpy(pn->pn_name, name, sizeof pn->pn_name); pn->pn_type = type; pn->pn_info = pi; return (pn); } /* * Add a node to a directory */ static void pfs_add_node(struct pfs_node *parent, struct pfs_node *pn) { #ifdef INVARIANTS struct pfs_node *iter; #endif KASSERT(parent != NULL, ("%s(): parent is NULL", __func__)); KASSERT(pn->pn_parent == NULL, ("%s(): 
node already has a parent", __func__));
	KASSERT(parent->pn_info != NULL,
	    ("%s(): parent has no pn_info", __func__));
	KASSERT(parent->pn_type == pfstype_dir ||
	    parent->pn_type == pfstype_procdir ||
	    parent->pn_type == pfstype_root,
	    ("%s(): parent is not a directory", __func__));

#ifdef INVARIANTS
	/* XXX no locking! */
	if (pn->pn_type == pfstype_procdir)
		for (iter = parent; iter != NULL; iter = iter->pn_parent)
			KASSERT(iter->pn_type != pfstype_procdir,
			    ("%s(): nested process directories", __func__));
	for (iter = parent->pn_nodes; iter != NULL; iter = iter->pn_next) {
		KASSERT(strcmp(pn->pn_name, iter->pn_name) != 0,
		    ("%s(): homonymous siblings", __func__));
		if (pn->pn_type == pfstype_procdir)
			KASSERT(iter->pn_type != pfstype_procdir,
			    ("%s(): sibling process directories", __func__));
	}
#endif

	pn->pn_parent = parent;
	pfs_fileno_alloc(pn);

	/* Link at the head of the parent's singly-linked child list. */
	pfs_lock(parent);
	pn->pn_next = parent->pn_nodes;
	if ((parent->pn_flags & PFS_PROCDEP) != 0)
		pn->pn_flags |= PFS_PROCDEP;
	parent->pn_nodes = pn;
	pfs_unlock(parent);
}

/*
 * Detach a node from its parent
 */
static void
pfs_detach_node(struct pfs_node *pn)
{
	struct pfs_node *parent = pn->pn_parent;
	struct pfs_node **iter;

	KASSERT(parent != NULL, ("%s(): node has no parent", __func__));
	KASSERT(parent->pn_info == pn->pn_info,
	    ("%s(): parent has different pn_info", __func__));

	/*
	 * Unlink pn from the parent's singly-linked child list using a
	 * pointer-to-pointer walk; silently a no-op if pn is not found.
	 */
	pfs_lock(parent);
	iter = &parent->pn_nodes;
	while (*iter != NULL) {
		if (*iter == pn) {
			*iter = pn->pn_next;
			break;
		}
		iter = &(*iter)->pn_next;
	}
	pn->pn_parent = NULL;
	pfs_unlock(parent);
}

/*
 * Add . and ..
to a directory
 */
static void
pfs_fixup_dir(struct pfs_node *parent)
{
	struct pfs_node *pn;

	pn = pfs_alloc_node(parent->pn_info, ".", pfstype_this);
	pfs_add_node(parent, pn);
	pn = pfs_alloc_node(parent->pn_info, "..", pfstype_parent);
	pfs_add_node(parent, pn);
}

/*
 * Create a directory
 * Allocates a pfstype_dir (or pfstype_procdir when PFS_PROCDEP is set)
 * node, attaches it to parent, and populates "." and "..".
 * Returns the new node.
 */
struct pfs_node *
pfs_create_dir(struct pfs_node *parent, const char *name,
	       pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
	       int flags)
{
	struct pfs_node *pn;

	pn = pfs_alloc_node(parent->pn_info, name,
	    (flags & PFS_PROCDEP) ? pfstype_procdir : pfstype_dir);
	pn->pn_attr = attr;
	pn->pn_vis = vis;
	pn->pn_destroy = destroy;
	pn->pn_flags = flags;
	pfs_add_node(parent, pn);
	pfs_fixup_dir(pn);

	return (pn);
}

/*
 * Create a file
 * Allocates a pfstype_file node with the given fill/attr/vis/destroy
 * callbacks and attaches it to parent. Returns the new node.
 */
struct pfs_node *
pfs_create_file(struct pfs_node *parent, const char *name, pfs_fill_t fill,
		pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
		int flags)
{
	struct pfs_node *pn;

	pn = pfs_alloc_node(parent->pn_info, name, pfstype_file);
	pn->pn_fill = fill;
	pn->pn_attr = attr;
	pn->pn_vis = vis;
	pn->pn_destroy = destroy;
	pn->pn_flags = flags;
	pfs_add_node(parent, pn);

	return (pn);
}

/*
 * Create a symlink
 * Same as pfs_create_file() but with node type pfstype_symlink; the fill
 * callback supplies the link target. Returns the new node.
 */
struct pfs_node *
pfs_create_link(struct pfs_node *parent, const char *name, pfs_fill_t fill,
		pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy,
		int flags)
{
	struct pfs_node *pn;

	pn = pfs_alloc_node(parent->pn_info, name, pfstype_symlink);
	pn->pn_fill = fill;
	pn->pn_attr = attr;
	pn->pn_vis = vis;
	pn->pn_destroy = destroy;
	pn->pn_flags = flags;
	pfs_add_node(parent, pn);

	return (pn);
}

/*
 * Locate a node by name
 * Returns the first child of parent whose name matches exactly, or NULL.
 * NOTE(review): the returned node is not referenced or locked once the
 * parent lock is dropped -- callers presumably rely on node lifetime
 * guarantees elsewhere; confirm before relying on this.
 */
struct pfs_node *
pfs_find_node(struct pfs_node *parent, const char *name)
{
	struct pfs_node *pn;

	pfs_lock(parent);
	for (pn = parent->pn_nodes; pn != NULL; pn = pn->pn_next)
		if (strcmp(pn->pn_name, name) == 0)
			break;
	pfs_unlock(parent);

	return (pn);
}

/*
 * Destroy a node and all its descendants.  If the node to be destroyed
 * has a parent, the parent's mutex must be held.
*/ int pfs_destroy(struct pfs_node *pn) { struct pfs_node *iter; KASSERT(pn != NULL, ("%s(): node is NULL", __func__)); KASSERT(pn->pn_info != NULL, ("%s(): node has no pn_info", __func__)); if (pn->pn_parent) pfs_detach_node(pn); /* destroy children */ if (pn->pn_type == pfstype_dir || pn->pn_type == pfstype_procdir || pn->pn_type == pfstype_root) { pfs_lock(pn); while (pn->pn_nodes != NULL) { iter = pn->pn_nodes; pn->pn_nodes = iter->pn_next; iter->pn_parent = NULL; pfs_unlock(pn); pfs_destroy(iter); pfs_lock(pn); } pfs_unlock(pn); } /* revoke vnodes and fileno */ pfs_purge(pn); /* callback to free any private resources */ if (pn->pn_destroy != NULL) pn_destroy(pn); /* destroy the node */ pfs_fileno_free(pn); mtx_destroy(&pn->pn_mutex); free(pn, M_PFSNODES); return (0); } /* * Mount a pseudofs instance */ int pfs_mount(struct pfs_info *pi, struct mount *mp) { struct statfs *sbp; if (mp->mnt_flag & MNT_UPDATE) return (EOPNOTSUPP); MNT_ILOCK(mp); mp->mnt_flag |= MNT_LOCAL; MNT_IUNLOCK(mp); mp->mnt_data = pi; vfs_getnewfsid(mp); sbp = &mp->mnt_stat; vfs_mountedfrom(mp, pi->pi_name); sbp->f_bsize = PAGE_SIZE; sbp->f_iosize = PAGE_SIZE; sbp->f_blocks = 1; sbp->f_bfree = 0; sbp->f_bavail = 0; sbp->f_files = 1; sbp->f_ffree = 0; return (0); } /* * Compatibility shim for old mount(2) system call */ int pfs_cmount(struct mntarg *ma, void *data, uint64_t flags) { int error; error = kernel_mount(ma, flags); return (error); } /* * Unmount a pseudofs instance */ int pfs_unmount(struct mount *mp, int mntflags) { int error; error = vflush(mp, 0, (mntflags & MNT_FORCE) ? 
FORCECLOSE : 0, curthread); return (error); } /* * Return a root vnode */ int pfs_root(struct mount *mp, int flags, struct vnode **vpp) { struct pfs_info *pi; pi = (struct pfs_info *)mp->mnt_data; return (pfs_vncache_alloc(mp, vpp, pi->pi_root, NO_PID)); } /* * Return filesystem stats */ int pfs_statfs(struct mount *mp, struct statfs *sbp) { /* no-op: always called with mp->mnt_stat */ return (0); } /* * Initialize a pseudofs instance */ int pfs_init(struct pfs_info *pi, struct vfsconf *vfc) { struct pfs_node *root; int error; pfs_fileno_init(pi); /* set up the root directory */ root = pfs_alloc_node(pi, "/", pfstype_root); pi->pi_root = root; pfs_fileno_alloc(root); pfs_fixup_dir(root); /* construct file hierarchy */ error = (pi->pi_init)(pi, vfc); if (error) { pfs_destroy(root); pi->pi_root = NULL; return (error); } if (bootverbose) printf("%s registered\n", pi->pi_name); return (0); } /* * Destroy a pseudofs instance */ int pfs_uninit(struct pfs_info *pi, struct vfsconf *vfc) { int error; pfs_destroy(pi->pi_root); pi->pi_root = NULL; pfs_fileno_uninit(pi); if (bootverbose) printf("%s unregistered\n", pi->pi_name); error = (pi->pi_uninit)(pi, vfc); return (error); } /* * Handle load / unload events */ static int pfs_modevent(module_t mod, int evt, void *arg) { switch (evt) { case MOD_LOAD: pfs_vncache_load(); break; case MOD_UNLOAD: case MOD_SHUTDOWN: pfs_vncache_unload(); break; default: return EOPNOTSUPP; break; } return 0; } /* * Module declaration */ static moduledata_t pseudofs_data = { "pseudofs", pfs_modevent, NULL }; DECLARE_MODULE(pseudofs, pseudofs_data, SI_SUB_EXEC, SI_ORDER_FIRST); MODULE_VERSION(pseudofs, 1); Index: head/sys/fs/pseudofs/pseudofs.h =================================================================== --- head/sys/fs/pseudofs/pseudofs.h (revision 326267) +++ head/sys/fs/pseudofs/pseudofs.h (revision 326268) @@ -1,311 +1,313 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause + * * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav * All rights 
reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _PSEUDOFS_H_INCLUDED #define _PSEUDOFS_H_INCLUDED #include /* * Opaque structures */ struct mntarg; struct mount; struct nameidata; struct proc; struct sbuf; struct statfs; struct thread; struct uio; struct vfsconf; struct vnode; /* * Limits and constants */ #define PFS_NAMELEN 48 #define PFS_FSNAMELEN 16 /* equal to MFSNAMELEN */ #define PFS_DELEN (offsetof(struct dirent, d_name) + PFS_NAMELEN) typedef enum { pfstype_none = 0, pfstype_root, pfstype_dir, pfstype_this, pfstype_parent, pfstype_file, pfstype_symlink, pfstype_procdir } pfs_type_t; /* * Flags */ #define PFS_RD 0x0001 /* readable */ #define PFS_WR 0x0002 /* writeable */ #define PFS_RDWR (PFS_RD|PFS_WR) #define PFS_RAWRD 0x0004 /* raw reader */ #define PFS_RAWWR 0x0008 /* raw writer */ #define PFS_RAW (PFS_RAWRD|PFS_RAWWR) #define PFS_PROCDEP 0x0010 /* process-dependent */ /* * Data structures */ struct pfs_info; struct pfs_node; /* * Init / uninit callback */ #define PFS_INIT_ARGS \ struct pfs_info *pi, struct vfsconf *vfc #define PFS_INIT_ARGNAMES \ pi, vfc #define PFS_INIT_PROTO(name) \ int name(PFS_INIT_ARGS); typedef int (*pfs_init_t)(PFS_INIT_ARGS); /* * Filler callback * Called with proc held but unlocked */ #define PFS_FILL_ARGS \ struct thread *td, struct proc *p, struct pfs_node *pn, \ struct sbuf *sb, struct uio *uio #define PFS_FILL_ARGNAMES \ td, p, pn, sb, uio #define PFS_FILL_PROTO(name) \ int name(PFS_FILL_ARGS); typedef int (*pfs_fill_t)(PFS_FILL_ARGS); /* * Attribute callback * Called with proc locked */ struct vattr; #define PFS_ATTR_ARGS \ struct thread *td, struct proc *p, struct pfs_node *pn, \ struct vattr *vap #define PFS_ATTR_ARGNAMES \ td, p, pn, vap #define PFS_ATTR_PROTO(name) \ int name(PFS_ATTR_ARGS); typedef int (*pfs_attr_t)(PFS_ATTR_ARGS); /* * Visibility callback * Called with proc locked */ #define PFS_VIS_ARGS \ struct thread *td, struct proc *p, struct pfs_node *pn #define PFS_VIS_ARGNAMES \ td, p, pn #define PFS_VIS_PROTO(name) \ int 
name(PFS_VIS_ARGS); typedef int (*pfs_vis_t)(PFS_VIS_ARGS); /* * Ioctl callback * Called with proc locked */ #define PFS_IOCTL_ARGS \ struct thread *td, struct proc *p, struct pfs_node *pn, \ unsigned long cmd, void *data #define PFS_IOCTL_ARGNAMES \ td, p, pn, cmd, data #define PFS_IOCTL_PROTO(name) \ int name(PFS_IOCTL_ARGS); typedef int (*pfs_ioctl_t)(PFS_IOCTL_ARGS); /* * Getextattr callback * Called with proc locked */ #define PFS_GETEXTATTR_ARGS \ struct thread *td, struct proc *p, struct pfs_node *pn, \ int attrnamespace, const char *name, struct uio *uio, \ size_t *size, struct ucred *cred #define PFS_GETEXTATTR_ARGNAMES \ td, p, pn, attrnamespace, name, uio, size, cred #define PFS_GETEXTATTR_PROTO(name) \ int name(PFS_GETEXTATTR_ARGS); struct ucred; typedef int (*pfs_getextattr_t)(PFS_GETEXTATTR_ARGS); /* * Last-close callback * Called with proc locked */ #define PFS_CLOSE_ARGS \ struct thread *td, struct proc *p, struct pfs_node *pn #define PFS_CLOSE_ARGNAMES \ td, p, pn #define PFS_CLOSE_PROTO(name) \ int name(PFS_CLOSE_ARGS); typedef int (*pfs_close_t)(PFS_CLOSE_ARGS); /* * Destroy callback */ #define PFS_DESTROY_ARGS \ struct pfs_node *pn #define PFS_DESTROY_ARGNAMES \ pn #define PFS_DESTROY_PROTO(name) \ int name(PFS_DESTROY_ARGS); typedef int (*pfs_destroy_t)(PFS_DESTROY_ARGS); /* * pfs_info: describes a pseudofs instance * * The pi_mutex is only used to avoid using the global subr_unit lock * for unrhdr. The rest of struct pfs_info is only modified during * vfs_init() and vfs_uninit() of the consumer filesystem. */ struct pfs_info { char pi_name[PFS_FSNAMELEN]; pfs_init_t pi_init; pfs_init_t pi_uninit; /* members below this line are initialized at run time */ struct pfs_node *pi_root; struct mtx pi_mutex; struct unrhdr *pi_unrhdr; }; /* * pfs_node: describes a node (file or directory) within a pseudofs * * - Fields marked (o) are protected by the node's own mutex. * - Fields marked (p) are protected by the node's parent's mutex. 
* - Remaining fields are not protected by any lock and are assumed to be * immutable once the node has been created. * * To prevent deadlocks, if a node's mutex is to be held at the same time * as its parent's (e.g. when adding or removing nodes to a directory), * the parent's mutex must always be acquired first. Unfortunately, this * is not enforcable by WITNESS. */ struct pfs_node { char pn_name[PFS_NAMELEN]; pfs_type_t pn_type; int pn_flags; struct mtx pn_mutex; void *pn_data; /* (o) */ pfs_fill_t pn_fill; pfs_ioctl_t pn_ioctl; pfs_close_t pn_close; pfs_attr_t pn_attr; pfs_vis_t pn_vis; pfs_getextattr_t pn_getextattr; pfs_destroy_t pn_destroy; struct pfs_info *pn_info; u_int32_t pn_fileno; /* (o) */ struct pfs_node *pn_parent; /* (o) */ struct pfs_node *pn_nodes; /* (o) */ struct pfs_node *pn_next; /* (p) */ }; /* * VFS interface */ int pfs_mount (struct pfs_info *pi, struct mount *mp); int pfs_cmount (struct mntarg *ma, void *data, uint64_t flags); int pfs_unmount (struct mount *mp, int mntflags); int pfs_root (struct mount *mp, int flags, struct vnode **vpp); int pfs_statfs (struct mount *mp, struct statfs *sbp); int pfs_init (struct pfs_info *pi, struct vfsconf *vfc); int pfs_uninit (struct pfs_info *pi, struct vfsconf *vfc); /* * Directory structure construction and manipulation */ struct pfs_node *pfs_create_dir (struct pfs_node *parent, const char *name, pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy, int flags); struct pfs_node *pfs_create_file(struct pfs_node *parent, const char *name, pfs_fill_t fill, pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy, int flags); struct pfs_node *pfs_create_link(struct pfs_node *parent, const char *name, pfs_fill_t fill, pfs_attr_t attr, pfs_vis_t vis, pfs_destroy_t destroy, int flags); struct pfs_node *pfs_find_node (struct pfs_node *parent, const char *name); void pfs_purge (struct pfs_node *pn); int pfs_destroy (struct pfs_node *pn); /* * Now for some initialization magic... 
*/ #define PSEUDOFS(name, version, jflag) \ \ static struct pfs_info name##_info = { \ #name, \ name##_init, \ name##_uninit, \ }; \ \ static int \ _##name##_mount(struct mount *mp) { \ if (jflag && !prison_allow(curthread->td_ucred, jflag)) \ return (EPERM); \ return (pfs_mount(&name##_info, mp)); \ } \ \ static int \ _##name##_init(struct vfsconf *vfc) { \ return (pfs_init(&name##_info, vfc)); \ } \ \ static int \ _##name##_uninit(struct vfsconf *vfc) { \ return (pfs_uninit(&name##_info, vfc)); \ } \ \ static struct vfsops name##_vfsops = { \ .vfs_cmount = pfs_cmount, \ .vfs_init = _##name##_init, \ .vfs_mount = _##name##_mount, \ .vfs_root = pfs_root, \ .vfs_statfs = pfs_statfs, \ .vfs_uninit = _##name##_uninit, \ .vfs_unmount = pfs_unmount, \ }; \ VFS_SET(name##_vfsops, name, VFCF_SYNTHETIC | (jflag ? VFCF_JAIL : 0)); \ MODULE_VERSION(name, version); \ MODULE_DEPEND(name, pseudofs, 1, 1, 1); #endif Index: head/sys/fs/pseudofs/pseudofs_fileno.c =================================================================== --- head/sys/fs/pseudofs/pseudofs_fileno.c (revision 326267) +++ head/sys/fs/pseudofs/pseudofs_fileno.c (revision 326268) @@ -1,157 +1,159 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause + * * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. 
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include
__FBSDID("$FreeBSD$");

#include "opt_pseudofs.h"

/*
 * NOTE(review): the header names inside <> were lost when this file was
 * extracted; the bare #include lines below are preserved as found and
 * must be restored from the repository copy.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * Initialize fileno bitmap
 */
void
pfs_fileno_init(struct pfs_info *pi)
{

	mtx_init(&pi->pi_mutex, "pfs_fileno", NULL, MTX_DEF);
	/* filenos start at 3: 2 is reserved for the root (see below) */
	pi->pi_unrhdr = new_unrhdr(3, INT_MAX / NO_PID, &pi->pi_mutex);
}

/*
 * Tear down fileno bitmap
 */
void
pfs_fileno_uninit(struct pfs_info *pi)
{

	delete_unrhdr(pi->pi_unrhdr);
	pi->pi_unrhdr = NULL;
	mtx_destroy(&pi->pi_mutex);
}

/*
 * Allocate a file number for the given node.  Regular nodes draw from
 * the instance's unit-number allocator; "." and ".." borrow the fileno
 * of the directory they refer to rather than owning one.
 */
void
pfs_fileno_alloc(struct pfs_node *pn)
{

	if (pn->pn_parent)
		PFS_TRACE(("%s/%s", pn->pn_parent->pn_name, pn->pn_name));
	else
		PFS_TRACE(("%s", pn->pn_name));
	pfs_assert_not_owned(pn);

	switch (pn->pn_type) {
	case pfstype_root:
		/* root must always be 2 */
		pn->pn_fileno = 2;
		break;
	case pfstype_dir:
	case pfstype_file:
	case pfstype_symlink:
	case pfstype_procdir:
		pn->pn_fileno = alloc_unr(pn->pn_info->pi_unrhdr);
		break;
	case pfstype_this:
		/* "." shares its parent's (i.e. this directory's) fileno */
		KASSERT(pn->pn_parent != NULL,
		    ("%s(): pfstype_this node has no parent", __func__));
		pn->pn_fileno = pn->pn_parent->pn_fileno;
		break;
	case pfstype_parent:
		/* ".." shares the grandparent's fileno (parent's at root) */
		KASSERT(pn->pn_parent != NULL,
		    ("%s(): pfstype_parent node has no parent", __func__));
		if (pn->pn_parent->pn_type == pfstype_root) {
			pn->pn_fileno = pn->pn_parent->pn_fileno;
			break;
		}
		KASSERT(pn->pn_parent->pn_parent != NULL,
		    ("%s(): pfstype_parent node has no grandparent", __func__));
		pn->pn_fileno = pn->pn_parent->pn_parent->pn_fileno;
		break;
	case pfstype_none:
		KASSERT(0,
		    ("%s(): pfstype_none node", __func__));
		break;
	}

#if 0
	/* disabled allocation-trace debug output */
	printf("%s(): %s: ", __func__, pn->pn_info->pi_name);
	if (pn->pn_parent) {
		if (pn->pn_parent->pn_parent) {
			printf("%s/", pn->pn_parent->pn_parent->pn_name);
		}
		printf("%s/", pn->pn_parent->pn_name);
	}
	printf("%s -> %d\n", pn->pn_name, pn->pn_fileno);
#endif
}

/*
 * Release a file number.  Only node types that allocated from the
 * unrhdr give their fileno back; root, "." and ".." do not own theirs.
 */
void
pfs_fileno_free(struct pfs_node *pn)
{

	pfs_assert_not_owned(pn);

	switch (pn->pn_type) {
	case pfstype_root:
		/* not allocated from unrhdr */
		return;
	case pfstype_dir:
	case pfstype_file:
	case pfstype_symlink:
	case pfstype_procdir:
		free_unr(pn->pn_info->pi_unrhdr, pn->pn_fileno);
		break;
	case pfstype_this:
	case pfstype_parent:
		/* ignore these, as they don't "own" their file number */
		break;
	case pfstype_none:
		KASSERT(0,
		    ("pfs_fileno_free() called for pfstype_none node"));
		break;
	}
}
Index: head/sys/fs/pseudofs/pseudofs_internal.h
===================================================================
--- head/sys/fs/pseudofs/pseudofs_internal.h	(revision 326267)
+++ head/sys/fs/pseudofs/pseudofs_internal.h	(revision 326268)
@@ -1,211 +1,213 @@
/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
 * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _PSEUDOFS_INTERNAL_H_INCLUDED #define _PSEUDOFS_INTERNAL_H_INCLUDED /* * Sysctl subtree */ SYSCTL_DECL(_vfs_pfs); /* * Vnode data */ struct pfs_vdata { struct pfs_node *pvd_pn; pid_t pvd_pid; struct vnode *pvd_vnode; struct pfs_vdata*pvd_prev, *pvd_next; int pvd_dead:1; }; /* * Vnode cache */ void pfs_vncache_load (void); void pfs_vncache_unload (void); int pfs_vncache_alloc (struct mount *, struct vnode **, struct pfs_node *, pid_t pid); int pfs_vncache_free (struct vnode *); /* * File number bitmap */ void pfs_fileno_init (struct pfs_info *); void pfs_fileno_uninit (struct pfs_info *); void pfs_fileno_alloc (struct pfs_node *); void pfs_fileno_free (struct pfs_node *); /* * Debugging */ #ifdef PSEUDOFS_TRACE extern int pfs_trace; #define PFS_TRACE(foo) \ do { \ if (pfs_trace) { \ printf("%s(): line %d: ", __func__, __LINE__); \ printf foo ; \ printf("\n"); \ } \ } while (0) #define PFS_RETURN(err) \ do { \ if (pfs_trace) { \ printf("%s(): line %d: returning %d\n", \ __func__, __LINE__, err); \ } \ return (err); \ } while (0) #else #define PFS_TRACE(foo) \ do { /* nothing */ } while (0) #define PFS_RETURN(err) \ return (err) #endif /* * Inline helpers for locking */ static inline void pfs_lock(struct pfs_node *pn) { mtx_lock(&pn->pn_mutex); } static inline void pfs_unlock(struct pfs_node *pn) { mtx_unlock(&pn->pn_mutex); } static inline void pfs_assert_owned(struct pfs_node *pn) { mtx_assert(&pn->pn_mutex, MA_OWNED); } static inline void pfs_assert_not_owned(struct pfs_node *pn) { mtx_assert(&pn->pn_mutex, MA_NOTOWNED); } static inline int pn_fill(PFS_FILL_ARGS) { PFS_TRACE(("%s", pn->pn_name)); KASSERT(pn->pn_fill != NULL, ("%s(): no callback", __func__)); if (p != NULL) { PROC_LOCK_ASSERT(p, MA_NOTOWNED); PROC_ASSERT_HELD(p); } pfs_assert_not_owned(pn); return ((pn->pn_fill)(PFS_FILL_ARGNAMES)); } static inline int pn_attr(PFS_ATTR_ARGS) { PFS_TRACE(("%s", pn->pn_name)); KASSERT(pn->pn_attr != NULL, ("%s(): no callback", __func__)); if 
 (p != NULL)
		PROC_LOCK_ASSERT(p, MA_OWNED);
	pfs_assert_not_owned(pn);

	return ((pn->pn_attr)(PFS_ATTR_ARGNAMES));
}

/*
 * Invoke the node's visibility callback.  Unlike the other wrappers,
 * a process is mandatory here and must be locked by the caller.
 */
static inline int
pn_vis(PFS_VIS_ARGS)
{

	PFS_TRACE(("%s", pn->pn_name));

	KASSERT(pn->pn_vis != NULL, ("%s(): no callback", __func__));
	KASSERT(p != NULL, ("%s(): no process", __func__));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	pfs_assert_not_owned(pn);

	return ((pn->pn_vis)(PFS_VIS_ARGNAMES));
}

/*
 * Invoke the node's ioctl callback; proc, if present, must be locked.
 */
static inline int
pn_ioctl(PFS_IOCTL_ARGS)
{

	PFS_TRACE(("%s", pn->pn_name));

	KASSERT(pn->pn_ioctl != NULL, ("%s(): no callback", __func__));
	if (p != NULL)
		PROC_LOCK_ASSERT(p, MA_OWNED);
	pfs_assert_not_owned(pn);

	return ((pn->pn_ioctl)(PFS_IOCTL_ARGNAMES));
}

/*
 * Invoke the node's getextattr callback; proc, if present, must be locked.
 */
static inline int
pn_getextattr(PFS_GETEXTATTR_ARGS)
{

	PFS_TRACE(("%s", pn->pn_name));

	KASSERT(pn->pn_getextattr != NULL, ("%s(): no callback", __func__));
	if (p != NULL)
		PROC_LOCK_ASSERT(p, MA_OWNED);
	pfs_assert_not_owned(pn);

	return ((pn->pn_getextattr)(PFS_GETEXTATTR_ARGNAMES));
}

/*
 * Invoke the node's last-close callback; proc, if present, must be locked.
 */
static inline int
pn_close(PFS_CLOSE_ARGS)
{

	PFS_TRACE(("%s", pn->pn_name));

	KASSERT(pn->pn_close != NULL, ("%s(): no callback", __func__));
	if (p != NULL)
		PROC_LOCK_ASSERT(p, MA_OWNED);
	pfs_assert_not_owned(pn);

	return ((pn->pn_close)(PFS_CLOSE_ARGNAMES));
}

/*
 * Invoke the node's destroy callback to release consumer resources.
 */
static inline int
pn_destroy(PFS_DESTROY_ARGS)
{

	PFS_TRACE(("%s", pn->pn_name));

	KASSERT(pn->pn_destroy != NULL, ("%s(): no callback", __func__));
	pfs_assert_not_owned(pn);

	return ((pn->pn_destroy)(PFS_DESTROY_ARGNAMES));
}

#endif
Index: head/sys/fs/pseudofs/pseudofs_vncache.c
===================================================================
--- head/sys/fs/pseudofs/pseudofs_vncache.c	(revision 326267)
+++ head/sys/fs/pseudofs/pseudofs_vncache.c	(revision 326268)
@@ -1,331 +1,333 @@
/*-
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
 * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include
__FBSDID("$FreeBSD$");

#include "opt_pseudofs.h"

/*
 * NOTE(review): the header names inside <> were lost when this file was
 * extracted; the bare #include lines below are preserved as found and
 * must be restored from the repository copy.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

static MALLOC_DEFINE(M_PFSVNCACHE, "pfs_vncache", "pseudofs vnode cache");

/* mutex protecting the cache list and its counters */
static struct mtx pfs_vncache_mutex;
/* head of the doubly-linked list of cached vnode data */
static struct pfs_vdata *pfs_vncache;
static eventhandler_tag pfs_exit_tag;
static void pfs_exit(void *arg, struct proc *p);
static void pfs_purge_locked(struct pfs_node *pn, bool force);

static SYSCTL_NODE(_vfs_pfs, OID_AUTO, vncache, CTLFLAG_RW, 0,
    "pseudofs vnode cache");

static int pfs_vncache_entries;
SYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, entries, CTLFLAG_RD,
    &pfs_vncache_entries, 0,
    "number of entries in the vnode cache");

static int pfs_vncache_maxentries;
SYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, maxentries, CTLFLAG_RD,
    &pfs_vncache_maxentries, 0,
    "highest number of entries in the vnode cache");

static int pfs_vncache_hits;
SYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, hits, CTLFLAG_RD,
    &pfs_vncache_hits, 0,
    "number of cache hits since initialization");

static int pfs_vncache_misses;
SYSCTL_INT(_vfs_pfs_vncache, OID_AUTO, misses, CTLFLAG_RD,
    &pfs_vncache_misses, 0,
    "number of cache misses since initialization");

extern struct vop_vector pfs_vnodeops;	/* XXX -> .h file */

/*
 * Initialize vnode cache
 */
void
pfs_vncache_load(void)
{

	mtx_init(&pfs_vncache_mutex, "pfs_vncache", NULL, MTX_DEF);
	/* reclaim cached vnodes of processes as they exit */
	pfs_exit_tag = EVENTHANDLER_REGISTER(process_exit, pfs_exit, NULL,
	    EVENTHANDLER_PRI_ANY);
}

/*
 * Tear down vnode cache
 */
void
pfs_vncache_unload(void)
{

	EVENTHANDLER_DEREGISTER(process_exit, pfs_exit_tag);
	mtx_lock(&pfs_vncache_mutex);
	pfs_purge_locked(NULL, true);
	mtx_unlock(&pfs_vncache_mutex);
	KASSERT(pfs_vncache_entries == 0,
	    ("%d vncache entries remaining", pfs_vncache_entries));
	mtx_destroy(&pfs_vncache_mutex);
}

/*
 * Allocate a vnode for (pn, pid) on mount mp, returning it locked in
 * *vpp.  A cached vnode is reused when possible; otherwise a new one is
 * created and inserted, rechecking for a racing insert because the
 * cache mutex must be dropped around the blocking vnode operations.
 */
int
pfs_vncache_alloc(struct mount *mp, struct vnode **vpp,
		  struct pfs_node *pn, pid_t pid)
{
	struct pfs_vdata *pvd, *pvd2;
	struct vnode *vp;
	int error;

	/*
	 * See if the vnode is in the cache.
	 * XXX linear search is not very efficient.
	 */
retry:
	mtx_lock(&pfs_vncache_mutex);
	for (pvd = pfs_vncache; pvd; pvd = pvd->pvd_next) {
		if (pvd->pvd_pn == pn && pvd->pvd_pid == pid &&
		    pvd->pvd_vnode->v_mount == mp) {
			vp = pvd->pvd_vnode;
			VI_LOCK(vp);
			mtx_unlock(&pfs_vncache_mutex);
			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread) == 0) {
				++pfs_vncache_hits;
				*vpp = vp;
				/*
				 * Some callers cache_enter(vp) later, so
				 * we have to make sure it's not in the
				 * VFS cache so it doesn't get entered
				 * twice.  A better solution would be to
				 * make pfs_vncache_alloc() responsible
				 * for entering the vnode in the VFS
				 * cache.
				 */
				cache_purge(vp);
				return (0);
			}
			/* vget() failed; the entry may be gone, rescan */
			goto retry;
		}
	}
	mtx_unlock(&pfs_vncache_mutex);

	/* nope, get a new one */
	pvd = malloc(sizeof *pvd, M_PFSVNCACHE, M_WAITOK);
	pvd->pvd_next = pvd->pvd_prev = NULL;
	error = getnewvnode("pseudofs", mp, &pfs_vnodeops, vpp);
	if (error) {
		free(pvd, M_PFSVNCACHE);
		return (error);
	}
	pvd->pvd_pn = pn;
	pvd->pvd_pid = pid;
	(*vpp)->v_data = pvd;
	/* map pfs node type to vnode type */
	switch (pn->pn_type) {
	case pfstype_root:
		(*vpp)->v_vflag = VV_ROOT;
#if 0
		printf("root vnode allocated\n");
#endif
		/* fall through */
	case pfstype_dir:
	case pfstype_this:
	case pfstype_parent:
	case pfstype_procdir:
		(*vpp)->v_type = VDIR;
		break;
	case pfstype_file:
		(*vpp)->v_type = VREG;
		break;
	case pfstype_symlink:
		(*vpp)->v_type = VLNK;
		break;
	case pfstype_none:
		KASSERT(0, ("pfs_vncache_alloc called for null node\n"));
	default:
		panic("%s has unexpected type: %d", pn->pn_name, pn->pn_type);
	}
	/*
	 * Propagate flag through to vnode so users know it can change
	 * if the process changes (i.e. execve)
	 */
	if ((pn->pn_flags & PFS_PROCDEP) != 0)
		(*vpp)->v_vflag |= VV_PROCDEP;
	pvd->pvd_vnode = *vpp;
	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY);
	VN_LOCK_AREC(*vpp);
	error = insmntque(*vpp, mp);
	if (error != 0) {
		/*
		 * NOTE(review): insmntque() failure appears to rely on the
		 * vnode being reclaimed elsewhere; only pvd is freed here.
		 */
		free(pvd, M_PFSVNCACHE);
		*vpp = NULLVP;
		return (error);
	}
retry2:
	mtx_lock(&pfs_vncache_mutex);
	/*
	 * Other thread may race with us, creating the entry we are
	 * going to insert into the cache.  Recheck after
	 * pfs_vncache_mutex is reacquired.
	 */
	for (pvd2 = pfs_vncache; pvd2; pvd2 = pvd2->pvd_next) {
		if (pvd2->pvd_pn == pn && pvd2->pvd_pid == pid &&
		    pvd2->pvd_vnode->v_mount == mp) {
			vp = pvd2->pvd_vnode;
			VI_LOCK(vp);
			mtx_unlock(&pfs_vncache_mutex);
			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, curthread) == 0) {
				++pfs_vncache_hits;
				/* discard our new vnode, use the winner's */
				vgone(*vpp);
				vput(*vpp);
				*vpp = vp;
				cache_purge(vp);
				return (0);
			}
			goto retry2;
		}
	}
	++pfs_vncache_misses;
	if (++pfs_vncache_entries > pfs_vncache_maxentries)
		pfs_vncache_maxentries = pfs_vncache_entries;
	/* insert at the head of the cache list */
	pvd->pvd_prev = NULL;
	pvd->pvd_next = pfs_vncache;
	if (pvd->pvd_next)
		pvd->pvd_next->pvd_prev = pvd;
	pfs_vncache = pvd;
	mtx_unlock(&pfs_vncache_mutex);
	return (0);
}

/*
 * Free a vnode's cache entry: unlink it from the list (adjusting the
 * entry count only if it was actually linked) and release its data.
 */
int
pfs_vncache_free(struct vnode *vp)
{
	struct pfs_vdata *pvd;

	mtx_lock(&pfs_vncache_mutex);
	pvd = (struct pfs_vdata *)vp->v_data;
	KASSERT(pvd != NULL, ("pfs_vncache_free(): no vnode data\n"));
	if (pvd->pvd_next)
		pvd->pvd_next->pvd_prev = pvd->pvd_prev;
	if (pvd->pvd_prev) {
		pvd->pvd_prev->pvd_next = pvd->pvd_next;
		--pfs_vncache_entries;
	} else if (pfs_vncache == pvd) {
		pfs_vncache = pvd->pvd_next;
		--pfs_vncache_entries;
	}
	mtx_unlock(&pfs_vncache_mutex);

	free(pvd, M_PFSVNCACHE);
	vp->v_data = NULL;
	return (0);
}

/*
 * Purge the cache of dead entries
 *
 * This is extremely inefficient due to the fact that vgone() not only
 * indirectly modifies the vnode cache, but may also sleep.  We can
 * neither hold pfs_vncache_mutex across a vgone() call, nor make any
 * assumptions about the state of the cache after vgone() returns.
In * consequence, we must start over after every vgone() call, and keep * trying until we manage to traverse the entire cache. * * The only way to improve this situation is to change the data structure * used to implement the cache. */ static void pfs_purge_locked(struct pfs_node *pn, bool force) { struct pfs_vdata *pvd; struct vnode *vnp; mtx_assert(&pfs_vncache_mutex, MA_OWNED); pvd = pfs_vncache; while (pvd != NULL) { if (force || pvd->pvd_dead || (pn != NULL && pvd->pvd_pn == pn)) { vnp = pvd->pvd_vnode; vhold(vnp); mtx_unlock(&pfs_vncache_mutex); VOP_LOCK(vnp, LK_EXCLUSIVE); vgone(vnp); VOP_UNLOCK(vnp, 0); mtx_lock(&pfs_vncache_mutex); vdrop(vnp); pvd = pfs_vncache; } else { pvd = pvd->pvd_next; } } } void pfs_purge(struct pfs_node *pn) { mtx_lock(&pfs_vncache_mutex); pfs_purge_locked(pn, false); mtx_unlock(&pfs_vncache_mutex); } /* * Free all vnodes associated with a defunct process */ static void pfs_exit(void *arg, struct proc *p) { struct pfs_vdata *pvd; int dead; if (pfs_vncache == NULL) return; mtx_lock(&pfs_vncache_mutex); for (pvd = pfs_vncache, dead = 0; pvd != NULL; pvd = pvd->pvd_next) if (pvd->pvd_pid == p->p_pid) dead = pvd->pvd_dead = 1; if (dead) pfs_purge_locked(NULL, false); mtx_unlock(&pfs_vncache_mutex); } Index: head/sys/fs/pseudofs/pseudofs_vnops.c =================================================================== --- head/sys/fs/pseudofs/pseudofs_vnops.c (revision 326267) +++ head/sys/fs/pseudofs/pseudofs_vnops.c (revision 326268) @@ -1,1059 +1,1061 @@ /*- + * SPDX-License-Identifier: BSD-3-Clause + * * Copyright (c) 2001 Dag-Erling Coïdan Smørgrav * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer * in this position and unchanged. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The name of the author may not be used to endorse or promote products * derived from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_pseudofs.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define KASSERT_PN_IS_DIR(pn) \ KASSERT((pn)->pn_type == pfstype_root || \ (pn)->pn_type == pfstype_dir || \ (pn)->pn_type == pfstype_procdir, \ ("%s(): VDIR vnode refers to non-directory pfs_node", __func__)) #define KASSERT_PN_IS_FILE(pn) \ KASSERT((pn)->pn_type == pfstype_file, \ ("%s(): VREG vnode refers to non-file pfs_node", __func__)) #define KASSERT_PN_IS_LINK(pn) \ KASSERT((pn)->pn_type == pfstype_symlink, \ ("%s(): VLNK vnode refers to non-link pfs_node", __func__)) /* * Returns the fileno, adjusted for target pid */ static uint32_t pn_fileno(struct pfs_node *pn, pid_t pid) { KASSERT(pn->pn_fileno > 0, ("%s(): no fileno allocated", __func__)); if (pid != NO_PID) return (pn->pn_fileno * NO_PID + pid); return (pn->pn_fileno); } /* * Returns non-zero if given file is visible to given thread. */ static int pfs_visible_proc(struct thread *td, struct pfs_node *pn, struct proc *proc) { int visible; if (proc == NULL) return (0); PROC_LOCK_ASSERT(proc, MA_OWNED); visible = ((proc->p_flag & P_WEXIT) == 0); if (visible) visible = (p_cansee(td, proc) == 0); if (visible && pn->pn_vis != NULL) visible = pn_vis(td, proc, pn); if (!visible) return (0); return (1); } static int pfs_visible(struct thread *td, struct pfs_node *pn, pid_t pid, bool allproc_locked, struct proc **p) { struct proc *proc; PFS_TRACE(("%s (pid: %d, req: %d)", pn->pn_name, pid, td->td_proc->p_pid)); if (p) *p = NULL; if (pid == NO_PID) PFS_RETURN (1); proc = allproc_locked ? 
	    pfind_locked(pid) : pfind(pid);
	if (proc == NULL)
		PFS_RETURN (0);
	if (pfs_visible_proc(td, pn, proc)) {
		/* on success, hand the locked proc back if requested */
		if (p)
			*p = proc;
		else
			PROC_UNLOCK(proc);
		PFS_RETURN (1);
	}
	PROC_UNLOCK(proc);
	PFS_RETURN (0);
}

/*
 * Verify permissions
 */
static int
pfs_access(struct vop_access_args *va)
{
	struct vnode *vn = va->a_vp;
	struct pfs_vdata *pvd = vn->v_data;
	struct vattr vattr;
	int error;

	PFS_TRACE(("%s", pvd->pvd_pn->pn_name));
	(void)pvd;	/* only used by PFS_TRACE, which may expand to nothing */
	error = VOP_GETATTR(vn, &vattr, va->a_cred);
	if (error)
		PFS_RETURN (error);
	/* defer the actual check to vaccess() on the node's attributes */
	error = vaccess(vn->v_type, vattr.va_mode, vattr.va_uid,
	    vattr.va_gid, va->a_accmode, va->a_cred, NULL);
	PFS_RETURN (error);
}

/*
 * Close a file or directory
 */
static int
pfs_close(struct vop_close_args *va)
{
	struct vnode *vn = va->a_vp;
	struct pfs_vdata *pvd = vn->v_data;
	struct pfs_node *pn = pvd->pvd_pn;
	struct proc *proc;
	int error;

	PFS_TRACE(("%s", pn->pn_name));
	pfs_assert_not_owned(pn);

	/*
	 * Do nothing unless this is the last close and the node has a
	 * last-close handler.
	 */
	if (vrefcnt(vn) > 1 || pn->pn_close == NULL)
		PFS_RETURN (0);

	/* pfind() returns the process locked, if found */
	if (pvd->pvd_pid != NO_PID) {
		proc = pfind(pvd->pvd_pid);
	} else {
		proc = NULL;
	}

	error = pn_close(va->a_td, proc, pn);

	if (proc != NULL)
		PROC_UNLOCK(proc);

	PFS_RETURN (error);
}

/*
 * Get file attributes.  Synthesizes attributes (current timestamps,
 * fixed modes by node type, target process's real uid/gid), then lets
 * the node's attr callback override them.
 */
static int
pfs_getattr(struct vop_getattr_args *va)
{
	struct vnode *vn = va->a_vp;
	struct pfs_vdata *pvd = vn->v_data;
	struct pfs_node *pn = pvd->pvd_pn;
	struct vattr *vap = va->a_vap;
	struct proc *proc;
	int error = 0;

	PFS_TRACE(("%s", pn->pn_name));
	pfs_assert_not_owned(pn);

	/* also looks up the target process; proc comes back locked */
	if (!pfs_visible(curthread, pn, pvd->pvd_pid, false, &proc))
		PFS_RETURN (ENOENT);

	vap->va_type = vn->v_type;
	vap->va_fileid = pn_fileno(pn, pvd->pvd_pid);
	vap->va_flags = 0;
	vap->va_blocksize = PAGE_SIZE;
	vap->va_bytes = vap->va_size = 0;
	vap->va_filerev = 0;
	vap->va_fsid = vn->v_mount->mnt_stat.f_fsid.val[0];
	vap->va_nlink = 1;
	nanotime(&vap->va_ctime);
	vap->va_atime = vap->va_mtime = vap->va_ctime;

	switch (pn->pn_type) {
	case pfstype_procdir:
	case pfstype_root:
	case pfstype_dir:
#if 0
		pfs_lock(pn);
		/* compute link count */
		pfs_unlock(pn);
#endif
		vap->va_mode = 0555;
		break;
	case pfstype_file:
	case pfstype_symlink:
		vap->va_mode = 0444;
		break;
	default:
		printf("shouldn't be here!\n");
		vap->va_mode = 0;
		break;
	}

	if (proc != NULL) {
		/* attribute ownership to the target process's real ids */
		vap->va_uid = proc->p_ucred->cr_ruid;
		vap->va_gid = proc->p_ucred->cr_rgid;
	} else {
		vap->va_uid = 0;
		vap->va_gid = 0;
	}

	/* give the node's attr callback a chance to override */
	if (pn->pn_attr != NULL)
		error = pn_attr(curthread, proc, pn, vap);

	if (proc != NULL)
		PROC_UNLOCK(proc);

	PFS_RETURN (error);
}

/*
 * Perform an ioctl.  The vnode is locked shared across the callback and
 * revalidated (VI_DOOMED, type, visibility) before use.
 */
static int
pfs_ioctl(struct vop_ioctl_args *va)
{
	struct vnode *vn;
	struct pfs_vdata *pvd;
	struct pfs_node *pn;
	struct proc *proc;
	int error;

	vn = va->a_vp;
	vn_lock(vn, LK_SHARED | LK_RETRY);
	if (vn->v_iflag & VI_DOOMED) {
		VOP_UNLOCK(vn, 0);
		return (EBADF);
	}
	pvd = vn->v_data;
	pn = pvd->pvd_pn;
	PFS_TRACE(("%s: %lx", pn->pn_name, va->a_command));
	pfs_assert_not_owned(pn);

	if (vn->v_type != VREG) {
		VOP_UNLOCK(vn, 0);
		PFS_RETURN (EINVAL);
	}
	KASSERT_PN_IS_FILE(pn);

	if (pn->pn_ioctl == NULL) {
		VOP_UNLOCK(vn, 0);
		PFS_RETURN (ENOTTY);
	}

	/*
	 * This is necessary because process' privileges may
	 * have changed since the open() call.
	 */
	if (!pfs_visible(curthread, pn, pvd->pvd_pid, false, &proc)) {
		VOP_UNLOCK(vn, 0);
		PFS_RETURN (EIO);
	}

	error = pn_ioctl(curthread, proc, pn, va->a_command, va->a_data);

	if (proc != NULL)
		PROC_UNLOCK(proc);

	VOP_UNLOCK(vn, 0);
	PFS_RETURN (error);
}

/*
 * Perform getextattr
 */
static int
pfs_getextattr(struct vop_getextattr_args *va)
{
	struct vnode *vn = va->a_vp;
	struct pfs_vdata *pvd = vn->v_data;
	struct pfs_node *pn = pvd->pvd_pn;
	struct proc *proc;
	int error;

	PFS_TRACE(("%s", pn->pn_name));
	pfs_assert_not_owned(pn);

	/*
	 * This is necessary because either process' privileges may
	 * have changed since the open() call.
*/ if (!pfs_visible(curthread, pn, pvd->pvd_pid, false, &proc)) PFS_RETURN (EIO); if (pn->pn_getextattr == NULL) error = EOPNOTSUPP; else error = pn_getextattr(curthread, proc, pn, va->a_attrnamespace, va->a_name, va->a_uio, va->a_size, va->a_cred); if (proc != NULL) PROC_UNLOCK(proc); PFS_RETURN (error); } /* * Convert a vnode to its component name */ static int pfs_vptocnp(struct vop_vptocnp_args *ap) { struct vnode *vp = ap->a_vp; struct vnode **dvp = ap->a_vpp; struct pfs_vdata *pvd = vp->v_data; struct pfs_node *pd = pvd->pvd_pn; struct pfs_node *pn; struct mount *mp; char *buf = ap->a_buf; int *buflen = ap->a_buflen; char pidbuf[PFS_NAMELEN]; pid_t pid = pvd->pvd_pid; int len, i, error, locked; i = *buflen; error = 0; pfs_lock(pd); if (vp->v_type == VDIR && pd->pn_type == pfstype_root) { *dvp = vp; vhold(*dvp); pfs_unlock(pd); PFS_RETURN (0); } else if (vp->v_type == VDIR && pd->pn_type == pfstype_procdir) { len = snprintf(pidbuf, sizeof(pidbuf), "%d", pid); i -= len; if (i < 0) { error = ENOMEM; goto failed; } bcopy(pidbuf, buf + i, len); } else { len = strlen(pd->pn_name); i -= len; if (i < 0) { error = ENOMEM; goto failed; } bcopy(pd->pn_name, buf + i, len); } pn = pd->pn_parent; pfs_unlock(pd); mp = vp->v_mount; error = vfs_busy(mp, 0); if (error) return (error); /* * vp is held by caller. 
*/ locked = VOP_ISLOCKED(vp); VOP_UNLOCK(vp, 0); error = pfs_vncache_alloc(mp, dvp, pn, pid); if (error) { vn_lock(vp, locked | LK_RETRY); vfs_unbusy(mp); PFS_RETURN(error); } *buflen = i; VOP_UNLOCK(*dvp, 0); vn_lock(vp, locked | LK_RETRY); vfs_unbusy(mp); PFS_RETURN (0); failed: pfs_unlock(pd); PFS_RETURN(error); } /* * Look up a file or directory */ static int pfs_lookup(struct vop_cachedlookup_args *va) { struct vnode *vn = va->a_dvp; struct vnode **vpp = va->a_vpp; struct componentname *cnp = va->a_cnp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pd = pvd->pvd_pn; struct pfs_node *pn, *pdn = NULL; struct mount *mp; pid_t pid = pvd->pvd_pid; char *pname; int error, i, namelen, visible; PFS_TRACE(("%.*s", (int)cnp->cn_namelen, cnp->cn_nameptr)); pfs_assert_not_owned(pd); if (vn->v_type != VDIR) PFS_RETURN (ENOTDIR); KASSERT_PN_IS_DIR(pd); error = VOP_ACCESS(vn, VEXEC, cnp->cn_cred, cnp->cn_thread); if (error) PFS_RETURN (error); /* * Don't support DELETE or RENAME. CREATE is supported so * that O_CREAT will work, but the lookup will still fail if * the file does not exist. */ if ((cnp->cn_flags & ISLASTCN) && (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) PFS_RETURN (EOPNOTSUPP); /* shortcut: check if the name is too long */ if (cnp->cn_namelen >= PFS_NAMELEN) PFS_RETURN (ENOENT); /* check that parent directory is visible... 
*/ if (!pfs_visible(curthread, pd, pvd->pvd_pid, false, NULL)) PFS_RETURN (ENOENT); /* self */ namelen = cnp->cn_namelen; pname = cnp->cn_nameptr; if (namelen == 1 && pname[0] == '.') { pn = pd; *vpp = vn; VREF(vn); PFS_RETURN (0); } mp = vn->v_mount; /* parent */ if (cnp->cn_flags & ISDOTDOT) { if (pd->pn_type == pfstype_root) PFS_RETURN (EIO); error = vfs_busy(mp, MBF_NOWAIT); if (error != 0) { vfs_ref(mp); VOP_UNLOCK(vn, 0); error = vfs_busy(mp, 0); vn_lock(vn, LK_EXCLUSIVE | LK_RETRY); vfs_rel(mp); if (error != 0) PFS_RETURN(ENOENT); if (vn->v_iflag & VI_DOOMED) { vfs_unbusy(mp); PFS_RETURN(ENOENT); } } VOP_UNLOCK(vn, 0); KASSERT(pd->pn_parent != NULL, ("%s(): non-root directory has no parent", __func__)); /* * This one is tricky. Descendents of procdir nodes * inherit their parent's process affinity, but * there's no easy reverse mapping. For simplicity, * we assume that if this node is a procdir, its * parent isn't (which is correct as long as * descendents of procdir nodes are never procdir * nodes themselves) */ if (pd->pn_type == pfstype_procdir) pid = NO_PID; pfs_lock(pd); pn = pd->pn_parent; pfs_unlock(pd); goto got_pnode; } pfs_lock(pd); /* named node */ for (pn = pd->pn_nodes; pn != NULL; pn = pn->pn_next) if (pn->pn_type == pfstype_procdir) pdn = pn; else if (pn->pn_name[namelen] == '\0' && bcmp(pname, pn->pn_name, namelen) == 0) { pfs_unlock(pd); goto got_pnode; } /* process dependent node */ if ((pn = pdn) != NULL) { pid = 0; for (pid = 0, i = 0; i < namelen && isdigit(pname[i]); ++i) if ((pid = pid * 10 + pname[i] - '0') > PID_MAX) break; if (i == cnp->cn_namelen) { pfs_unlock(pd); goto got_pnode; } } pfs_unlock(pd); PFS_RETURN (ENOENT); got_pnode: pfs_assert_not_owned(pd); pfs_assert_not_owned(pn); visible = pfs_visible(curthread, pn, pid, false, NULL); if (!visible) { error = ENOENT; goto failed; } error = pfs_vncache_alloc(mp, vpp, pn, pid); if (error) goto failed; if (cnp->cn_flags & ISDOTDOT) { vfs_unbusy(mp); vn_lock(vn, LK_EXCLUSIVE | 
LK_RETRY); if (vn->v_iflag & VI_DOOMED) { vput(*vpp); *vpp = NULL; PFS_RETURN(ENOENT); } } if (cnp->cn_flags & MAKEENTRY && !(vn->v_iflag & VI_DOOMED)) cache_enter(vn, *vpp, cnp); PFS_RETURN (0); failed: if (cnp->cn_flags & ISDOTDOT) { vfs_unbusy(mp); vn_lock(vn, LK_EXCLUSIVE | LK_RETRY); *vpp = NULL; } PFS_RETURN(error); } /* * Open a file or directory. */ static int pfs_open(struct vop_open_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; int mode = va->a_mode; PFS_TRACE(("%s (mode 0x%x)", pn->pn_name, mode)); pfs_assert_not_owned(pn); /* check if the requested mode is permitted */ if (((mode & FREAD) && !(mode & PFS_RD)) || ((mode & FWRITE) && !(mode & PFS_WR))) PFS_RETURN (EPERM); /* we don't support locking */ if ((mode & O_SHLOCK) || (mode & O_EXLOCK)) PFS_RETURN (EOPNOTSUPP); PFS_RETURN (0); } /* * Read from a file */ static int pfs_read(struct vop_read_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; struct uio *uio = va->a_uio; struct proc *proc; struct sbuf *sb = NULL; int error, locked; off_t buflen; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); if (vn->v_type != VREG) PFS_RETURN (EINVAL); KASSERT_PN_IS_FILE(pn); if (!(pn->pn_flags & PFS_RD)) PFS_RETURN (EBADF); if (pn->pn_fill == NULL) PFS_RETURN (EIO); /* * This is necessary because either process' privileges may * have changed since the open() call. 
*/ if (!pfs_visible(curthread, pn, pvd->pvd_pid, false, &proc)) PFS_RETURN (EIO); if (proc != NULL) { _PHOLD(proc); PROC_UNLOCK(proc); } vhold(vn); locked = VOP_ISLOCKED(vn); VOP_UNLOCK(vn, 0); if (pn->pn_flags & PFS_RAWRD) { PFS_TRACE(("%zd resid", uio->uio_resid)); error = pn_fill(curthread, proc, pn, NULL, uio); PFS_TRACE(("%zd resid", uio->uio_resid)); goto ret; } if (uio->uio_resid < 0 || uio->uio_offset < 0 || uio->uio_resid > OFF_MAX - uio->uio_offset) { error = EINVAL; goto ret; } buflen = uio->uio_offset + uio->uio_resid; if (buflen > MAXPHYS) buflen = MAXPHYS; sb = sbuf_new(sb, NULL, buflen + 1, 0); if (sb == NULL) { error = EIO; goto ret; } error = pn_fill(curthread, proc, pn, sb, uio); if (error) { sbuf_delete(sb); goto ret; } /* * XXX: If the buffer overflowed, sbuf_len() will not return * the data length. Then just use the full length because an * overflowed sbuf must be full. */ if (sbuf_finish(sb) == 0) buflen = sbuf_len(sb); error = uiomove_frombuf(sbuf_data(sb), buflen, uio); sbuf_delete(sb); ret: vn_lock(vn, locked | LK_RETRY); vdrop(vn); if (proc != NULL) PRELE(proc); PFS_RETURN (error); } /* * Iterate through directory entries */ static int pfs_iterate(struct thread *td, struct proc *proc, struct pfs_node *pd, struct pfs_node **pn, struct proc **p) { int visible; sx_assert(&allproc_lock, SX_SLOCKED); pfs_assert_owned(pd); again: if (*pn == NULL) { /* first node */ *pn = pd->pn_nodes; } else if ((*pn)->pn_type != pfstype_procdir) { /* next node */ *pn = (*pn)->pn_next; } if (*pn != NULL && (*pn)->pn_type == pfstype_procdir) { /* next process */ if (*p == NULL) *p = LIST_FIRST(&allproc); else *p = LIST_NEXT(*p, p_list); /* out of processes: next node */ if (*p == NULL) *pn = (*pn)->pn_next; else PROC_LOCK(*p); } if ((*pn) == NULL) return (-1); if (*p != NULL) { visible = pfs_visible_proc(td, *pn, *p); PROC_UNLOCK(*p); } else if (proc != NULL) { visible = pfs_visible_proc(td, *pn, proc); } else { visible = 1; } if (!visible) goto again; return 
(0); } /* Directory entry list */ struct pfsentry { STAILQ_ENTRY(pfsentry) link; struct dirent entry; }; STAILQ_HEAD(pfsdirentlist, pfsentry); /* * Return directory entries. */ static int pfs_readdir(struct vop_readdir_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pd = pvd->pvd_pn; pid_t pid = pvd->pvd_pid; struct proc *p, *proc; struct pfs_node *pn; struct uio *uio; struct pfsentry *pfsent, *pfsent2; struct pfsdirentlist lst; off_t offset; int error, i, resid; STAILQ_INIT(&lst); error = 0; KASSERT(pd->pn_info == vn->v_mount->mnt_data, ("%s(): pn_info does not match mountpoint", __func__)); PFS_TRACE(("%s pid %lu", pd->pn_name, (unsigned long)pid)); pfs_assert_not_owned(pd); if (vn->v_type != VDIR) PFS_RETURN (ENOTDIR); KASSERT_PN_IS_DIR(pd); uio = va->a_uio; /* only allow reading entire entries */ offset = uio->uio_offset; resid = uio->uio_resid; if (offset < 0 || offset % PFS_DELEN != 0 || (resid && resid < PFS_DELEN)) PFS_RETURN (EINVAL); if (resid == 0) PFS_RETURN (0); sx_slock(&allproc_lock); pfs_lock(pd); /* check if the directory is visible to the caller */ if (!pfs_visible(curthread, pd, pid, true, &proc)) { sx_sunlock(&allproc_lock); pfs_unlock(pd); PFS_RETURN (ENOENT); } KASSERT(pid == NO_PID || proc != NULL, ("%s(): no process for pid %lu", __func__, (unsigned long)pid)); /* skip unwanted entries */ for (pn = NULL, p = NULL; offset > 0; offset -= PFS_DELEN) { if (pfs_iterate(curthread, proc, pd, &pn, &p) == -1) { /* nothing left... 
*/ if (proc != NULL) PROC_UNLOCK(proc); pfs_unlock(pd); sx_sunlock(&allproc_lock); PFS_RETURN (0); } } /* fill in entries */ while (pfs_iterate(curthread, proc, pd, &pn, &p) != -1 && resid >= PFS_DELEN) { if ((pfsent = malloc(sizeof(struct pfsentry), M_IOV, M_NOWAIT | M_ZERO)) == NULL) { error = ENOMEM; break; } pfsent->entry.d_reclen = PFS_DELEN; pfsent->entry.d_fileno = pn_fileno(pn, pid); /* PFS_DELEN was picked to fit PFS_NAMLEN */ for (i = 0; i < PFS_NAMELEN - 1 && pn->pn_name[i] != '\0'; ++i) pfsent->entry.d_name[i] = pn->pn_name[i]; pfsent->entry.d_name[i] = 0; pfsent->entry.d_namlen = i; switch (pn->pn_type) { case pfstype_procdir: KASSERT(p != NULL, ("reached procdir node with p == NULL")); pfsent->entry.d_namlen = snprintf(pfsent->entry.d_name, PFS_NAMELEN, "%d", p->p_pid); /* fall through */ case pfstype_root: case pfstype_dir: case pfstype_this: case pfstype_parent: pfsent->entry.d_type = DT_DIR; break; case pfstype_file: pfsent->entry.d_type = DT_REG; break; case pfstype_symlink: pfsent->entry.d_type = DT_LNK; break; default: panic("%s has unexpected node type: %d", pn->pn_name, pn->pn_type); } PFS_TRACE(("%s", pfsent->entry.d_name)); STAILQ_INSERT_TAIL(&lst, pfsent, link); offset += PFS_DELEN; resid -= PFS_DELEN; } if (proc != NULL) PROC_UNLOCK(proc); pfs_unlock(pd); sx_sunlock(&allproc_lock); i = 0; STAILQ_FOREACH_SAFE(pfsent, &lst, link, pfsent2) { if (error == 0) error = uiomove(&pfsent->entry, PFS_DELEN, uio); free(pfsent, M_IOV); i++; } PFS_TRACE(("%ju bytes", (uintmax_t)(i * PFS_DELEN))); PFS_RETURN (error); } /* * Read a symbolic link */ static int pfs_readlink(struct vop_readlink_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; struct uio *uio = va->a_uio; struct proc *proc = NULL; char buf[PATH_MAX]; struct sbuf sb; int error, locked; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); if (vn->v_type != VLNK) PFS_RETURN (EINVAL); KASSERT_PN_IS_LINK(pn); if (pn->pn_fill == 
NULL) PFS_RETURN (EIO); if (pvd->pvd_pid != NO_PID) { if ((proc = pfind(pvd->pvd_pid)) == NULL) PFS_RETURN (EIO); if (proc->p_flag & P_WEXIT) { PROC_UNLOCK(proc); PFS_RETURN (EIO); } _PHOLD(proc); PROC_UNLOCK(proc); } vhold(vn); locked = VOP_ISLOCKED(vn); VOP_UNLOCK(vn, 0); /* sbuf_new() can't fail with a static buffer */ sbuf_new(&sb, buf, sizeof buf, 0); error = pn_fill(curthread, proc, pn, &sb, NULL); if (proc != NULL) PRELE(proc); vn_lock(vn, locked | LK_RETRY); vdrop(vn); if (error) { sbuf_delete(&sb); PFS_RETURN (error); } if (sbuf_finish(&sb) != 0) { sbuf_delete(&sb); PFS_RETURN (ENAMETOOLONG); } error = uiomove_frombuf(sbuf_data(&sb), sbuf_len(&sb), uio); sbuf_delete(&sb); PFS_RETURN (error); } /* * Reclaim a vnode */ static int pfs_reclaim(struct vop_reclaim_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); return (pfs_vncache_free(va->a_vp)); } /* * Set attributes */ static int pfs_setattr(struct vop_setattr_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); PFS_RETURN (EOPNOTSUPP); } /* * Write to a file */ static int pfs_write(struct vop_write_args *va) { struct vnode *vn = va->a_vp; struct pfs_vdata *pvd = vn->v_data; struct pfs_node *pn = pvd->pvd_pn; struct uio *uio = va->a_uio; struct proc *proc; struct sbuf sb; int error; PFS_TRACE(("%s", pn->pn_name)); pfs_assert_not_owned(pn); if (vn->v_type != VREG) PFS_RETURN (EINVAL); KASSERT_PN_IS_FILE(pn); if (!(pn->pn_flags & PFS_WR)) PFS_RETURN (EBADF); if (pn->pn_fill == NULL) PFS_RETURN (EIO); /* * This is necessary because either process' privileges may * have changed since the open() call. 
*/ if (!pfs_visible(curthread, pn, pvd->pvd_pid, false, &proc)) PFS_RETURN (EIO); if (proc != NULL) { _PHOLD(proc); PROC_UNLOCK(proc); } if (pn->pn_flags & PFS_RAWWR) { error = pn_fill(curthread, proc, pn, NULL, uio); if (proc != NULL) PRELE(proc); PFS_RETURN (error); } sbuf_uionew(&sb, uio, &error); if (error) { if (proc != NULL) PRELE(proc); PFS_RETURN (error); } error = pn_fill(curthread, proc, pn, &sb, uio); sbuf_delete(&sb); if (proc != NULL) PRELE(proc); PFS_RETURN (error); } /* * Vnode operations */ struct vop_vector pfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = pfs_access, .vop_cachedlookup = pfs_lookup, .vop_close = pfs_close, .vop_create = VOP_EOPNOTSUPP, .vop_getattr = pfs_getattr, .vop_getextattr = pfs_getextattr, .vop_ioctl = pfs_ioctl, .vop_link = VOP_EOPNOTSUPP, .vop_lookup = vfs_cache_lookup, .vop_mkdir = VOP_EOPNOTSUPP, .vop_mknod = VOP_EOPNOTSUPP, .vop_open = pfs_open, .vop_read = pfs_read, .vop_readdir = pfs_readdir, .vop_readlink = pfs_readlink, .vop_reclaim = pfs_reclaim, .vop_remove = VOP_EOPNOTSUPP, .vop_rename = VOP_EOPNOTSUPP, .vop_rmdir = VOP_EOPNOTSUPP, .vop_setattr = pfs_setattr, .vop_symlink = VOP_EOPNOTSUPP, .vop_vptocnp = pfs_vptocnp, .vop_write = pfs_write, /* XXX I've probably forgotten a few that need VOP_EOPNOTSUPP */ }; Index: head/sys/fs/smbfs/smbfs.h =================================================================== --- head/sys/fs/smbfs/smbfs.h (revision 326267) +++ head/sys/fs/smbfs/smbfs.h (revision 326268) @@ -1,98 +1,100 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2000-2001 Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _SMBFS_SMBFS_H_ #define _SMBFS_SMBFS_H_ #define SMBFS_VERMAJ 1 #define SMBFS_VERMIN 1012 #define SMBFS_VERSION (SMBFS_VERMAJ*100000 + SMBFS_VERMIN) #define SMBFS_VFSNAME "smbfs" /* Values for flags */ #define SMBFS_MOUNT_SOFT 0x0001 #define SMBFS_MOUNT_INTR 0x0002 #define SMBFS_MOUNT_STRONG 0x0004 #define SMBFS_MOUNT_HAVE_NLS 0x0008 #define SMBFS_MOUNT_NO_LONG 0x0010 #define SMBFS_MAXPATHCOMP 256 /* maximum number of path components */ /* Layout of the mount control block for an smb file system. 
*/ struct smbfs_args { int version; int dev; u_int flags; char mount_point[MAXPATHLEN]; u_char root_path[512+1]; uid_t uid; gid_t gid; mode_t file_mode; mode_t dir_mode; int caseopt; }; #ifdef _KERNEL #include struct smbnode; struct smb_share; struct u_cred; struct vop_ioctl_args; struct buf; struct smbmount { /* struct smbfs_args sm_args; */ uid_t sm_uid; gid_t sm_gid; mode_t sm_file_mode; mode_t sm_dir_mode; struct mount * sm_mp; struct smbnode * sm_root; struct smb_dev * sm_dev; struct ucred * sm_owner; uint64_t sm_flags; long sm_nextino; struct smb_share * sm_share; struct smbnode * sm_npstack[SMBFS_MAXPATHCOMP]; int sm_caseopt; int sm_didrele; }; #define VFSTOSMBFS(mp) ((struct smbmount *)((mp)->mnt_data)) #define SMBFSTOVFS(smp) ((struct mount *)((smp)->sm_mp)) #define VTOVFS(vp) ((vp)->v_mount) #define VTOSMBFS(vp) (VFSTOSMBFS(VTOVFS(vp))) int smbfs_ioctl(struct vop_ioctl_args *ap); int smbfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td); int smbfs_vinvalbuf(struct vnode *vp, struct thread *td); #endif /* KERNEL */ #endif /* _SMBFS_SMBFS_H_ */ Index: head/sys/fs/smbfs/smbfs_io.c =================================================================== --- head/sys/fs/smbfs/smbfs_io.c (revision 326267) +++ head/sys/fs/smbfs/smbfs_io.c (revision 326268) @@ -1,678 +1,680 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2000-2001 Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* #include */ #include #include #include #include #include #include /*#define SMBFS_RWGENERIC*/ extern int smbfs_pbuf_freecnt; static int smbfs_fastlookup = 1; SYSCTL_DECL(_vfs_smbfs); SYSCTL_INT(_vfs_smbfs, OID_AUTO, fastlookup, CTLFLAG_RW, &smbfs_fastlookup, 0, ""); #define DE_SIZE (sizeof(struct dirent)) static int smbfs_readvdir(struct vnode *vp, struct uio *uio, struct ucred *cred) { struct dirent de; struct componentname cn; struct smb_cred *scred; struct smbfs_fctx *ctx; struct vnode *newvp; struct smbnode *np = VTOSMB(vp); int error/*, *eofflag = ap->a_eofflag*/; long offset, limit; np = VTOSMB(vp); SMBVDEBUG("dirname='%s'\n", np->n_name); scred = smbfs_malloc_scred(); smb_makescred(scred, uio->uio_td, cred); offset = uio->uio_offset / DE_SIZE; /* offset in the directory */ limit = uio->uio_resid / DE_SIZE; if (uio->uio_resid < DE_SIZE || uio->uio_offset < 0) { error = EINVAL; goto out; } while (limit && offset < 2) { limit--; bzero((caddr_t)&de, DE_SIZE); de.d_reclen = DE_SIZE; de.d_fileno = (offset == 0) 
? np->n_ino : (np->n_parent ? np->n_parentino : 2); if (de.d_fileno == 0) de.d_fileno = 0x7ffffffd + offset; de.d_namlen = offset + 1; de.d_name[0] = '.'; de.d_name[1] = '.'; de.d_name[offset + 1] = '\0'; de.d_type = DT_DIR; error = uiomove(&de, DE_SIZE, uio); if (error) goto out; offset++; uio->uio_offset += DE_SIZE; } if (limit == 0) { error = 0; goto out; } if (offset != np->n_dirofs || np->n_dirseq == NULL) { SMBVDEBUG("Reopening search %ld:%ld\n", offset, np->n_dirofs); if (np->n_dirseq) { smbfs_findclose(np->n_dirseq, scred); np->n_dirseq = NULL; } np->n_dirofs = 2; error = smbfs_findopen(np, "*", 1, SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR, scred, &ctx); if (error) { SMBVDEBUG("can not open search, error = %d", error); goto out; } np->n_dirseq = ctx; } else ctx = np->n_dirseq; while (np->n_dirofs < offset) { error = smbfs_findnext(ctx, offset - np->n_dirofs++, scred); if (error) { smbfs_findclose(np->n_dirseq, scred); np->n_dirseq = NULL; error = ENOENT ? 0 : error; goto out; } } error = 0; for (; limit; limit--, offset++) { error = smbfs_findnext(ctx, limit, scred); if (error) break; np->n_dirofs++; bzero((caddr_t)&de, DE_SIZE); de.d_reclen = DE_SIZE; de.d_fileno = ctx->f_attr.fa_ino; de.d_type = (ctx->f_attr.fa_attr & SMB_FA_DIR) ? 
DT_DIR : DT_REG; de.d_namlen = ctx->f_nmlen; bcopy(ctx->f_name, de.d_name, de.d_namlen); de.d_name[de.d_namlen] = '\0'; if (smbfs_fastlookup) { error = smbfs_nget(vp->v_mount, vp, ctx->f_name, ctx->f_nmlen, &ctx->f_attr, &newvp); if (!error) { cn.cn_nameptr = de.d_name; cn.cn_namelen = de.d_namlen; cache_enter(vp, newvp, &cn); vput(newvp); } } error = uiomove(&de, DE_SIZE, uio); if (error) break; } if (error == ENOENT) error = 0; uio->uio_offset = offset * DE_SIZE; out: smbfs_free_scred(scred); return error; } int smbfs_readvnode(struct vnode *vp, struct uio *uiop, struct ucred *cred) { struct smbmount *smp = VFSTOSMBFS(vp->v_mount); struct smbnode *np = VTOSMB(vp); struct thread *td; struct vattr vattr; struct smb_cred *scred; int error, lks; /* * Protect against method which is not supported for now */ if (uiop->uio_segflg == UIO_NOCOPY) return EOPNOTSUPP; if (vp->v_type != VREG && vp->v_type != VDIR) { SMBFSERR("vn types other than VREG or VDIR are unsupported !\n"); return EIO; } if (uiop->uio_resid == 0) return 0; if (uiop->uio_offset < 0) return EINVAL; /* if (uiop->uio_offset + uiop->uio_resid > smp->nm_maxfilesize) return EFBIG;*/ td = uiop->uio_td; if (vp->v_type == VDIR) { lks = LK_EXCLUSIVE; /* lockstatus(vp->v_vnlock); */ if (lks == LK_SHARED) vn_lock(vp, LK_UPGRADE | LK_RETRY); error = smbfs_readvdir(vp, uiop, cred); if (lks == LK_SHARED) vn_lock(vp, LK_DOWNGRADE | LK_RETRY); return error; } /* biosize = SSTOCN(smp->sm_share)->sc_txmax;*/ if (np->n_flag & NMODIFIED) { smbfs_attr_cacheremove(vp); error = VOP_GETATTR(vp, &vattr, cred); if (error) return error; np->n_mtime.tv_sec = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, cred); if (error) return error; if (np->n_mtime.tv_sec != vattr.va_mtime.tv_sec) { error = smbfs_vinvalbuf(vp, td); if (error) return error; np->n_mtime.tv_sec = vattr.va_mtime.tv_sec; } } scred = smbfs_malloc_scred(); smb_makescred(scred, td, cred); error = smb_read(smp->sm_share, np->n_fid, uiop, scred); 
smbfs_free_scred(scred); return (error); } int smbfs_writevnode(struct vnode *vp, struct uio *uiop, struct ucred *cred, int ioflag) { struct smbmount *smp = VTOSMBFS(vp); struct smbnode *np = VTOSMB(vp); struct smb_cred *scred; struct thread *td; int error = 0; if (vp->v_type != VREG) { SMBERROR("vn types other than VREG unsupported !\n"); return EIO; } SMBVDEBUG("ofs=%jd,resid=%zd\n", (intmax_t)uiop->uio_offset, uiop->uio_resid); if (uiop->uio_offset < 0) return EINVAL; /* if (uiop->uio_offset + uiop->uio_resid > smp->nm_maxfilesize) return (EFBIG);*/ td = uiop->uio_td; if (ioflag & (IO_APPEND | IO_SYNC)) { if (np->n_flag & NMODIFIED) { smbfs_attr_cacheremove(vp); error = smbfs_vinvalbuf(vp, td); if (error) return error; } if (ioflag & IO_APPEND) { #ifdef notyet /* * File size can be changed by another client */ smbfs_attr_cacheremove(vp); error = VOP_GETATTR(vp, &vattr, cred); if (error) return (error); #endif uiop->uio_offset = np->n_size; } } if (uiop->uio_resid == 0) return 0; if (vn_rlimit_fsize(vp, uiop, td)) return (EFBIG); scred = smbfs_malloc_scred(); smb_makescred(scred, td, cred); error = smb_write(smp->sm_share, np->n_fid, uiop, scred); smbfs_free_scred(scred); SMBVDEBUG("after: ofs=%jd,resid=%zd\n", (intmax_t)uiop->uio_offset, uiop->uio_resid); if (!error) { if (uiop->uio_offset > np->n_size) { np->n_size = uiop->uio_offset; vnode_pager_setsize(vp, np->n_size); } } return error; } /* * Do an I/O operation to/from a cache block. 
*/ int smbfs_doio(struct vnode *vp, struct buf *bp, struct ucred *cr, struct thread *td) { struct smbmount *smp = VFSTOSMBFS(vp->v_mount); struct smbnode *np = VTOSMB(vp); struct uio *uiop; struct iovec io; struct smb_cred *scred; int error = 0; uiop = malloc(sizeof(struct uio), M_SMBFSDATA, M_WAITOK); uiop->uio_iov = &io; uiop->uio_iovcnt = 1; uiop->uio_segflg = UIO_SYSSPACE; uiop->uio_td = td; scred = smbfs_malloc_scred(); smb_makescred(scred, td, cr); if (bp->b_iocmd == BIO_READ) { io.iov_len = uiop->uio_resid = bp->b_bcount; io.iov_base = bp->b_data; uiop->uio_rw = UIO_READ; switch (vp->v_type) { case VREG: uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE; error = smb_read(smp->sm_share, np->n_fid, uiop, scred); if (error) break; if (uiop->uio_resid) { int left = uiop->uio_resid; int nread = bp->b_bcount - left; if (left > 0) bzero((char *)bp->b_data + nread, left); } break; default: printf("smbfs_doio: type %x unexpected\n",vp->v_type); break; } if (error) { bp->b_error = error; bp->b_ioflags |= BIO_ERROR; } } else { /* write */ if (((bp->b_blkno * DEV_BSIZE) + bp->b_dirtyend) > np->n_size) bp->b_dirtyend = np->n_size - (bp->b_blkno * DEV_BSIZE); if (bp->b_dirtyend > bp->b_dirtyoff) { io.iov_len = uiop->uio_resid = bp->b_dirtyend - bp->b_dirtyoff; uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE + bp->b_dirtyoff; io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; uiop->uio_rw = UIO_WRITE; error = smb_write(smp->sm_share, np->n_fid, uiop, scred); /* * For an interrupted write, the buffer is still valid * and the write hasn't been pushed to the server yet, * so we can't set BIO_ERROR and report the interruption * by setting B_EINTR. For the B_ASYNC case, B_EINTR * is not relevant, so the rpc attempt is essentially * a noop. For the case of a V3 write rpc not being * committed to stable storage, the block is still * dirty and requires either a commit rpc or another * write rpc with iomode == NFSV3WRITE_FILESYNC before * the block is reused. 
This is indicated by setting * the B_DELWRI and B_NEEDCOMMIT flags. */ if (error == EINTR || (!error && (bp->b_flags & B_NEEDCOMMIT))) { int s; s = splbio(); bp->b_flags &= ~(B_INVAL|B_NOCACHE); if ((bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; if ((bp->b_flags & B_PAGING) == 0) { bdirty(bp); bp->b_flags &= ~B_DONE; } if ((bp->b_flags & B_ASYNC) == 0) bp->b_flags |= B_EINTR; splx(s); } else { if (error) { bp->b_ioflags |= BIO_ERROR; bp->b_error = error; } bp->b_dirtyoff = bp->b_dirtyend = 0; } } else { bp->b_resid = 0; bufdone(bp); free(uiop, M_SMBFSDATA); smbfs_free_scred(scred); return 0; } } bp->b_resid = uiop->uio_resid; bufdone(bp); free(uiop, M_SMBFSDATA); smbfs_free_scred(scred); return error; } /* * Vnode op for VM getpages. * Wish wish .... get rid from multiple IO routines */ int smbfs_getpages(ap) struct vop_getpages_args /* { struct vnode *a_vp; vm_page_t *a_m; int a_count; int a_reqpage; } */ *ap; { #ifdef SMBFS_RWGENERIC return vop_stdgetpages(ap); #else int i, error, nextoff, size, toff, npages, count; struct uio uio; struct iovec iov; vm_offset_t kva; struct buf *bp; struct vnode *vp; struct thread *td; struct ucred *cred; struct smbmount *smp; struct smbnode *np; struct smb_cred *scred; vm_object_t object; vm_page_t *pages; vp = ap->a_vp; if ((object = vp->v_object) == NULL) { printf("smbfs_getpages: called with non-merged cache vnode??\n"); return VM_PAGER_ERROR; } td = curthread; /* XXX */ cred = td->td_ucred; /* XXX */ np = VTOSMB(vp); smp = VFSTOSMBFS(vp->v_mount); pages = ap->a_m; npages = ap->a_count; /* * If the requested page is partially valid, just return it and * allow the pager to zero-out the blanks. Partially valid pages * can only occur at the file EOF. * * XXXGL: is that true for SMB filesystem? 
*/ VM_OBJECT_WLOCK(object); if (pages[npages - 1]->valid != 0 && --npages == 0) goto out; VM_OBJECT_WUNLOCK(object); scred = smbfs_malloc_scred(); smb_makescred(scred, td, cred); bp = getpbuf(&smbfs_pbuf_freecnt); kva = (vm_offset_t) bp->b_data; pmap_qenter(kva, pages, npages); VM_CNT_INC(v_vnodein); VM_CNT_ADD(v_vnodepgsin, npages); count = npages << PAGE_SHIFT; iov.iov_base = (caddr_t) kva; iov.iov_len = count; uio.uio_iov = &iov; uio.uio_iovcnt = 1; uio.uio_offset = IDX_TO_OFF(pages[0]->pindex); uio.uio_resid = count; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = td; error = smb_read(smp->sm_share, np->n_fid, &uio, scred); smbfs_free_scred(scred); pmap_qremove(kva, npages); relpbuf(bp, &smbfs_pbuf_freecnt); if (error && (uio.uio_resid == count)) { printf("smbfs_getpages: error %d\n",error); return VM_PAGER_ERROR; } size = count - uio.uio_resid; VM_OBJECT_WLOCK(object); for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { vm_page_t m; nextoff = toff + PAGE_SIZE; m = pages[i]; if (nextoff <= size) { /* * Read operation filled an entire page */ m->valid = VM_PAGE_BITS_ALL; KASSERT(m->dirty == 0, ("smbfs_getpages: page %p is dirty", m)); } else if (size > toff) { /* * Read operation filled a partial page. */ m->valid = 0; vm_page_set_valid_range(m, 0, size - toff); KASSERT(m->dirty == 0, ("smbfs_getpages: page %p is dirty", m)); } else { /* * Read operation was short. If no error occurred * we may have hit a zero-fill section. We simply * leave valid set to 0. */ ; } } out: VM_OBJECT_WUNLOCK(object); if (ap->a_rbehind) *ap->a_rbehind = 0; if (ap->a_rahead) *ap->a_rahead = 0; return (VM_PAGER_OK); #endif /* SMBFS_RWGENERIC */ } /* * Vnode op for VM putpages. * possible bug: all IO done in sync mode * Note that vop_close always invalidate pages before close, so it's * not necessary to open vnode. 
 */
/*
 * Write back a run of dirty pages with a single synchronous smb_write()
 * through a pbuf-mapped KVA window, then undirty the pages that made it
 * to the server.  Returns rtvals[0] as the overall pager status.
 */
int
smbfs_putpages(ap)
	struct vop_putpages_args /* {
		struct vnode *a_vp;
		vm_page_t *a_m;
		int a_count;
		int a_sync;
		int *a_rtvals;
	} */ *ap;
{
	int error;
	struct vnode *vp = ap->a_vp;
	struct thread *td;
	struct ucred *cred;

#ifdef SMBFS_RWGENERIC
	td = curthread;			/* XXX */
	cred = td->td_ucred;		/* XXX */
	VOP_OPEN(vp, FWRITE, cred, td, NULL);
	error = vop_stdputpages(ap);
	VOP_CLOSE(vp, FWRITE, cred, td);
	return error;
#else
	struct uio uio;
	struct iovec iov;
	vm_offset_t kva;
	struct buf *bp;
	int i, npages, count;
	int *rtvals;
	struct smbmount *smp;
	struct smbnode *np;
	struct smb_cred *scred;
	vm_page_t *pages;

	td = curthread;			/* XXX */
	cred = td->td_ucred;		/* XXX */
/*	VOP_OPEN(vp, FWRITE, cred, td, NULL);*/
	np = VTOSMB(vp);
	smp = VFSTOSMBFS(vp->v_mount);
	pages = ap->a_m;
	count = ap->a_count;
	rtvals = ap->a_rtvals;
	npages = btoc(count);

	/* Pessimistic default; vnode_pager_undirty_pages() upgrades these. */
	for (i = 0; i < npages; i++) {
		rtvals[i] = VM_PAGER_ERROR;
	}

	/* Borrow a pager buffer purely for its mapped KVA window. */
	bp = getpbuf(&smbfs_pbuf_freecnt);

	kva = (vm_offset_t) bp->b_data;
	pmap_qenter(kva, pages, npages);
	VM_CNT_INC(v_vnodeout);
	VM_CNT_ADD(v_vnodepgsout, count);

	/* One synchronous SYSSPACE write covering the whole page run. */
	iov.iov_base = (caddr_t) kva;
	iov.iov_len = count;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
	uio.uio_resid = count;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_WRITE;
	uio.uio_td = td;
	SMBVDEBUG("ofs=%jd,resid=%zd\n", (intmax_t)uio.uio_offset,
	    uio.uio_resid);

	scred = smbfs_malloc_scred();
	smb_makescred(scred, td, cred);
	error = smb_write(smp->sm_share, np->n_fid, &uio, scred);
	smbfs_free_scred(scred);
/*	VOP_CLOSE(vp, FWRITE, cred, td);*/
	SMBVDEBUG("paged write done: %d\n", error);

	pmap_qremove(kva, npages);
	relpbuf(bp, &smbfs_pbuf_freecnt);

	if (error == 0) {
		vnode_pager_undirty_pages(pages, rtvals, count - uio.uio_resid,
		    npages * PAGE_SIZE, npages * PAGE_SIZE);
	}
	return (rtvals[0]);
#endif /* SMBFS_RWGENERIC */
}

/*
 * Flush and invalidate all dirty buffers. If another process is already
 * doing the flush, just wait for completion.
 */
int
smbfs_vinvalbuf(struct vnode *vp, struct thread *td)
{
	struct smbnode *np = VTOSMB(vp);
	int error = 0;

	/* Nothing to do for a vnode that is already being reclaimed. */
	if (vp->v_iflag & VI_DOOMED)
		return 0;

	/* Serialize with a concurrent flusher via NFLUSHINPROG/NFLUSHWANT. */
	while (np->n_flag & NFLUSHINPROG) {
		np->n_flag |= NFLUSHWANT;
		error = tsleep(&np->n_flag, PRIBIO + 2, "smfsvinv", 2 * hz);
		/*
		 * NOTE(review): the tsleep() result is intentionally
		 * discarded; only a pending signal (reported by
		 * smb_td_intr()) aborts the wait — a plain timeout just
		 * re-checks the flag.
		 */
		error = smb_td_intr(td);
		if (error == EINTR)
			return EINTR;
	}
	np->n_flag |= NFLUSHINPROG;

	/* Push dirty VM pages to buffers before invalidating them. */
	if (vp->v_bufobj.bo_object != NULL) {
		VM_OBJECT_WLOCK(vp->v_bufobj.bo_object);
		vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC);
		VM_OBJECT_WUNLOCK(vp->v_bufobj.bo_object);
	}

	error = vinvalbuf(vp, V_SAVE, PCATCH, 0);
	while (error) {
		/* A signal aborts the flush; wake any waiter before leaving. */
		if (error == ERESTART || error == EINTR) {
			np->n_flag &= ~NFLUSHINPROG;
			if (np->n_flag & NFLUSHWANT) {
				np->n_flag &= ~NFLUSHWANT;
				wakeup(&np->n_flag);
			}
			return EINTR;
		}
		error = vinvalbuf(vp, V_SAVE, PCATCH, 0);
	}
	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
	if (np->n_flag & NFLUSHWANT) {
		np->n_flag &= ~NFLUSHWANT;
		wakeup(&np->n_flag);
	}
	return (error);
}
Index: head/sys/fs/smbfs/smbfs_node.c
===================================================================
--- head/sys/fs/smbfs/smbfs_node.c	(revision 326267)
+++ head/sys/fs/smbfs/smbfs_node.c	(revision 326268)
@@ -1,410 +1,412 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2000-2001 Boris Popov
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*#include #include */ #include #include #include extern struct vop_vector smbfs_vnodeops; /* XXX -> .h file */ static MALLOC_DEFINE(M_SMBNODE, "smbufs_node", "SMBFS vnode private part"); static MALLOC_DEFINE(M_SMBNODENAME, "smbufs_nname", "SMBFS node name"); u_int32_t __inline smbfs_hash(const u_char *name, int nmlen) { return (fnv_32_buf(name, nmlen, FNV1_32_INIT)); } static char * smbfs_name_alloc(const u_char *name, int nmlen) { u_char *cp; nmlen++; cp = malloc(nmlen, M_SMBNODENAME, M_WAITOK); bcopy(name, cp, nmlen - 1); cp[nmlen - 1] = 0; return cp; } static void smbfs_name_free(u_char *name) { free(name, M_SMBNODENAME); } static int __inline smbfs_vnode_cmp(struct vnode *vp, void *_sc) { struct smbnode *np; struct smbcmp *sc; np = (struct smbnode *) vp->v_data; sc = (struct smbcmp *) _sc; if (np->n_parent != sc->n_parent || np->n_nmlen != sc->n_nmlen || bcmp(sc->n_name, np->n_name, sc->n_nmlen) != 0) return 1; return 0; } static int smbfs_node_alloc(struct mount *mp, struct vnode *dvp, const char *dirnm, int dirlen, const char 
*name, int nmlen, char sep, struct smbfattr *fap, struct vnode **vpp) { struct vattr vattr; struct thread *td = curthread; /* XXX */ struct smbmount *smp = VFSTOSMBFS(mp); struct smbnode *np, *dnp; struct vnode *vp, *vp2; struct smbcmp sc; char *p, *rpath; int error, rplen; sc.n_parent = dvp; sc.n_nmlen = nmlen; sc.n_name = name; if (smp->sm_root != NULL && dvp == NULL) { SMBERROR("do not allocate root vnode twice!\n"); return EINVAL; } if (nmlen == 2 && bcmp(name, "..", 2) == 0) { if (dvp == NULL) return EINVAL; vp = VTOSMB(VTOSMB(dvp)->n_parent)->n_vnode; error = vget(vp, LK_EXCLUSIVE, td); if (error == 0) *vpp = vp; return error; } else if (nmlen == 1 && name[0] == '.') { SMBERROR("do not call me with dot!\n"); return EINVAL; } dnp = dvp ? VTOSMB(dvp) : NULL; if (dnp == NULL && dvp != NULL) { vn_printf(dvp, "smbfs_node_alloc: dead parent vnode "); return EINVAL; } error = vfs_hash_get(mp, smbfs_hash(name, nmlen), LK_EXCLUSIVE, td, vpp, smbfs_vnode_cmp, &sc); if (error) return (error); if (*vpp) { np = VTOSMB(*vpp); /* Force cached attributes to be refreshed if stale. */ (void)VOP_GETATTR(*vpp, &vattr, td->td_ucred); /* * If the file type on the server is inconsistent with * what it was when we created the vnode, kill the * bogus vnode now and fall through to the code below * to create a new one with the right type. */ if (((*vpp)->v_type == VDIR && (np->n_dosattr & SMB_FA_DIR) == 0) || ((*vpp)->v_type == VREG && (np->n_dosattr & SMB_FA_DIR) != 0)) { vgone(*vpp); vput(*vpp); } else { SMBVDEBUG("vnode taken from the hashtable\n"); return (0); } } /* * If we don't have node attributes, then it is an explicit lookup * for an existing vnode. 
*/ if (fap == NULL) return ENOENT; error = getnewvnode("smbfs", mp, &smbfs_vnodeops, vpp); if (error) return (error); vp = *vpp; np = malloc(sizeof *np, M_SMBNODE, M_WAITOK | M_ZERO); rplen = dirlen; if (sep != '\0') rplen++; rplen += nmlen; rpath = malloc(rplen + 1, M_SMBNODENAME, M_WAITOK); p = rpath; bcopy(dirnm, p, dirlen); p += dirlen; if (sep != '\0') *p++ = sep; if (name != NULL) { bcopy(name, p, nmlen); p += nmlen; } *p = '\0'; MPASS(p == rpath + rplen); lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); /* Vnode initialization */ vp->v_type = fap->fa_attr & SMB_FA_DIR ? VDIR : VREG; vp->v_data = np; np->n_vnode = vp; np->n_mount = VFSTOSMBFS(mp); np->n_rpath = rpath; np->n_rplen = rplen; np->n_nmlen = nmlen; np->n_name = smbfs_name_alloc(name, nmlen); np->n_ino = fap->fa_ino; if (dvp) { ASSERT_VOP_LOCKED(dvp, "smbfs_node_alloc"); np->n_parent = dvp; np->n_parentino = VTOSMB(dvp)->n_ino; if (/*vp->v_type == VDIR &&*/ (dvp->v_vflag & VV_ROOT) == 0) { vref(dvp); np->n_flag |= NREFPARENT; } } else if (vp->v_type == VREG) SMBERROR("new vnode '%s' born without parent ?\n", np->n_name); error = insmntque(vp, mp); if (error) { free(np, M_SMBNODE); return (error); } error = vfs_hash_insert(vp, smbfs_hash(name, nmlen), LK_EXCLUSIVE, td, &vp2, smbfs_vnode_cmp, &sc); if (error) return (error); if (vp2 != NULL) *vpp = vp2; return (0); } int smbfs_nget(struct mount *mp, struct vnode *dvp, const char *name, int nmlen, struct smbfattr *fap, struct vnode **vpp) { struct smbnode *dnp, *np; struct vnode *vp; int error, sep; dnp = (dvp) ? 
VTOSMB(dvp) : NULL; sep = 0; if (dnp != NULL) { sep = SMBFS_DNP_SEP(dnp); error = smbfs_node_alloc(mp, dvp, dnp->n_rpath, dnp->n_rplen, name, nmlen, sep, fap, &vp); } else error = smbfs_node_alloc(mp, NULL, "\\", 1, name, nmlen, sep, fap, &vp); if (error) return error; MPASS(vp != NULL); np = VTOSMB(vp); if (fap) smbfs_attr_cacheenter(vp, fap); *vpp = vp; return 0; } /* * Free smbnode, and give vnode back to system */ int smbfs_reclaim(ap) struct vop_reclaim_args /* { struct vnode *a_vp; struct thread *a_p; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp; struct smbnode *np = VTOSMB(vp); struct smbmount *smp = VTOSMBFS(vp); SMBVDEBUG("%s,%d\n", np->n_name, vrefcnt(vp)); KASSERT((np->n_flag & NOPEN) == 0, ("file not closed before reclaim")); /* * Destroy the vm object and flush associated pages. */ vnode_destroy_vobject(vp); dvp = (np->n_parent && (np->n_flag & NREFPARENT)) ? np->n_parent : NULL; /* * Remove the vnode from its hash chain. */ vfs_hash_remove(vp); if (np->n_name) smbfs_name_free(np->n_name); if (np->n_rpath) free(np->n_rpath, M_SMBNODENAME); free(np, M_SMBNODE); vp->v_data = NULL; if (dvp != NULL) { vrele(dvp); /* * Indicate that we released something; see comment * in smbfs_unmount(). 
*/ smp->sm_didrele = 1; } return 0; } int smbfs_inactive(ap) struct vop_inactive_args /* { struct vnode *a_vp; struct thread *a_td; } */ *ap; { struct thread *td = ap->a_td; struct ucred *cred = td->td_ucred; struct vnode *vp = ap->a_vp; struct smbnode *np = VTOSMB(vp); struct smb_cred *scred; struct vattr va; SMBVDEBUG("%s: %d\n", VTOSMB(vp)->n_name, vrefcnt(vp)); if ((np->n_flag & NOPEN) != 0) { scred = smbfs_malloc_scred(); smb_makescred(scred, td, cred); smbfs_vinvalbuf(vp, td); if (vp->v_type == VREG) { VOP_GETATTR(vp, &va, cred); smbfs_smb_close(np->n_mount->sm_share, np->n_fid, &np->n_mtime, scred); } else if (vp->v_type == VDIR) { if (np->n_dirseq != NULL) { smbfs_findclose(np->n_dirseq, scred); np->n_dirseq = NULL; } } np->n_flag &= ~NOPEN; smbfs_attr_cacheremove(vp); smbfs_free_scred(scred); } if (np->n_flag & NGONE) vrecycle(vp); return (0); } /* * routines to maintain vnode attributes cache * smbfs_attr_cacheenter: unpack np.i to vattr structure */ void smbfs_attr_cacheenter(struct vnode *vp, struct smbfattr *fap) { struct smbnode *np = VTOSMB(vp); if (vp->v_type == VREG) { if (np->n_size != fap->fa_size) { np->n_size = fap->fa_size; vnode_pager_setsize(vp, np->n_size); } } else if (vp->v_type == VDIR) { np->n_size = 16384; /* should be a better way ... 
*/ } else return; np->n_mtime = fap->fa_mtime; np->n_dosattr = fap->fa_attr; np->n_attrage = time_second; return; } int smbfs_attr_cachelookup(struct vnode *vp, struct vattr *va) { struct smbnode *np = VTOSMB(vp); struct smbmount *smp = VTOSMBFS(vp); int diff; diff = time_second - np->n_attrage; if (diff > 2) /* XXX should be configurable */ return ENOENT; va->va_type = vp->v_type; /* vnode type (for create) */ va->va_flags = 0; /* flags defined for file */ if (vp->v_type == VREG) { va->va_mode = smp->sm_file_mode; /* files access mode and type */ if (np->n_dosattr & SMB_FA_RDONLY) { va->va_mode &= ~(S_IWUSR|S_IWGRP|S_IWOTH); va->va_flags |= UF_READONLY; } } else if (vp->v_type == VDIR) { va->va_mode = smp->sm_dir_mode; /* files access mode and type */ } else return EINVAL; va->va_size = np->n_size; va->va_nlink = 1; /* number of references to file */ va->va_uid = smp->sm_uid; /* owner user id */ va->va_gid = smp->sm_gid; /* owner group id */ va->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; va->va_fileid = np->n_ino; /* file id */ if (va->va_fileid == 0) va->va_fileid = 2; va->va_blocksize = SSTOVC(smp->sm_share)->vc_txmax; va->va_mtime = np->n_mtime; va->va_atime = va->va_ctime = va->va_mtime; /* time file changed */ va->va_gen = VNOVAL; /* generation number of file */ if (np->n_dosattr & SMB_FA_HIDDEN) va->va_flags |= UF_HIDDEN; if (np->n_dosattr & SMB_FA_SYSTEM) va->va_flags |= UF_SYSTEM; /* * We don't set the archive bit for directories. 
*/ if ((vp->v_type != VDIR) && (np->n_dosattr & SMB_FA_ARCHIVE)) va->va_flags |= UF_ARCHIVE; va->va_rdev = NODEV; /* device the special file represents */ va->va_bytes = va->va_size; /* bytes of disk space held by file */ va->va_filerev = 0; /* file modification number */ va->va_vaflags = 0; /* operations flags */ return 0; } Index: head/sys/fs/smbfs/smbfs_node.h =================================================================== --- head/sys/fs/smbfs/smbfs_node.h (revision 326267) +++ head/sys/fs/smbfs/smbfs_node.h (revision 326268) @@ -1,103 +1,105 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2000-2001 Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
 *
 * $FreeBSD$
 */
#ifndef _FS_SMBFS_NODE_H_
#define _FS_SMBFS_NODE_H_

#define	SMBFS_ROOT_INO		2	/* just like in UFS */

/* Bits for smbnode.n_flag */
#define	NFLUSHINPROG	0x0001		/* a buffer flush is in progress */
#define	NFLUSHWANT	0x0002		/* they should gone ... */
#define	NMODIFIED	0x0004		/* bogus, until async IO implemented */
/*efine	NNEW		0x0008*/	/* smb/vnode has been allocated */
#define	NREFPARENT	0x0010		/* node holds parent from recycling */
#define	NFLUSHWIRE	0x1000		/* pending flush request */
#define	NOPEN		0x2000		/* file is open */
#define	NGONE		0x4000		/* file has been removed/renamed */

struct smbfs_fctx;

/*
 * In-memory (per-vnode) representation of a file or directory on an
 * SMB share; hangs off vp->v_data (see VTOSMB()).
 */
struct smbnode {
	int			n_flag;		/* N* flags above */
	struct vnode *		n_parent;	/* parent directory vnode */
	struct vnode *		n_vnode;	/* back pointer to our vnode */
	struct smbmount *	n_mount;	/* owning mount */
	time_t			n_attrage;	/* attributes cache time */
/*	time_t			n_ctime;*/
	struct timespec		n_mtime;	/* modify time */
	struct timespec		n_atime;	/* last access time */
	u_quad_t		n_size;		/* cached file size */
	long			n_ino;		/* pseudo inode number */
	long			n_parentino;	/* parent inode number */
	int			n_dosattr;	/* cached DOS attributes */
	u_int16_t		n_fid;		/* file handle */
	int			n_rwstate;	/* granted access mode */
	int			n_rplen;	/* length of n_rpath */
	char *			n_rpath;	/* full path on the share */
	u_char			n_nmlen;	/* length of n_name */
	u_char *		n_name;		/* last path component */
	struct smbfs_fctx *	n_dirseq;	/* ff context */
	long			n_dirofs;	/* last ff offset */
	LIST_ENTRY(smbnode)	n_hash;		/* vfs_hash linkage */
};

/* Lookup key for the vfs_hash comparison (see smbfs_vnode_cmp()). */
struct smbcmp {
	struct vnode * 	n_parent;	/* expected parent vnode */
	int 		n_nmlen;	/* name length */
	const char *	n_name;		/* name, not NUL-terminated */
};

#define VTOSMB(vp)	((struct smbnode *)(vp)->v_data)
#define SMBTOV(np)	((struct vnode *)(np)->n_vnode)

/* Path separator to append after dnp's path: none for the root ("\"). */
#define	SMBFS_DNP_SEP(dnp)	((dnp->n_rplen > 1) ? '\\' : '\0')

struct vop_getpages_args;
struct vop_inactive_args;
struct vop_putpages_args;
struct vop_reclaim_args;
struct ucred;
struct uio;
struct smbfattr;

int  smbfs_inactive(struct vop_inactive_args *);
int  smbfs_reclaim(struct vop_reclaim_args *);
int smbfs_nget(struct mount *mp, struct vnode *dvp, const char *name, int nmlen,
	struct smbfattr *fap, struct vnode **vpp);
u_int32_t smbfs_hash(const u_char *name, int nmlen);

int  smbfs_getpages(struct vop_getpages_args *);
int  smbfs_putpages(struct vop_putpages_args *);
int  smbfs_readvnode(struct vnode *vp, struct uio *uiop, struct ucred *cred);
int  smbfs_writevnode(struct vnode *vp, struct uio *uiop, struct ucred *cred, int ioflag);
void smbfs_attr_cacheenter(struct vnode *vp, struct smbfattr *fap);
int  smbfs_attr_cachelookup(struct vnode *vp ,struct vattr *va);

/* Invalidate the attribute cache by aging it out immediately. */
#define smbfs_attr_cacheremove(vp)	VTOSMB(vp)->n_attrage = 0

#endif /* _FS_SMBFS_NODE_H_ */
Index: head/sys/fs/smbfs/smbfs_smb.c
===================================================================
--- head/sys/fs/smbfs/smbfs_smb.c	(revision 326267)
+++ head/sys/fs/smbfs/smbfs_smb.c	(revision 326268)
@@ -1,1472 +1,1474 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2000-2001 Boris Popov
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #ifdef USE_MD5_HASH #include #endif #include #include #include #include #include #include #include /* * Lack of inode numbers leads us to the problem of generating them. * Partially this problem can be solved by having a dir/file cache * with inode numbers generated from the incremented by one counter. * However this way will require too much kernel memory, gives all * sorts of locking and consistency problems, not to mentinon counter overflows. * So, I'm decided to use a hash function to generate pseudo random (and unique) * inode numbers. 
*/ static long smbfs_getino(struct smbnode *dnp, const char *name, int nmlen) { #ifdef USE_MD5_HASH MD5_CTX md5; u_int32_t state[4]; long ino; int i; MD5Init(&md5); MD5Update(&md5, name, nmlen); MD5Final((u_char *)state, &md5); for (i = 0, ino = 0; i < 4; i++) ino += state[i]; return dnp->n_ino + ino; #endif u_int32_t ino; ino = dnp->n_ino + smbfs_hash(name, nmlen); if (ino <= 2) ino += 3; return ino; } static int smbfs_smb_lockandx(struct smbnode *np, int op, u_int32_t pid, off_t start, off_t end, struct smb_cred *scred) { struct smb_share *ssp = np->n_mount->sm_share; struct smb_rq *rqp; struct mbchain *mbp; u_char ltype = 0; int error; if (op == SMB_LOCK_SHARED) ltype |= SMB_LOCKING_ANDX_SHARED_LOCK; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_LOCKING_ANDX, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint8(mbp, 0xff); /* secondary command */ mb_put_uint8(mbp, 0); /* MBZ */ mb_put_uint16le(mbp, 0); mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM); mb_put_uint8(mbp, ltype); /* locktype */ mb_put_uint8(mbp, 0); /* oplocklevel - 0 seems is NO_OPLOCK */ mb_put_uint32le(mbp, 0); /* timeout - break immediately */ mb_put_uint16le(mbp, op == SMB_LOCK_RELEASE ? 1 : 0); mb_put_uint16le(mbp, op == SMB_LOCK_RELEASE ? 0 : 1); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint16le(mbp, pid); mb_put_uint32le(mbp, start); mb_put_uint32le(mbp, end - start); smb_rq_bend(rqp); error = smb_rq_simple(rqp); smb_rq_done(rqp); return error; } int smbfs_smb_lock(struct smbnode *np, int op, caddr_t id, off_t start, off_t end, struct smb_cred *scred) { struct smb_share *ssp = np->n_mount->sm_share; if (SMB_DIALECT(SSTOVC(ssp)) < SMB_DIALECT_LANMAN1_0) /* * TODO: use LOCK_BYTE_RANGE here. 
*/ return EINVAL; else return smbfs_smb_lockandx(np, op, (uintptr_t)id, start, end, scred); } static int smbfs_query_info_fs(struct smb_share *ssp, struct statfs *sbp, struct smb_cred *scred) { struct smb_t2rq *t2p; struct mbchain *mbp; struct mdchain *mdp; uint32_t bsize, bpu; int64_t units, funits; int error; error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_QUERY_FS_INFORMATION, scred, &t2p); if (error) return (error); mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_uint16le(mbp, SMB_QUERY_FS_SIZE_INFO); t2p->t2_maxpcount = 2; t2p->t2_maxdcount = sizeof(int64_t) * 2 + sizeof(uint32_t) * 2; error = smb_t2_request(t2p); if (error) { smb_t2_done(t2p); return (error); } mdp = &t2p->t2_rdata; md_get_int64le(mdp, &units); md_get_int64le(mdp, &funits); md_get_uint32le(mdp, &bpu); md_get_uint32le(mdp, &bsize); sbp->f_bsize = bpu * bsize; /* fundamental filesystem block size */ sbp->f_blocks= (uint64_t)units; /* total data blocks in filesystem */ sbp->f_bfree = (uint64_t)funits;/* free blocks in fs */ sbp->f_bavail= (uint64_t)funits;/* free blocks avail to non-superuser */ sbp->f_files = 0xffff; /* total file nodes in filesystem */ sbp->f_ffree = 0xffff; /* free file nodes in fs */ smb_t2_done(t2p); return (0); } static int smbfs_query_info_alloc(struct smb_share *ssp, struct statfs *sbp, struct smb_cred *scred) { struct smb_t2rq *t2p; struct mbchain *mbp; struct mdchain *mdp; u_int16_t bsize; u_int32_t units, bpu, funits; int error; error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_QUERY_FS_INFORMATION, scred, &t2p); if (error) return error; mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_uint16le(mbp, SMB_INFO_ALLOCATION); t2p->t2_maxpcount = 4; t2p->t2_maxdcount = 4 * 4 + 2; error = smb_t2_request(t2p); if (error) { smb_t2_done(t2p); return error; } mdp = &t2p->t2_rdata; md_get_uint32(mdp, NULL); /* fs id */ md_get_uint32le(mdp, &bpu); md_get_uint32le(mdp, &units); md_get_uint32le(mdp, &funits); md_get_uint16le(mdp, &bsize); sbp->f_bsize = bpu * bsize; /* fundamental filesystem block 
size */ sbp->f_blocks= units; /* total data blocks in filesystem */ sbp->f_bfree = funits; /* free blocks in fs */ sbp->f_bavail= funits; /* free blocks avail to non-superuser */ sbp->f_files = 0xffff; /* total file nodes in filesystem */ sbp->f_ffree = 0xffff; /* free file nodes in fs */ smb_t2_done(t2p); return 0; } static int smbfs_query_info_disk(struct smb_share *ssp, struct statfs *sbp, struct smb_cred *scred) { struct smb_rq *rqp; struct mdchain *mdp; u_int16_t units, bpu, bsize, funits; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_QUERY_INFORMATION_DISK, scred, &rqp); if (error) return (error); smb_rq_wstart(rqp); smb_rq_wend(rqp); smb_rq_bstart(rqp); smb_rq_bend(rqp); error = smb_rq_simple(rqp); if (error) { smb_rq_done(rqp); return error; } smb_rq_getreply(rqp, &mdp); md_get_uint16le(mdp, &units); md_get_uint16le(mdp, &bpu); md_get_uint16le(mdp, &bsize); md_get_uint16le(mdp, &funits); sbp->f_bsize = bpu * bsize; /* fundamental filesystem block size */ sbp->f_blocks= units; /* total data blocks in filesystem */ sbp->f_bfree = funits; /* free blocks in fs */ sbp->f_bavail= funits; /* free blocks avail to non-superuser */ sbp->f_files = 0xffff; /* total file nodes in filesystem */ sbp->f_ffree = 0xffff; /* free file nodes in fs */ smb_rq_done(rqp); return 0; } int smbfs_smb_statfs(struct smb_share *ssp, struct statfs *sbp, struct smb_cred *scred) { if (SMB_DIALECT(SSTOVC(ssp)) >= SMB_DIALECT_LANMAN2_0) { if (smbfs_query_info_fs(ssp, sbp, scred) == 0) return (0); if (smbfs_query_info_alloc(ssp, sbp, scred) == 0) return (0); } return (smbfs_query_info_disk(ssp, sbp, scred)); } static int smbfs_smb_seteof(struct smbnode *np, int64_t newsize, struct smb_cred *scred) { struct smb_t2rq *t2p; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; int error; error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_SET_FILE_INFORMATION, scred, &t2p); if (error) return error; mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, 
MB_MSYSTEM); mb_put_uint16le(mbp, SMB_SET_FILE_END_OF_FILE_INFO); mb_put_uint32le(mbp, 0); mbp = &t2p->t2_tdata; mb_init(mbp); mb_put_int64le(mbp, newsize); mb_put_uint32le(mbp, 0); /* padding */ mb_put_uint16le(mbp, 0); t2p->t2_maxpcount = 2; t2p->t2_maxdcount = 0; error = smb_t2_request(t2p); smb_t2_done(t2p); return error; } static int smb_smb_flush(struct smbnode *np, struct smb_cred *scred) { struct smb_share *ssp = np->n_mount->sm_share; struct smb_rq *rqp; struct mbchain *mbp; int error; if ((np->n_flag & NOPEN) == 0 || !SMBTOV(np) || SMBTOV(np)->v_type != VREG) return 0; /* not a regular open file */ error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_FLUSH, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM); smb_rq_wend(rqp); smb_rq_bstart(rqp); smb_rq_bend(rqp); error = smb_rq_simple(rqp); smb_rq_done(rqp); if (!error) np->n_flag &= ~NFLUSHWIRE; return (error); } int smbfs_smb_flush(struct smbnode *np, struct smb_cred *scred) { if (np->n_flag & NFLUSHWIRE) return (smb_smb_flush(np, scred)); return (0); } int smbfs_smb_setfsize(struct smbnode *np, int64_t newsize, struct smb_cred *scred) { struct smb_share *ssp = np->n_mount->sm_share; struct smb_rq *rqp; struct mbchain *mbp; int error; if (!smbfs_smb_seteof(np, newsize, scred)) { np->n_flag |= NFLUSHWIRE; return (0); } /* XXX: We should use SMB_COM_WRITE_ANDX to support large offsets */ error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_WRITE, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM); mb_put_uint16le(mbp, 0); mb_put_uint32le(mbp, (uint32_t)newsize); mb_put_uint16le(mbp, 0); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_DATA); mb_put_uint16le(mbp, 0); smb_rq_bend(rqp); error = smb_rq_simple(rqp); smb_rq_done(rqp); return error; } int smbfs_smb_query_info(struct smbnode *np, const char *name, int len, struct smbfattr 
*fap, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; struct mdchain *mdp; u_int8_t wc; int error; u_int16_t wattr; u_int32_t lint; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_QUERY_INFORMATION, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); do { error = smbfs_fullpath(mbp, SSTOVC(ssp), np, name, len); if (error) break; smb_rq_bend(rqp); error = smb_rq_simple(rqp); if (error) break; smb_rq_getreply(rqp, &mdp); if (md_get_uint8(mdp, &wc) != 0 || wc != 10) { error = EBADRPC; break; } md_get_uint16le(mdp, &wattr); fap->fa_attr = wattr; /* * Be careful using the time returned here, as * with FAT on NT4SP6, at least, the time returned is low * 32 bits of 100s of nanoseconds (since 1601) so it rolls * over about every seven minutes! */ md_get_uint32le(mdp, &lint); /* specs: secs since 1970 */ if (lint) /* avoid bogus zero returns */ smb_time_server2local(lint, SSTOVC(ssp)->vc_sopt.sv_tz, &fap->fa_mtime); md_get_uint32le(mdp, &lint); fap->fa_size = lint; } while(0); smb_rq_done(rqp); return error; } /* * Set DOS file attributes. 
mtime should be NULL for dialects above lm10 */ int smbfs_smb_setpattr(struct smbnode *np, u_int16_t attr, struct timespec *mtime, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; u_long time; int error, svtz; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_SET_INFORMATION, scred, &rqp); if (error) return (error); svtz = SSTOVC(ssp)->vc_sopt.sv_tz; smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, attr); if (mtime) { smb_time_local2server(mtime, svtz, &time); } else time = 0; mb_put_uint32le(mbp, time); /* mtime */ mb_put_mem(mbp, NULL, 5 * 2, MB_MZERO); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); do { error = smbfs_fullpath(mbp, SSTOVC(ssp), np, NULL, 0); if (error) break; mb_put_uint8(mbp, SMB_DT_ASCII); if (SMB_UNICODE_STRINGS(SSTOVC(ssp))) { mb_put_padbyte(mbp); mb_put_uint8(mbp, 0); /* 1st byte of NULL Unicode char */ } mb_put_uint8(mbp, 0); smb_rq_bend(rqp); error = smb_rq_simple(rqp); if (error) { SMBERROR("smb_rq_simple(rqp) => error %d\n", error); break; } } while(0); smb_rq_done(rqp); return error; } /* * Note, win95 doesn't support this call. 
 */
/*
 * Set DOS attributes and times by path via TRANS2_SET_PATH_INFORMATION
 * at the SMB_INFO_STANDARD level (pre-NT servers).
 */
int
smbfs_smb_setptime2(struct smbnode *np, struct timespec *mtime,
	struct timespec *atime, int attr, struct smb_cred *scred)
{
	struct smb_t2rq *t2p;
	struct smb_share *ssp = np->n_mount->sm_share;
	struct smb_vc *vcp = SSTOVC(ssp);
	struct mbchain *mbp;
	u_int16_t date, time;
	int error, tzoff;

	error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_SET_PATH_INFORMATION,
	    scred, &t2p);
	if (error)
		return error;
	mbp = &t2p->t2_tparam;
	mb_init(mbp);
	mb_put_uint16le(mbp, SMB_INFO_STANDARD);
	mb_put_uint32le(mbp, 0);		/* MBZ */
	/* mb_put_uint8(mbp, SMB_DT_ASCII); specs incorrect */
	error = smbfs_fullpath(mbp, vcp, np, NULL, 0);
	if (error) {
		smb_t2_done(t2p);
		return error;
	}
	/* Times go on the wire in server-local DOS format. */
	tzoff = vcp->vc_sopt.sv_tz;
	mbp = &t2p->t2_tdata;
	mb_init(mbp);
	mb_put_uint32le(mbp, 0);		/* creation time */
	if (atime)
		smb_time_unix2dos(atime, tzoff, &date, &time, NULL);
	else
		time = date = 0;	/* zero means "leave unchanged" */
	mb_put_uint16le(mbp, date);
	mb_put_uint16le(mbp, time);
	if (mtime)
		smb_time_unix2dos(mtime, tzoff, &date, &time, NULL);
	else
		time = date = 0;
	mb_put_uint16le(mbp, date);
	mb_put_uint16le(mbp, time);
	mb_put_uint32le(mbp, 0);		/* file size */
	mb_put_uint32le(mbp, 0);		/* allocation unit size */
	mb_put_uint16le(mbp, attr);		/* DOS attr */
	mb_put_uint32le(mbp, 0);		/* EA size */
	t2p->t2_maxpcount = 5 * 2;
	t2p->t2_maxdcount = vcp->vc_txmax;
	error = smb_t2_request(t2p);
	smb_t2_done(t2p);
	return error;
}

/*
 * NT level.  Specially for win9x.
 * Set attributes/times by path using SMB_SET_FILE_BASIC_INFO
 * (64-bit NT timestamps) through TRANS2_SET_PATH_INFORMATION.
 */
int
smbfs_smb_setpattrNT(struct smbnode *np, u_short attr, struct timespec *mtime,
	struct timespec *atime, struct smb_cred *scred)
{
	struct smb_t2rq *t2p;
	struct smb_share *ssp = np->n_mount->sm_share;
	struct smb_vc *vcp = SSTOVC(ssp);
	struct mbchain *mbp;
	int64_t tm;
	int error, tzoff;

	error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_SET_PATH_INFORMATION,
	    scred, &t2p);
	if (error)
		return error;
	mbp = &t2p->t2_tparam;
	mb_init(mbp);
	mb_put_uint16le(mbp, SMB_SET_FILE_BASIC_INFO);
	mb_put_uint32le(mbp, 0);		/* MBZ */
	/* mb_put_uint8(mbp, SMB_DT_ASCII); specs incorrect */
	error = smbfs_fullpath(mbp, vcp, np, NULL, 0);
	if (error) {
		smb_t2_done(t2p);
		return error;
	}
	tzoff = vcp->vc_sopt.sv_tz;
	mbp = &t2p->t2_tdata;
	mb_init(mbp);
	mb_put_int64le(mbp, 0);		/* creation time */
	if (atime) {
		smb_time_local2NT(atime, tzoff, &tm);
	} else
		tm = 0;			/* zero means "leave unchanged" */
	mb_put_int64le(mbp, tm);
	if (mtime) {
		smb_time_local2NT(mtime, tzoff, &tm);
	} else
		tm = 0;
	mb_put_int64le(mbp, tm);
	mb_put_int64le(mbp, tm);	/* change time */
	mb_put_uint32le(mbp, attr);	/* attr */
	t2p->t2_maxpcount = 24;
	t2p->t2_maxdcount = 56;
	error = smb_t2_request(t2p);
	smb_t2_done(t2p);
	return error;
}

/*
 * Set file atime and mtime.  Not supported by the core dialect.
 * Uses SMB_COM_SET_INFORMATION2 on an already-open fid.
 */
int
smbfs_smb_setftime(struct smbnode *np, struct timespec *mtime,
	struct timespec *atime, struct smb_cred *scred)
{
	struct smb_rq *rqp;
	struct smb_share *ssp = np->n_mount->sm_share;
	struct mbchain *mbp;
	u_int16_t date, time;
	int error, tzoff;

	error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_SET_INFORMATION2,
	    scred, &rqp);
	if (error)
		return (error);
	tzoff = SSTOVC(ssp)->vc_sopt.sv_tz;
	smb_rq_getrequest(rqp, &mbp);
	smb_rq_wstart(rqp);
	/* fid is stored and sent in wire byte order, hence the raw copy */
	mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM);
	mb_put_uint32le(mbp, 0);		/* creation time */
	if (atime)
		smb_time_unix2dos(atime, tzoff, &date, &time, NULL);
	else
		time = date = 0;
	mb_put_uint16le(mbp, date);
	mb_put_uint16le(mbp, time);
	if (mtime)
		smb_time_unix2dos(mtime, tzoff, &date, &time, NULL);
	else
		time = date = 0;
	mb_put_uint16le(mbp, date);
	mb_put_uint16le(mbp, time);
	smb_rq_wend(rqp);
	smb_rq_bstart(rqp);
	smb_rq_bend(rqp);
	error = smb_rq_simple(rqp);
	SMBSDEBUG("%d\n", error);
	smb_rq_done(rqp);
	return error;
}

/*
 * Set DOS file attributes.
 * Looks like this call can be used only if SMB_CAP_NT_SMBS bit is on.
*/ int smbfs_smb_setfattrNT(struct smbnode *np, u_int16_t attr, struct timespec *mtime, struct timespec *atime, struct smb_cred *scred) { struct smb_t2rq *t2p; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; int64_t tm; int error, svtz; error = smb_t2_alloc(SSTOCP(ssp), SMB_TRANS2_SET_FILE_INFORMATION, scred, &t2p); if (error) return error; svtz = SSTOVC(ssp)->vc_sopt.sv_tz; mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_mem(mbp, (caddr_t)&np->n_fid, 2, MB_MSYSTEM); mb_put_uint16le(mbp, SMB_SET_FILE_BASIC_INFO); mb_put_uint32le(mbp, 0); mbp = &t2p->t2_tdata; mb_init(mbp); mb_put_int64le(mbp, 0); /* creation time */ if (atime) { smb_time_local2NT(atime, svtz, &tm); } else tm = 0; mb_put_int64le(mbp, tm); if (mtime) { smb_time_local2NT(mtime, svtz, &tm); } else tm = 0; mb_put_int64le(mbp, tm); mb_put_int64le(mbp, tm); /* change time */ mb_put_uint16le(mbp, attr); mb_put_uint32le(mbp, 0); /* padding */ mb_put_uint16le(mbp, 0); t2p->t2_maxpcount = 2; t2p->t2_maxdcount = 0; error = smb_t2_request(t2p); smb_t2_done(t2p); return error; } int smbfs_smb_open(struct smbnode *np, int accmode, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; struct mdchain *mdp; u_int8_t wc; u_int16_t fid, wattr, grantedmode; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_OPEN, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, accmode); mb_put_uint16le(mbp, SMB_FA_SYSTEM | SMB_FA_HIDDEN); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); do { error = smbfs_fullpath(mbp, SSTOVC(ssp), np, NULL, 0); if (error) break; smb_rq_bend(rqp); error = smb_rq_simple(rqp); if (error) break; smb_rq_getreply(rqp, &mdp); if (md_get_uint8(mdp, &wc) != 0 || wc != 7) { error = EBADRPC; break; } md_get_uint16(mdp, &fid); md_get_uint16le(mdp, &wattr); md_get_uint32(mdp, NULL); /* mtime */ md_get_uint32(mdp, NULL); /* fsize */ md_get_uint16le(mdp, 
&grantedmode); /* * TODO: refresh attributes from this reply */ } while(0); smb_rq_done(rqp); if (error) return error; np->n_fid = fid; np->n_rwstate = grantedmode; return 0; } int smbfs_smb_close(struct smb_share *ssp, u_int16_t fid, struct timespec *mtime, struct smb_cred *scred) { struct smb_rq *rqp; struct mbchain *mbp; u_long time; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_CLOSE, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_mem(mbp, (caddr_t)&fid, sizeof(fid), MB_MSYSTEM); if (mtime) { smb_time_local2server(mtime, SSTOVC(ssp)->vc_sopt.sv_tz, &time); } else time = 0; mb_put_uint32le(mbp, time); smb_rq_wend(rqp); smb_rq_bstart(rqp); smb_rq_bend(rqp); error = smb_rq_simple(rqp); smb_rq_done(rqp); return error; } int smbfs_smb_create(struct smbnode *dnp, const char *name, int nmlen, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = dnp->n_mount->sm_share; struct mbchain *mbp; struct mdchain *mdp; struct timespec ctime; u_int8_t wc; u_int16_t fid; u_long tm; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_CREATE, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, SMB_FA_ARCHIVE); /* attributes */ nanotime(&ctime); smb_time_local2server(&ctime, SSTOVC(ssp)->vc_sopt.sv_tz, &tm); mb_put_uint32le(mbp, tm); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); error = smbfs_fullpath(mbp, SSTOVC(ssp), dnp, name, nmlen); if (!error) { smb_rq_bend(rqp); error = smb_rq_simple(rqp); if (!error) { smb_rq_getreply(rqp, &mdp); md_get_uint8(mdp, &wc); if (wc == 1) md_get_uint16(mdp, &fid); else error = EBADRPC; } } smb_rq_done(rqp); if (error) return error; smbfs_smb_close(ssp, fid, &ctime, scred); return error; } int smbfs_smb_delete(struct smbnode *np, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; int error; error = smb_rq_alloc(SSTOCP(ssp), 
SMB_COM_DELETE, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, SMB_FA_SYSTEM | SMB_FA_HIDDEN); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); error = smbfs_fullpath(mbp, SSTOVC(ssp), np, NULL, 0); if (!error) { smb_rq_bend(rqp); error = smb_rq_simple(rqp); } smb_rq_done(rqp); return error; } int smbfs_smb_rename(struct smbnode *src, struct smbnode *tdnp, const char *tname, int tnmlen, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = src->n_mount->sm_share; struct mbchain *mbp; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_RENAME, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, SMB_FA_SYSTEM | SMB_FA_HIDDEN); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); do { error = smbfs_fullpath(mbp, SSTOVC(ssp), src, NULL, 0); if (error) break; mb_put_uint8(mbp, SMB_DT_ASCII); error = smbfs_fullpath(mbp, SSTOVC(ssp), tdnp, tname, tnmlen); if (error) break; smb_rq_bend(rqp); error = smb_rq_simple(rqp); } while(0); smb_rq_done(rqp); return error; } int smbfs_smb_move(struct smbnode *src, struct smbnode *tdnp, const char *tname, int tnmlen, u_int16_t flags, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = src->n_mount->sm_share; struct mbchain *mbp; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_MOVE, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, SMB_TID_UNKNOWN); mb_put_uint16le(mbp, 0x20); /* delete target file */ mb_put_uint16le(mbp, flags); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); do { error = smbfs_fullpath(mbp, SSTOVC(ssp), src, NULL, 0); if (error) break; mb_put_uint8(mbp, SMB_DT_ASCII); error = smbfs_fullpath(mbp, SSTOVC(ssp), tdnp, tname, tnmlen); if (error) break; smb_rq_bend(rqp); error = smb_rq_simple(rqp); } while(0); smb_rq_done(rqp); return 
error; } int smbfs_smb_mkdir(struct smbnode *dnp, const char *name, int len, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = dnp->n_mount->sm_share; struct mbchain *mbp; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_CREATE_DIRECTORY, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); error = smbfs_fullpath(mbp, SSTOVC(ssp), dnp, name, len); if (!error) { smb_rq_bend(rqp); error = smb_rq_simple(rqp); } smb_rq_done(rqp); return error; } int smbfs_smb_rmdir(struct smbnode *np, struct smb_cred *scred) { struct smb_rq *rqp; struct smb_share *ssp = np->n_mount->sm_share; struct mbchain *mbp; int error; error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_DELETE_DIRECTORY, scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); error = smbfs_fullpath(mbp, SSTOVC(ssp), np, NULL, 0); if (!error) { smb_rq_bend(rqp); error = smb_rq_simple(rqp); } smb_rq_done(rqp); return error; } static int smbfs_smb_search(struct smbfs_fctx *ctx) { struct smb_vc *vcp = SSTOVC(ctx->f_ssp); struct smb_rq *rqp; struct mbchain *mbp; struct mdchain *mdp; u_int8_t wc, bt; u_int16_t ec, dlen, bc; int maxent, error, iseof = 0; maxent = min(ctx->f_left, (vcp->vc_txmax - SMB_HDRLEN - 3) / SMB_DENTRYLEN); if (ctx->f_rq) { smb_rq_done(ctx->f_rq); ctx->f_rq = NULL; } error = smb_rq_alloc(SSTOCP(ctx->f_ssp), SMB_COM_SEARCH, ctx->f_scred, &rqp); if (error) return (error); ctx->f_rq = rqp; smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_uint16le(mbp, maxent); /* max entries to return */ mb_put_uint16le(mbp, ctx->f_attrmask); smb_rq_wend(rqp); smb_rq_bstart(rqp); mb_put_uint8(mbp, SMB_DT_ASCII); /* buffer format */ if (ctx->f_flags & SMBFS_RDD_FINDFIRST) { error = smbfs_fullpath(mbp, vcp, ctx->f_dnp, ctx->f_wildcard, ctx->f_wclen); if (error) return error; 
mb_put_uint8(mbp, SMB_DT_VARIABLE); mb_put_uint16le(mbp, 0); /* context length */ ctx->f_flags &= ~SMBFS_RDD_FINDFIRST; } else { if (SMB_UNICODE_STRINGS(vcp)) { mb_put_padbyte(mbp); mb_put_uint8(mbp, 0); } mb_put_uint8(mbp, 0); /* file name length */ mb_put_uint8(mbp, SMB_DT_VARIABLE); mb_put_uint16le(mbp, SMB_SKEYLEN); mb_put_mem(mbp, ctx->f_skey, SMB_SKEYLEN, MB_MSYSTEM); } smb_rq_bend(rqp); error = smb_rq_simple(rqp); if (error) { if (rqp->sr_errclass == ERRDOS && rqp->sr_serror == ERRnofiles) { error = 0; iseof = 1; ctx->f_flags |= SMBFS_RDD_EOF; } else return error; } smb_rq_getreply(rqp, &mdp); md_get_uint8(mdp, &wc); if (wc != 1) return iseof ? ENOENT : EBADRPC; md_get_uint16le(mdp, &ec); if (ec == 0) return ENOENT; ctx->f_ecnt = ec; md_get_uint16le(mdp, &bc); if (bc < 3) return EBADRPC; bc -= 3; md_get_uint8(mdp, &bt); if (bt != SMB_DT_VARIABLE) return EBADRPC; md_get_uint16le(mdp, &dlen); if (dlen != bc || dlen % SMB_DENTRYLEN != 0) return EBADRPC; return 0; } static int smbfs_findopenLM1(struct smbfs_fctx *ctx, struct smbnode *dnp, const char *wildcard, int wclen, int attr, struct smb_cred *scred) { ctx->f_attrmask = attr; if (wildcard) { if (wclen == 1 && wildcard[0] == '*') { ctx->f_wildcard = "*.*"; ctx->f_wclen = 3; } else { ctx->f_wildcard = wildcard; ctx->f_wclen = wclen; } } else { ctx->f_wildcard = NULL; ctx->f_wclen = 0; } ctx->f_name = ctx->f_fname; return 0; } static int smbfs_findnextLM1(struct smbfs_fctx *ctx, int limit) { struct mdchain *mbp; struct smb_rq *rqp; char *cp; u_int8_t battr; u_int16_t date, time; u_int32_t size; int error; if (ctx->f_ecnt == 0) { if (ctx->f_flags & SMBFS_RDD_EOF) return ENOENT; ctx->f_left = ctx->f_limit = limit; error = smbfs_smb_search(ctx); if (error) return error; } rqp = ctx->f_rq; smb_rq_getreply(rqp, &mbp); md_get_mem(mbp, ctx->f_skey, SMB_SKEYLEN, MB_MSYSTEM); md_get_uint8(mbp, &battr); md_get_uint16le(mbp, &time); md_get_uint16le(mbp, &date); md_get_uint32le(mbp, &size); cp = ctx->f_name; 
md_get_mem(mbp, cp, sizeof(ctx->f_fname), MB_MSYSTEM); cp[sizeof(ctx->f_fname) - 1] = 0; cp += strlen(cp) - 1; while (*cp == ' ' && cp >= ctx->f_name) *cp-- = 0; ctx->f_attr.fa_attr = battr; smb_dos2unixtime(date, time, 0, rqp->sr_vc->vc_sopt.sv_tz, &ctx->f_attr.fa_mtime); ctx->f_attr.fa_size = size; ctx->f_nmlen = strlen(ctx->f_name); ctx->f_ecnt--; ctx->f_left--; return 0; } static int smbfs_findcloseLM1(struct smbfs_fctx *ctx) { if (ctx->f_rq) smb_rq_done(ctx->f_rq); return 0; } /* * TRANS2_FIND_FIRST2/NEXT2, used for NT LM12 dialect */ static int smbfs_smb_trans2find2(struct smbfs_fctx *ctx) { struct smb_t2rq *t2p; struct smb_vc *vcp = SSTOVC(ctx->f_ssp); struct mbchain *mbp; struct mdchain *mdp; u_int16_t tw, flags; int error; if (ctx->f_t2) { smb_t2_done(ctx->f_t2); ctx->f_t2 = NULL; } ctx->f_flags &= ~SMBFS_RDD_GOTRNAME; flags = 8 | 2; /* | */ if (ctx->f_flags & SMBFS_RDD_FINDSINGLE) { flags |= 1; /* close search after this request */ ctx->f_flags |= SMBFS_RDD_NOCLOSE; } if (ctx->f_flags & SMBFS_RDD_FINDFIRST) { error = smb_t2_alloc(SSTOCP(ctx->f_ssp), SMB_TRANS2_FIND_FIRST2, ctx->f_scred, &t2p); if (error) return error; ctx->f_t2 = t2p; mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_uint16le(mbp, ctx->f_attrmask); mb_put_uint16le(mbp, ctx->f_limit); mb_put_uint16le(mbp, flags); mb_put_uint16le(mbp, ctx->f_infolevel); mb_put_uint32le(mbp, 0); error = smbfs_fullpath(mbp, vcp, ctx->f_dnp, ctx->f_wildcard, ctx->f_wclen); if (error) return error; } else { error = smb_t2_alloc(SSTOCP(ctx->f_ssp), SMB_TRANS2_FIND_NEXT2, ctx->f_scred, &t2p); if (error) return error; ctx->f_t2 = t2p; mbp = &t2p->t2_tparam; mb_init(mbp); mb_put_mem(mbp, (caddr_t)&ctx->f_Sid, 2, MB_MSYSTEM); mb_put_uint16le(mbp, ctx->f_limit); mb_put_uint16le(mbp, ctx->f_infolevel); mb_put_uint32le(mbp, 0); /* resume key */ mb_put_uint16le(mbp, flags); if (ctx->f_rname) mb_put_mem(mbp, ctx->f_rname, ctx->f_rnamelen + 1, MB_MSYSTEM); else mb_put_uint8(mbp, 0); /* resume file name */ #if 0 struct timeval 
tv; tv.tv_sec = 0; tv.tv_usec = 200 * 1000; /* 200ms */ if (vcp->vc_flags & SMBC_WIN95) { /* * some implementations suggests to sleep here * for 200ms, due to the bug in the Win95. * I've didn't notice any problem, but put code * for it. */ pause("fix95", tvtohz(&tv)); } #endif } t2p->t2_maxpcount = 5 * 2; t2p->t2_maxdcount = vcp->vc_txmax; error = smb_t2_request(t2p); if (error) return error; mdp = &t2p->t2_rparam; if (ctx->f_flags & SMBFS_RDD_FINDFIRST) { if ((error = md_get_uint16(mdp, &ctx->f_Sid)) != 0) return error; ctx->f_flags &= ~SMBFS_RDD_FINDFIRST; } if ((error = md_get_uint16le(mdp, &tw)) != 0) return error; ctx->f_ecnt = tw; if ((error = md_get_uint16le(mdp, &tw)) != 0) return error; if (tw) ctx->f_flags |= SMBFS_RDD_EOF | SMBFS_RDD_NOCLOSE; if ((error = md_get_uint16le(mdp, &tw)) != 0) return error; if ((error = md_get_uint16le(mdp, &tw)) != 0) return error; if (ctx->f_ecnt == 0) { ctx->f_flags |= SMBFS_RDD_EOF | SMBFS_RDD_NOCLOSE; return ENOENT; } ctx->f_rnameofs = tw; mdp = &t2p->t2_rdata; if (mdp->md_top == NULL) { printf("bug: ecnt = %d, but data is NULL (please report)\n", ctx->f_ecnt); return ENOENT; } if (mdp->md_top->m_len == 0) { printf("bug: ecnt = %d, but m_len = 0 and m_next = %p (please report)\n", ctx->f_ecnt,mbp->mb_top->m_next); return ENOENT; } ctx->f_eofs = 0; return 0; } static int smbfs_smb_findclose2(struct smbfs_fctx *ctx) { struct smb_rq *rqp; struct mbchain *mbp; int error; error = smb_rq_alloc(SSTOCP(ctx->f_ssp), SMB_COM_FIND_CLOSE2, ctx->f_scred, &rqp); if (error) return (error); smb_rq_getrequest(rqp, &mbp); smb_rq_wstart(rqp); mb_put_mem(mbp, (caddr_t)&ctx->f_Sid, 2, MB_MSYSTEM); smb_rq_wend(rqp); smb_rq_bstart(rqp); smb_rq_bend(rqp); error = smb_rq_simple(rqp); smb_rq_done(rqp); return error; } static int smbfs_findopenLM2(struct smbfs_fctx *ctx, struct smbnode *dnp, const char *wildcard, int wclen, int attr, struct smb_cred *scred) { if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { ctx->f_name = malloc(SMB_MAXFNAMELEN * 2, 
M_SMBFSDATA, M_WAITOK); } else ctx->f_name = malloc(SMB_MAXFNAMELEN, M_SMBFSDATA, M_WAITOK); ctx->f_infolevel = SMB_DIALECT(SSTOVC(ctx->f_ssp)) < SMB_DIALECT_NTLM0_12 ? SMB_INFO_STANDARD : SMB_FIND_FILE_DIRECTORY_INFO; ctx->f_attrmask = attr; ctx->f_wildcard = wildcard; ctx->f_wclen = wclen; return 0; } static int smbfs_findnextLM2(struct smbfs_fctx *ctx, int limit) { struct mdchain *mbp; struct smb_t2rq *t2p; char *cp; u_int8_t tb; u_int16_t date, time, wattr; u_int32_t size, next, dattr; int64_t lint; int error, svtz, cnt, fxsz, nmlen, recsz; if (ctx->f_ecnt == 0) { if (ctx->f_flags & SMBFS_RDD_EOF) return ENOENT; ctx->f_left = ctx->f_limit = limit; error = smbfs_smb_trans2find2(ctx); if (error) return error; } t2p = ctx->f_t2; mbp = &t2p->t2_rdata; svtz = SSTOVC(ctx->f_ssp)->vc_sopt.sv_tz; switch (ctx->f_infolevel) { case SMB_INFO_STANDARD: next = 0; fxsz = 0; md_get_uint16le(mbp, &date); md_get_uint16le(mbp, &time); /* creation time */ md_get_uint16le(mbp, &date); md_get_uint16le(mbp, &time); /* access time */ smb_dos2unixtime(date, time, 0, svtz, &ctx->f_attr.fa_atime); md_get_uint16le(mbp, &date); md_get_uint16le(mbp, &time); /* access time */ smb_dos2unixtime(date, time, 0, svtz, &ctx->f_attr.fa_mtime); md_get_uint32le(mbp, &size); ctx->f_attr.fa_size = size; md_get_uint32(mbp, NULL); /* allocation size */ md_get_uint16le(mbp, &wattr); ctx->f_attr.fa_attr = wattr; md_get_uint8(mbp, &tb); size = nmlen = tb; fxsz = 23; recsz = next = 24 + nmlen; /* docs misses zero byte at end */ break; case SMB_FIND_FILE_DIRECTORY_INFO: md_get_uint32le(mbp, &next); md_get_uint32(mbp, NULL); /* file index */ md_get_int64(mbp, NULL); /* creation time */ md_get_int64le(mbp, &lint); smb_time_NT2local(lint, svtz, &ctx->f_attr.fa_atime); md_get_int64le(mbp, &lint); smb_time_NT2local(lint, svtz, &ctx->f_attr.fa_mtime); md_get_int64le(mbp, &lint); smb_time_NT2local(lint, svtz, &ctx->f_attr.fa_ctime); md_get_int64le(mbp, &lint); /* file size */ ctx->f_attr.fa_size = lint; 
md_get_int64(mbp, NULL); /* real size (should use) */ md_get_uint32le(mbp, &dattr); /* EA */ ctx->f_attr.fa_attr = dattr; md_get_uint32le(mbp, &size); /* name len */ fxsz = 64; recsz = next ? next : fxsz + size; break; default: SMBERROR("unexpected info level %d\n", ctx->f_infolevel); return EINVAL; } if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { nmlen = min(size, SMB_MAXFNAMELEN * 2); } else nmlen = min(size, SMB_MAXFNAMELEN); cp = ctx->f_name; error = md_get_mem(mbp, cp, nmlen, MB_MSYSTEM); if (error) return error; if (next) { cnt = next - nmlen - fxsz; if (cnt > 0) md_get_mem(mbp, NULL, cnt, MB_MSYSTEM); else if (cnt < 0) { SMBERROR("out of sync\n"); return EBADRPC; } } if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { if (nmlen > 1 && cp[nmlen - 1] == 0 && cp[nmlen - 2] == 0) nmlen -= 2; } else if (nmlen && cp[nmlen - 1] == 0) nmlen--; if (nmlen == 0) return EBADRPC; next = ctx->f_eofs + recsz; if (ctx->f_rnameofs && (ctx->f_flags & SMBFS_RDD_GOTRNAME) == 0 && (ctx->f_rnameofs >= ctx->f_eofs && ctx->f_rnameofs < next)) { /* * Server needs a resume filename. 
*/ if (ctx->f_rnamelen <= nmlen) { if (ctx->f_rname) free(ctx->f_rname, M_SMBFSDATA); ctx->f_rname = malloc(nmlen + 1, M_SMBFSDATA, M_WAITOK); ctx->f_rnamelen = nmlen; } bcopy(ctx->f_name, ctx->f_rname, nmlen); ctx->f_rname[nmlen] = 0; ctx->f_flags |= SMBFS_RDD_GOTRNAME; } ctx->f_nmlen = nmlen; ctx->f_eofs = next; ctx->f_ecnt--; ctx->f_left--; return 0; } static int smbfs_findcloseLM2(struct smbfs_fctx *ctx) { if (ctx->f_name) free(ctx->f_name, M_SMBFSDATA); if (ctx->f_t2) smb_t2_done(ctx->f_t2); if ((ctx->f_flags & SMBFS_RDD_NOCLOSE) == 0) smbfs_smb_findclose2(ctx); return 0; } int smbfs_findopen(struct smbnode *dnp, const char *wildcard, int wclen, int attr, struct smb_cred *scred, struct smbfs_fctx **ctxpp) { struct smbfs_fctx *ctx; int error; ctx = malloc(sizeof(*ctx), M_SMBFSDATA, M_WAITOK | M_ZERO); ctx->f_ssp = dnp->n_mount->sm_share; ctx->f_dnp = dnp; ctx->f_flags = SMBFS_RDD_FINDFIRST; ctx->f_scred = scred; if (SMB_DIALECT(SSTOVC(ctx->f_ssp)) < SMB_DIALECT_LANMAN2_0 || (dnp->n_mount->sm_flags & SMBFS_MOUNT_NO_LONG)) { ctx->f_flags |= SMBFS_RDD_USESEARCH; error = smbfs_findopenLM1(ctx, dnp, wildcard, wclen, attr, scred); } else error = smbfs_findopenLM2(ctx, dnp, wildcard, wclen, attr, scred); if (error) smbfs_findclose(ctx, scred); else *ctxpp = ctx; return error; } int smbfs_findnext(struct smbfs_fctx *ctx, int limit, struct smb_cred *scred) { int error; if (limit == 0) limit = 1000000; else if (limit > 1) limit *= 4; /* imperical */ ctx->f_scred = scred; for (;;) { if (ctx->f_flags & SMBFS_RDD_USESEARCH) { error = smbfs_findnextLM1(ctx, limit); } else error = smbfs_findnextLM2(ctx, limit); if (error) return error; if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) { if ((ctx->f_nmlen == 2 && *(u_int16_t *)ctx->f_name == htole16(0x002e)) || (ctx->f_nmlen == 4 && *(u_int32_t *)ctx->f_name == htole32(0x002e002e))) continue; } else if ((ctx->f_nmlen == 1 && ctx->f_name[0] == '.') || (ctx->f_nmlen == 2 && ctx->f_name[0] == '.' 
&& ctx->f_name[1] == '.')) continue; break; } smbfs_fname_tolocal(SSTOVC(ctx->f_ssp), ctx->f_name, &ctx->f_nmlen, ctx->f_dnp->n_mount->sm_caseopt); ctx->f_attr.fa_ino = smbfs_getino(ctx->f_dnp, ctx->f_name, ctx->f_nmlen); return 0; } int smbfs_findclose(struct smbfs_fctx *ctx, struct smb_cred *scred) { ctx->f_scred = scred; if (ctx->f_flags & SMBFS_RDD_USESEARCH) { smbfs_findcloseLM1(ctx); } else smbfs_findcloseLM2(ctx); if (ctx->f_rname) free(ctx->f_rname, M_SMBFSDATA); free(ctx, M_SMBFSDATA); return 0; } int smbfs_smb_lookup(struct smbnode *dnp, const char *name, int nmlen, struct smbfattr *fap, struct smb_cred *scred) { struct smbfs_fctx *ctx; int error; if (dnp == NULL || (dnp->n_ino == 2 && name == NULL)) { bzero(fap, sizeof(*fap)); fap->fa_attr = SMB_FA_DIR; fap->fa_ino = 2; return 0; } MPASS(!(nmlen == 2 && name[0] == '.' && name[1] == '.')); MPASS(!(nmlen == 1 && name[0] == '.')); ASSERT_VOP_ELOCKED(dnp->n_vnode, "smbfs_smb_lookup"); error = smbfs_findopen(dnp, name, nmlen, SMB_FA_SYSTEM | SMB_FA_HIDDEN | SMB_FA_DIR, scred, &ctx); if (error) return error; ctx->f_flags |= SMBFS_RDD_FINDSINGLE; error = smbfs_findnext(ctx, 1, scred); if (error == 0) { *fap = ctx->f_attr; if (name == NULL) fap->fa_ino = dnp->n_ino; } smbfs_findclose(ctx, scred); return error; } Index: head/sys/fs/smbfs/smbfs_subr.c =================================================================== --- head/sys/fs/smbfs/smbfs_subr.c (revision 326267) +++ head/sys/fs/smbfs/smbfs_subr.c (revision 326268) @@ -1,203 +1,205 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2000-2001 Boris Popov * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. 
Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * NOTE(review): the header names of the following includes were lost when
 * this file was extracted (angle-bracketed filenames stripped).  Recover
 * them from the repository copy of smbfs_subr.c before compiling.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

MALLOC_DEFINE(M_SMBFSDATA, "smbfs_data", "SMBFS private data");
MALLOC_DEFINE(M_SMBFSCRED, "smbfs_cred", "SMBFS cred data");

/*
 * Convert a local timespec to server-local seconds by applying the
 * server's timezone offset (minutes west, from the session options).
 */
void
smb_time_local2server(struct timespec *tsp, int tzoff, u_long *seconds)
{
	*seconds = tsp->tv_sec - tzoff * 60 /*- tz_minuteswest * 60 -
	    (wall_cmos_clock ? adjkerntz : 0)*/;
}

/*
 * Inverse of smb_time_local2server(): server-local seconds to timespec.
 */
void
smb_time_server2local(u_long seconds, int tzoff, struct timespec *tsp)
{
	tsp->tv_sec = seconds + tzoff * 60;
}

/*
 * Number of seconds between 1970 and 1601 year
 * (the NT epoch starts 1601-01-01).
 */
static int64_t DIFF1970TO1601 = 11644473600ULL;

/*
 * Time from server comes as UTC, so no need to use tz
 */
void
smb_time_NT2local(int64_t nsec, int tzoff, struct timespec *tsp)
{
	/* NT time is in 100ns units since 1601; tzoff is ignored (UTC) */
	smb_time_server2local(nsec / 10000000 - DIFF1970TO1601, 0, tsp);
}

/*
 * Convert a local timespec to an NT 64-bit timestamp (100ns units since
 * 1601, UTC).  The low second bit is masked off for DOS 2-second
 * granularity compatibility.
 */
void
smb_time_local2NT(struct timespec *tsp, int tzoff, int64_t *nsec)
{
	u_long seconds;

	smb_time_local2server(tsp, 0, &seconds);
	*nsec = (((int64_t)(seconds) & ~1) + DIFF1970TO1601) *
	    (int64_t)10000000;
}

/*
 * Convert a Unix timespec to DOS date/time words in the server's
 * timezone (wrapper over timespec2fattime()).
 */
void
smb_time_unix2dos(struct timespec *tsp, int tzoff, u_int16_t *ddp,
	u_int16_t *dtp, u_int8_t *dhp)
{
	struct timespec tt;
	u_long t;

	tt = *tsp;
	smb_time_local2server(tsp, tzoff, &t);
	tt.tv_sec = t;
	timespec2fattime(&tt, 1, ddp, dtp, dhp);
}

/*
 * Convert DOS date/time words (server timezone) to a Unix timespec.
 */
void
smb_dos2unixtime(u_int dd, u_int dt, u_int dh, int tzoff,
	struct timespec *tsp)
{

	fattime2timespec(dd, dt, dh, 1, tsp);
	smb_time_server2local(tsp->tv_sec, tzoff, tsp);
}

/*
 * Marshal the full remote path of dnp (optionally with a trailing
 * name component) into mbp, honoring the dialect's case rules and
 * ASCII/UCS-2 encoding, and NUL-terminate it.
 */
int
smbfs_fullpath(struct mbchain *mbp, struct smb_vc *vcp, struct smbnode *dnp,
	const char *name, int nmlen)
{
	int caseopt = SMB_CS_NONE;
	int error;

	if (SMB_UNICODE_STRINGS(vcp)) {
		/* UCS-2 strings must start on an even boundary */
		error = mb_put_padbyte(mbp);
		if (error)
			return error;
	}
	if (SMB_DIALECT(vcp) < SMB_DIALECT_LANMAN1_0)
		caseopt |= SMB_CS_UPPER;
	if (dnp != NULL) {
		error = smb_put_dmem(mbp, vcp, dnp->n_rpath, dnp->n_rplen,
		    caseopt);
		if (error)
			return error;
		if (name) {
			/* Put the separator */
			if (SMB_UNICODE_STRINGS(vcp))
				error = mb_put_uint16le(mbp, '\\');
			else
				error = mb_put_uint8(mbp, '\\');
			if (error)
				return error;
			/* Put the name */
			error = smb_put_dmem(mbp, vcp, name, nmlen, caseopt);
			if (error)
				return error;
		}
	}
	/* Put NULL terminator. */
	if (SMB_UNICODE_STRINGS(vcp))
		error = mb_put_uint16le(mbp, 0);
	else
		error = mb_put_uint8(mbp, 0);
	return error;
}

/*
 * Convert a file name received from the server into the local character
 * set (and requested case) in place.  name/nmlen are updated on success.
 */
int
smbfs_fname_tolocal(struct smb_vc *vcp, char *name, int *nmlen, int caseopt)
{
	int copt = (caseopt == SMB_CS_LOWER ? KICONV_FROM_LOWER :
	    (caseopt == SMB_CS_UPPER ? KICONV_FROM_UPPER : 0));
	int error = 0;
	size_t ilen = *nmlen;
	size_t olen;
	char *ibuf = name;
	char outbuf[SMB_MAXFNAMELEN];
	char *obuf = outbuf;

	if (vcp->vc_tolocal) {
		olen = sizeof(outbuf);
		bzero(outbuf, sizeof(outbuf));

		/*
		error = iconv_conv_case
			(vcp->vc_tolocal, NULL, NULL, &obuf, &olen, copt);
		if (error) return error;
		*/

		error = iconv_conv_case(vcp->vc_tolocal,
		    __DECONST(const char **, &ibuf), &ilen,
		    &obuf, &olen, copt);
		if (error && SMB_UNICODE_STRINGS(vcp)) {
			/*
			 * If using unicode, leaving a file name as it was when
			 * convert fails will cause a problem because the file
			 * name will contain NULL.
			 * Here, put '?' and give converted file name.
			 */
			*obuf = '?';
			olen--;
			error = 0;
		}
		if (!error) {
			*nmlen = sizeof(outbuf) - olen;
			memcpy(name, outbuf, *nmlen);
		}
	}
	return error;
}

/* Allocate an smb_cred; caller must release via smbfs_free_scred(). */
void *
smbfs_malloc_scred(void)
{

	return (malloc(sizeof(struct smb_cred), M_SMBFSCRED, M_WAITOK));
}

/* Free an smb_cred obtained from smbfs_malloc_scred(). */
void
smbfs_free_scred(void *scred)
{

	free(scred, M_SMBFSCRED);
}
Index: head/sys/fs/smbfs/smbfs_subr.h
===================================================================
--- head/sys/fs/smbfs/smbfs_subr.h	(revision 326267)
+++ head/sys/fs/smbfs/smbfs_subr.h	(revision 326268)
@@ -1,183 +1,185 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2000-2001 Boris Popov
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
#ifndef _FS_SMBFS_SMBFS_SUBR_H_
#define _FS_SMBFS_SMBFS_SUBR_H_

#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_SMBFSDATA);
MALLOC_DECLARE(M_SMBFSCRED);
#endif

/* Unconditional error printf, prefixed with the calling function's name. */
#define SMBFSERR(format, args...) printf("%s: "format, __func__ ,## args)

/* Vnode-layer debug printf; compiled out unless SMB_VNODE_DEBUG is set. */
#ifdef SMB_VNODE_DEBUG
#define SMBVDEBUG(format, args...) printf("%s: "format, __func__ ,## args)
#else
#define SMBVDEBUG(format, args...)
#endif

/*
 * Possible lock commands
 */
#define SMB_LOCK_EXCL		0
#define	SMB_LOCK_SHARED		1
#define	SMB_LOCK_RELEASE	2

struct smbmount;
struct proc;
struct timespec;
struct ucred;
struct vattr;
struct vnode;
struct statfs;

/* File attributes as decoded from SMB replies. */
struct smbfattr {
	int		fa_attr;	/* SMB_FA_* DOS attribute bits */
	int64_t		fa_size;	/* file size in bytes */
	struct timespec	fa_atime;	/* access time */
	struct timespec	fa_ctime;	/* change time */
	struct timespec	fa_mtime;	/* modify time */
	long		fa_ino;		/* synthesized inode number */
};

/*
 * Context to perform findfirst/findnext/findclose operations
 */
#define	SMBFS_RDD_FINDFIRST	0x01	/* next call issues FIND_FIRST */
#define	SMBFS_RDD_EOF		0x02	/* no more entries on the server */
#define	SMBFS_RDD_FINDSINGLE	0x04	/* single-entry lookup, not a scan */
#define	SMBFS_RDD_USESEARCH	0x08	/* use core SMB_COM_SEARCH (LM1) */
#define	SMBFS_RDD_NOCLOSE	0x10	/* server already closed the search */
#define	SMBFS_RDD_GOTRNAME	0x1000	/* resume name captured */

/*
 * Search context supplied by server
 */
#define	SMB_SKEYLEN		21			/* search context */
#define SMB_DENTRYLEN		(SMB_SKEYLEN + 22)	/* entire entry */

struct smbfs_fctx {
	/*
	 * Setable values
	 */
	int		f_flags;	/* SMBFS_RDD_ */
	/*
	 * Return values
	 */
	struct smbfattr	f_attr;		/* current attributes */
	char *		f_name;		/* current file name */
	int		f_nmlen;	/* name len */
	/*
	 * Internal variables
	 */
	int		f_limit;	/* maximum number of entries */
	int		f_attrmask;	/* SMB_FA_ */
	int		f_wclen;
	const char *	f_wildcard;
	struct smbnode*	f_dnp;
	struct smb_cred*f_scred;
	struct smb_share *f_ssp;
	union {
		struct smb_rq *	uf_rq;		/* LM1: pending SEARCH reply */
		struct smb_t2rq * uf_t2;	/* LM2: pending TRANS2 reply */
	} f_urq;
	int		f_left;		/* entries left */
	int		f_ecnt;		/* entries left in the current response */
	int		f_eofs;		/* entry offset in the parameter block */
	u_char 		f_skey[SMB_SKEYLEN]; /* server side search context */
	u_char		f_fname[8 + 1 + 3 + 1];	/* common case for 8.3 filenames */
	u_int16_t	f_Sid;		/* TRANS2 search handle */
	u_int16_t	f_infolevel;	/* TRANS2 information level */
	int		f_rnamelen;
	char *		f_rname;	/* resume name/key */
	int		f_rnameofs;
};

#define f_rq	f_urq.uf_rq
#define f_t2	f_urq.uf_t2

/*
 * smb level
 */
int  smbfs_smb_lock(struct smbnode *np, int op, caddr_t id,
	off_t start, off_t end, struct smb_cred *scred);
int  smbfs_smb_statfs(struct smb_share *ssp, struct statfs *sbp,
	struct smb_cred *scred);
int  smbfs_smb_setfsize(struct smbnode *np, int64_t newsize,
	struct smb_cred *scred);
int  smbfs_smb_query_info(struct smbnode *np, const char *name, int len,
	struct smbfattr *fap, struct smb_cred *scred);
int  smbfs_smb_setpattr(struct smbnode *np, u_int16_t attr,
	struct timespec *mtime, struct smb_cred *scred);
int  smbfs_smb_setptime2(struct smbnode *np, struct timespec *mtime,
	struct timespec *atime, int attr, struct smb_cred *scred);
int  smbfs_smb_setpattrNT(struct smbnode *np, u_int16_t attr,
	struct timespec *mtime, struct timespec *atime, struct smb_cred *scred);
int  smbfs_smb_setftime(struct smbnode *np, struct timespec *mtime,
	struct timespec *atime, struct smb_cred *scred);
int  smbfs_smb_setfattrNT(struct smbnode *np, u_int16_t attr,
	struct timespec *mtime, struct timespec *atime, struct smb_cred *scred);
int  smbfs_smb_open(struct smbnode *np, int accmode, struct smb_cred *scred);
int  smbfs_smb_close(struct smb_share *ssp, u_int16_t fid,
	struct timespec *mtime, struct smb_cred *scred);
int  smbfs_smb_create(struct smbnode *dnp, const char *name, int len,
	struct smb_cred *scred);
int  smbfs_smb_delete(struct smbnode *np, struct smb_cred *scred);
int  smbfs_smb_flush(struct smbnode *np, struct smb_cred *scred);
int  smbfs_smb_rename(struct smbnode *src, struct smbnode *tdnp,
	const char *tname, int tnmlen, struct smb_cred *scred);
int  smbfs_smb_move(struct smbnode *src, struct smbnode *tdnp,
	const char *tname, int tnmlen, u_int16_t flags, struct smb_cred *scred);
int  smbfs_smb_mkdir(struct smbnode *dnp, const char *name, int len,
	struct smb_cred *scred);
int  smbfs_smb_rmdir(struct smbnode *np, struct smb_cred *scred);
int  smbfs_findopen(struct smbnode *dnp, const char *wildcard, int wclen,
	int attr, struct smb_cred *scred, struct smbfs_fctx **ctxpp);
int  smbfs_findnext(struct smbfs_fctx *ctx, int limit, struct smb_cred *scred);
int  smbfs_findclose(struct smbfs_fctx *ctx, struct smb_cred *scred);
int  smbfs_fullpath(struct mbchain *mbp, struct smb_vc *vcp,
	struct smbnode *dnp, const char *name, int nmlen);
int  smbfs_smb_lookup(struct smbnode *dnp, const char *name, int nmlen,
	struct smbfattr *fap, struct smb_cred *scred);
int  smbfs_fname_tolocal(struct smb_vc *vcp, char *name, int *nmlen,
	int caseopt);
void  smb_time_local2server(struct timespec *tsp, int tzoff, u_long *seconds);
void  smb_time_server2local(u_long seconds, int tzoff, struct timespec *tsp);
void  smb_time_NT2local(int64_t nsec, int tzoff, struct timespec *tsp);
void  smb_time_local2NT(struct timespec *tsp, int tzoff, int64_t *nsec);
void  smb_time_unix2dos(struct timespec *tsp, int tzoff, u_int16_t *ddp,
	u_int16_t *dtp, u_int8_t *dhp);
void  smb_dos2unixtime (u_int dd, u_int dt, u_int dh, int tzoff,
	struct timespec *tsp);
void *smbfs_malloc_scred(void);
void smbfs_free_scred(void *);

#endif /* !_FS_SMBFS_SMBFS_SUBR_H_ */
Index: head/sys/fs/smbfs/smbfs_vfsops.c
===================================================================
--- head/sys/fs/smbfs/smbfs_vfsops.c	(revision 326267)
+++ head/sys/fs/smbfs/smbfs_vfsops.c	(revision 326268)
@@ -1,406 +1,408 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
 * Copyright (c) 2000-2001 Boris Popov
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
/*
 * NOTE(review): the header names inside the following #include directives
 * were lost during text extraction (angle-bracket contents stripped);
 * restore them from the FreeBSD repository before compiling.
 */
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

static int smbfs_debuglevel = 0;

static int smbfs_version = SMBFS_VERSION;

SYSCTL_NODE(_vfs, OID_AUTO, smbfs, CTLFLAG_RW, 0, "SMB/CIFS filesystem");
SYSCTL_INT(_vfs_smbfs, OID_AUTO, version, CTLFLAG_RD, &smbfs_version, 0, "");
SYSCTL_INT(_vfs_smbfs, OID_AUTO, debuglevel, CTLFLAG_RW, &smbfs_debuglevel, 0, "");

static vfs_init_t       smbfs_init;
static vfs_uninit_t     smbfs_uninit;
static vfs_cmount_t     smbfs_cmount;
static vfs_mount_t      smbfs_mount;
static vfs_root_t       smbfs_root;
static vfs_quotactl_t   smbfs_quotactl;
static vfs_statfs_t     smbfs_statfs;
static vfs_unmount_t    smbfs_unmount;

static struct vfsops smbfs_vfsops = {
	.vfs_init =		smbfs_init,
	.vfs_cmount =		smbfs_cmount,
	.vfs_mount =		smbfs_mount,
	.vfs_quotactl =		smbfs_quotactl,
	.vfs_root =		smbfs_root,
	.vfs_statfs =		smbfs_statfs,
	.vfs_sync =		vfs_stdsync,
	.vfs_uninit =		smbfs_uninit,
	.vfs_unmount =		smbfs_unmount,
};

VFS_SET(smbfs_vfsops, smbfs, VFCF_NETWORK);

MODULE_DEPEND(smbfs, netsmb, NSMB_VERSION, NSMB_VERSION, NSMB_VERSION);
MODULE_DEPEND(smbfs, libiconv, 1, 1, 2);
MODULE_DEPEND(smbfs, libmchain, 1, 1, 1);

int smbfs_pbuf_freecnt = -1;	/* start out unlimited */

/*
 * Compatibility shim for the old mount(2) path: copy in the legacy
 * smbfs_args structure and re-express it as nmount-style options.
 */
static int
smbfs_cmount(struct mntarg *ma, void * data, uint64_t flags)
{
	struct smbfs_args args;
	int error;

	error = copyin(data, &args, sizeof(struct smbfs_args));
	if (error)
		return error;

	if (args.version != SMBFS_VERSION) {
		printf("mount version mismatch: kernel=%d, mount=%d\n",
		    SMBFS_VERSION, args.version);
		return EINVAL;
	}
	ma = mount_argf(ma, "dev", "%d", args.dev);
	ma = mount_argb(ma, args.flags & SMBFS_MOUNT_SOFT, "nosoft");
	ma = mount_argb(ma, args.flags & SMBFS_MOUNT_INTR, "nointr");
	ma = mount_argb(ma, args.flags & SMBFS_MOUNT_STRONG, "nostrong");
	ma = mount_argb(ma, args.flags & SMBFS_MOUNT_HAVE_NLS, "nohave_nls");
	ma = mount_argb(ma, !(args.flags & SMBFS_MOUNT_NO_LONG), "nolong");
	ma = mount_arg(ma, "rootpath", args.root_path, -1);
	ma = mount_argf(ma, "uid", "%d", args.uid);
	ma = mount_argf(ma, "gid", "%d", args.gid);
	ma = mount_argf(ma, "file_mode", "%d", args.file_mode);
	ma = mount_argf(ma, "dir_mode", "%d", args.dir_mode);
	ma = mount_argf(ma, "caseopt", "%d", args.caseopt);

	error = kernel_mount(ma, flags);

	return (error);
}

static const char *smbfs_opts[] = {
	"fd", "soft", "intr", "strong", "have_nls", "long",
	"mountpoint", "rootpath", "uid", "gid", "file_mode", "dir_mode",
	"caseopt", "errmsg", NULL
};

/*
 * Mount a share: take over the netsmb session referenced by the "fd"
 * option, fill in the smbmount private data and grab one extra root
 * vnode reference (released in smbfs_unmount()).
 */
static int
smbfs_mount(struct mount *mp)
{
	struct smbmount *smp = NULL;
	struct smb_vc *vcp;
	struct smb_share *ssp = NULL;
	struct vnode *vp;
	struct thread *td;
	struct smb_dev *dev;
	struct smb_cred *scred;
	int error, v;
	char *pc, *pe;

	dev = NULL;
	td = curthread;
	if (mp->mnt_flag & (MNT_UPDATE | MNT_ROOTFS))
		return EOPNOTSUPP;

	if (vfs_filteropt(mp->mnt_optnew, smbfs_opts)) {
		vfs_mount_error(mp, "%s", "Invalid option");
		return (EINVAL);
	}

	scred = smbfs_malloc_scred();
	smb_makescred(scred, td, td->td_ucred);

	/* Ask userspace of `fd`, the file descriptor of this session */
	if (1 != vfs_scanopt(mp->mnt_optnew, "fd", "%d", &v)) {
		vfs_mount_error(mp, "No fd option");
		smbfs_free_scred(scred);
		return (EINVAL);
	}

	error = smb_dev2share(v, SMBM_EXEC, scred, &ssp, &dev);
	smp = malloc(sizeof(*smp), M_SMBFSDATA, M_WAITOK | M_ZERO);
	if (error) {
		printf("invalid device handle %d (%d)\n", v, error);
		vfs_mount_error(mp, "invalid device handle %d %d\n", v, error);
		smbfs_free_scred(scred);
		free(smp, M_SMBFSDATA);
		return error;
	}
	vcp = SSTOVC(ssp);
	/* dev2share returned the share locked; drop the lock, keep the ref. */
	smb_share_unlock(ssp);
	mp->mnt_stat.f_iosize = SSTOVC(ssp)->vc_txmax;
	mp->mnt_data = smp;
	smp->sm_share = ssp;
	smp->sm_root = NULL;
	smp->sm_dev = dev;
	if (1 != vfs_scanopt(mp->mnt_optnew,
	    "caseopt", "%d", &smp->sm_caseopt)) {
		vfs_mount_error(mp, "Invalid caseopt");
		error = EINVAL;
		goto bad;
	}
	if (1 != vfs_scanopt(mp->mnt_optnew, "uid", "%d", &v)) {
		vfs_mount_error(mp, "Invalid uid");
		error = EINVAL;
		goto bad;
	}
	smp->sm_uid = v;

	if (1 != vfs_scanopt(mp->mnt_optnew, "gid", "%d", &v)) {
		vfs_mount_error(mp, "Invalid gid");
		error = EINVAL;
		goto bad;
	}
	smp->sm_gid = v;

	if (1 != vfs_scanopt(mp->mnt_optnew, "file_mode", "%d", &v)) {
		vfs_mount_error(mp, "Invalid file_mode");
		error = EINVAL;
		goto bad;
	}
	smp->sm_file_mode = (v & (S_IRWXU|S_IRWXG|S_IRWXO)) | S_IFREG;

	if (1 != vfs_scanopt(mp->mnt_optnew, "dir_mode", "%d", &v)) {
		vfs_mount_error(mp, "Invalid dir_mode");
		error = EINVAL;
		goto bad;
	}
	smp->sm_dir_mode = (v & (S_IRWXU|S_IRWXG|S_IRWXO)) | S_IFDIR;

	vfs_flagopt(mp->mnt_optnew,
	    "nolong", &smp->sm_flags, SMBFS_MOUNT_NO_LONG);

	/* Build "//user@server/share" into f_mntfromname, truncating safely. */
	pc = mp->mnt_stat.f_mntfromname;
	pe = pc + sizeof(mp->mnt_stat.f_mntfromname);
	bzero(pc, MNAMELEN);
	*pc++ = '/';
	*pc++ = '/';
	pc = strchr(strncpy(pc, vcp->vc_username, pe - pc - 2), 0);
	if (pc < pe-1) {
		*(pc++) = '@';
		pc = strchr(strncpy(pc, vcp->vc_srvname, pe - pc - 2), 0);
		if (pc < pe - 1) {
			*(pc++) = '/';
			strncpy(pc, ssp->ss_name, pe - pc - 2);
		}
	}
	vfs_getnewfsid(mp);

	error = smbfs_root(mp, LK_EXCLUSIVE, &vp);
	if (error) {
		vfs_mount_error(mp, "smbfs_root error: %d", error);
		goto bad;
	}
	VOP_UNLOCK(vp, 0);
	SMBVDEBUG("root.v_usecount = %d\n", vrefcnt(vp));

#ifdef DIAGNOSTIC
	SMBERROR("mp=%p\n", mp);
#endif
	smbfs_free_scred(scred);
	return error;
bad:
	if (ssp)
		smb_share_put(ssp, scred);
	smbfs_free_scred(scred);
	SMB_LOCK();
	if (error && smp->sm_dev == dev) {
		smp->sm_dev = NULL;
		sdp_trydestroy(dev);
	}
	SMB_UNLOCK();
	free(smp, M_SMBFSDATA);
	return error;
}

/* Unmount the filesystem described by mp. */
static int
smbfs_unmount(struct mount *mp, int mntflags)
{
	struct thread *td;
	struct smbmount *smp = VFSTOSMBFS(mp);
	struct smb_cred *scred;
	struct smb_dev *dev;
	int error, flags;

	SMBVDEBUG("smbfs_unmount: flags=%04x\n", mntflags);
	td = curthread;
	flags = 0;
	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;
	/*
	 * Keep trying to flush the vnode list for the mount while
	 * some are still busy and we are making progress towards
	 * making them not busy. This is needed because smbfs vnodes
	 * reference their parent directory but may appear after their
	 * parent in the list; one pass over the vnode list is not
	 * sufficient in this case.
	 */
	do {
		smp->sm_didrele = 0;
		/* There is 1 extra root vnode reference from smbfs_mount(). */
		error = vflush(mp, 1, flags, td);
	} while (error == EBUSY && smp->sm_didrele != 0);
	if (error)
		return error;
	scred = smbfs_malloc_scred();
	smb_makescred(scred, td, td->td_ucred);
	error = smb_share_lock(smp->sm_share);
	if (error)
		goto out;
	smb_share_put(smp->sm_share, scred);
	SMB_LOCK();
	dev = smp->sm_dev;
	if (!dev)
		panic("No private data for mount point");
	sdp_trydestroy(dev);
	mp->mnt_data = NULL;
	SMB_UNLOCK();
	free(smp, M_SMBFSDATA);
	MNT_ILOCK(mp);
	mp->mnt_flag &= ~MNT_LOCAL;
	MNT_IUNLOCK(mp);
out:
	smbfs_free_scred(scred);
	return error;
}

/* 
 * Return locked root vnode of a filesystem
 */
static int
smbfs_root(struct mount *mp, int flags, struct vnode **vpp)
{
	struct smbmount *smp = VFSTOSMBFS(mp);
	struct vnode *vp;
	struct smbnode *np;
	struct smbfattr fattr;
	struct thread *td;
	struct ucred *cred;
	struct smb_cred *scred;
	int error;

	td = curthread;
	cred = td->td_ucred;

	/* Fast path: root node already instantiated. */
	if (smp->sm_root) {
		*vpp = SMBTOV(smp->sm_root);
		return vget(*vpp, LK_EXCLUSIVE | LK_RETRY, td);
	}
	scred = smbfs_malloc_scred();
	smb_makescred(scred, td, cred);
	error = smbfs_smb_lookup(NULL, NULL, 0, &fattr, scred);
	if (error)
		goto out;
	error = smbfs_nget(mp, NULL, NULL, 0, &fattr, &vp);
	if (error)
		goto out;
	ASSERT_VOP_LOCKED(vp, "smbfs_root");
	vp->v_vflag |= VV_ROOT;
	np = VTOSMB(vp);
	smp->sm_root = np;
	*vpp = vp;
out:
	smbfs_free_scred(scred);
	return error;
}

/*
 * Do operations associated with quotas, not supported
 */
/* ARGSUSED */
static int
smbfs_quotactl(mp, cmd, uid, arg)
	struct mount *mp;
	int cmd;
	uid_t uid;
	void *arg;
{
	SMBVDEBUG("return EOPNOTSUPP\n");
	return EOPNOTSUPP;
}

/*ARGSUSED*/
int
smbfs_init(struct vfsconf *vfsp)
{
	smbfs_pbuf_freecnt = nswbuf / 2 + 1;
	SMBVDEBUG("done.\n");
	return 0;
}

/*ARGSUSED*/
int
smbfs_uninit(struct vfsconf *vfsp)
{

	SMBVDEBUG("done.\n");
	return 0;
}

/*
 * smbfs_statfs call
 */
int
smbfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct thread *td = curthread;
	struct smbmount *smp = VFSTOSMBFS(mp);
	struct smbnode *np = smp->sm_root;
	struct smb_share *ssp = smp->sm_share;
	struct smb_cred *scred;
	int error;

	if (np == NULL) {
		vfs_mount_error(mp, "np == NULL");
		return EINVAL;
	}

	sbp->f_iosize = SSTOVC(ssp)->vc_txmax;	/* optimal transfer block size */
	scred = smbfs_malloc_scred();
	smb_makescred(scred, td, td->td_ucred);
	error = smbfs_smb_statfs(ssp, sbp, scred);
	smbfs_free_scred(scred);
	return (error);
}
Index: head/sys/fs/smbfs/smbfs_vnops.c
===================================================================
--- head/sys/fs/smbfs/smbfs_vnops.c	(revision 326267)
+++ head/sys/fs/smbfs/smbfs_vnops.c	(revision 326268)
@@ -1,1374 +1,1376 @@
 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
  * Copyright (c) 2000-2001 Boris Popov
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Prototypes for SMBFS vnode operations */ static vop_create_t smbfs_create; static vop_mknod_t smbfs_mknod; static vop_open_t smbfs_open; static vop_close_t smbfs_close; static vop_access_t smbfs_access; static vop_getattr_t smbfs_getattr; static vop_setattr_t smbfs_setattr; static vop_read_t smbfs_read; static vop_write_t smbfs_write; static vop_fsync_t smbfs_fsync; static vop_remove_t smbfs_remove; static vop_link_t smbfs_link; static vop_lookup_t smbfs_lookup; static vop_rename_t smbfs_rename; static vop_mkdir_t smbfs_mkdir; static vop_rmdir_t smbfs_rmdir; static vop_symlink_t smbfs_symlink; static vop_readdir_t smbfs_readdir; static vop_strategy_t smbfs_strategy; static vop_print_t smbfs_print; static vop_pathconf_t smbfs_pathconf; static vop_advlock_t smbfs_advlock; static vop_getextattr_t 
smbfs_getextattr; struct vop_vector smbfs_vnodeops = { .vop_default = &default_vnodeops, .vop_access = smbfs_access, .vop_advlock = smbfs_advlock, .vop_close = smbfs_close, .vop_create = smbfs_create, .vop_fsync = smbfs_fsync, .vop_getattr = smbfs_getattr, .vop_getextattr = smbfs_getextattr, .vop_getpages = smbfs_getpages, .vop_inactive = smbfs_inactive, .vop_ioctl = smbfs_ioctl, .vop_link = smbfs_link, .vop_lookup = smbfs_lookup, .vop_mkdir = smbfs_mkdir, .vop_mknod = smbfs_mknod, .vop_open = smbfs_open, .vop_pathconf = smbfs_pathconf, .vop_print = smbfs_print, .vop_putpages = smbfs_putpages, .vop_read = smbfs_read, .vop_readdir = smbfs_readdir, .vop_reclaim = smbfs_reclaim, .vop_remove = smbfs_remove, .vop_rename = smbfs_rename, .vop_rmdir = smbfs_rmdir, .vop_setattr = smbfs_setattr, /* .vop_setextattr = smbfs_setextattr,*/ .vop_strategy = smbfs_strategy, .vop_symlink = smbfs_symlink, .vop_write = smbfs_write, }; static int smbfs_access(ap) struct vop_access_args /* { struct vnode *a_vp; accmode_t a_accmode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; accmode_t accmode = ap->a_accmode; mode_t mpmode; struct smbmount *smp = VTOSMBFS(vp); SMBVDEBUG("\n"); if ((accmode & VWRITE) && (vp->v_mount->mnt_flag & MNT_RDONLY)) { switch (vp->v_type) { case VREG: case VDIR: case VLNK: return EROFS; default: break; } } mpmode = vp->v_type == VREG ? 
smp->sm_file_mode : smp->sm_dir_mode; return (vaccess(vp->v_type, mpmode, smp->sm_uid, smp->sm_gid, ap->a_accmode, ap->a_cred, NULL)); } /* ARGSUSED */ static int smbfs_open(ap) struct vop_open_args /* { struct vnode *a_vp; int a_mode; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; struct smbnode *np = VTOSMB(vp); struct smb_cred *scred; struct vattr vattr; int mode = ap->a_mode; int error, accmode; SMBVDEBUG("%s,%d\n", np->n_name, (np->n_flag & NOPEN) != 0); if (vp->v_type != VREG && vp->v_type != VDIR) { SMBFSERR("open eacces vtype=%d\n", vp->v_type); return EACCES; } if (vp->v_type == VDIR) { np->n_flag |= NOPEN; return 0; } if (np->n_flag & NMODIFIED) { if ((error = smbfs_vinvalbuf(vp, ap->a_td)) == EINTR) return error; smbfs_attr_cacheremove(vp); error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) return error; np->n_mtime.tv_sec = vattr.va_mtime.tv_sec; } else { error = VOP_GETATTR(vp, &vattr, ap->a_cred); if (error) return error; if (np->n_mtime.tv_sec != vattr.va_mtime.tv_sec) { error = smbfs_vinvalbuf(vp, ap->a_td); if (error == EINTR) return error; np->n_mtime.tv_sec = vattr.va_mtime.tv_sec; } } if ((np->n_flag & NOPEN) != 0) return 0; /* * Use DENYNONE to give unixy semantics of permitting * everything not forbidden by permissions. Ie denial * is up to server with clients/openers needing to use * advisory locks for further control. 
*/ accmode = SMB_SM_DENYNONE|SMB_AM_OPENREAD; if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) accmode = SMB_SM_DENYNONE|SMB_AM_OPENRW; scred = smbfs_malloc_scred(); smb_makescred(scred, ap->a_td, ap->a_cred); error = smbfs_smb_open(np, accmode, scred); if (error) { if (mode & FWRITE) return EACCES; else if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { accmode = SMB_SM_DENYNONE|SMB_AM_OPENREAD; error = smbfs_smb_open(np, accmode, scred); } } if (error == 0) { np->n_flag |= NOPEN; vnode_create_vobject(ap->a_vp, vattr.va_size, ap->a_td); } smbfs_attr_cacheremove(vp); smbfs_free_scred(scred); return error; } static int smbfs_close(ap) struct vop_close_args /* { struct vnodeop_desc *a_desc; struct vnode *a_vp; int a_fflag; struct ucred *a_cred; struct thread *a_td; } */ *ap; { struct vnode *vp = ap->a_vp; struct thread *td = ap->a_td; struct smbnode *np = VTOSMB(vp); struct smb_cred *scred; if (vp->v_type == VDIR && (np->n_flag & NOPEN) != 0 && np->n_dirseq != NULL) { scred = smbfs_malloc_scred(); smb_makescred(scred, td, ap->a_cred); smbfs_findclose(np->n_dirseq, scred); smbfs_free_scred(scred); np->n_dirseq = NULL; } return 0; } /* * smbfs_getattr call from vfs. 
*/
static int
smbfs_getattr(ap)
	struct vop_getattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct smbnode *np = VTOSMB(vp);
	struct vattr *va=ap->a_vap;
	struct smbfattr fattr;
	struct smb_cred *scred;
	u_quad_t oldsize;
	int error;

	SMBVDEBUG("%lx: '%s' %d\n", (long)vp, np->n_name,
	    (vp->v_vflag & VV_ROOT) != 0);
	/* Serve from the attribute cache when it is still valid. */
	error = smbfs_attr_cachelookup(vp, va);
	if (!error)
		return 0;
	SMBVDEBUG("not in the cache\n");
	scred = smbfs_malloc_scred();
	smb_makescred(scred, curthread, ap->a_cred);
	oldsize = np->n_size;
	error = smbfs_smb_lookup(np, NULL, 0, &fattr, scred);
	if (error) {
		SMBVDEBUG("error %d\n", error);
		smbfs_free_scred(scred);
		return error;
	}
	smbfs_attr_cacheenter(vp, &fattr);
	smbfs_attr_cachelookup(vp, va);
	/* While the file is open locally, our cached size is authoritative. */
	if (np->n_flag & NOPEN)
		np->n_size = oldsize;
	smbfs_free_scred(scred);
	return 0;
}

/*
 * Set attributes: size (truncate/extend), dos-attribute flags derived
 * from va_flags/va_mode, and timestamps — via handle-based or path-based
 * SMB calls depending on whether the file is open and the dialect.
 */
static int
smbfs_setattr(ap)
	struct vop_setattr_args /* {
		struct vnode *a_vp;
		struct vattr *a_vap;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct smbnode *np = VTOSMB(vp);
	struct vattr *vap = ap->a_vap;
	struct timespec *mtime, *atime;
	struct smb_cred *scred;
	struct smb_share *ssp = np->n_mount->sm_share;
	struct smb_vc *vcp = SSTOVC(ssp);
	struct thread *td = curthread;
	u_quad_t tsize = 0;
	int isreadonly, doclose, error = 0;
	int old_n_dosattr;

	SMBVDEBUG("\n");
	isreadonly = (vp->v_mount->mnt_flag & MNT_RDONLY);
	/*
	 * Disallow write attempts if the filesystem is mounted read-only.
	 */
	if ((vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL ||
	     vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL ||
	     vap->va_mode != (mode_t)VNOVAL || vap->va_flags != VNOVAL) &&
	     isreadonly)
		return EROFS;

	/*
	 * We only support setting four flags.  Don't allow setting others.
	 *
	 * We map UF_READONLY to SMB_FA_RDONLY, unlike the MacOS X version
	 * of this code, which maps both UF_IMMUTABLE AND SF_IMMUTABLE to
	 * SMB_FA_RDONLY.  The immutable flags have different semantics
	 * than readonly, which is the reason for the difference.
	 */
	if (vap->va_flags != VNOVAL) {
		if (vap->va_flags & ~(UF_HIDDEN|UF_SYSTEM|UF_ARCHIVE|
		    UF_READONLY))
			return EINVAL;
	}

	scred = smbfs_malloc_scred();
	smb_makescred(scred, td, ap->a_cred);
	if (vap->va_size != VNOVAL) {
		switch (vp->v_type) {
		case VDIR:
			error = EISDIR;
			goto out;
		case VREG:
			break;
		default:
			error = EINVAL;
			goto out;
		}
		if (isreadonly) {
			error = EROFS;
			goto out;
		}
		doclose = 0;
		/* Update pager size eagerly; rolled back below on failure. */
		vnode_pager_setsize(vp, (u_long)vap->va_size);
		tsize = np->n_size;
		np->n_size = vap->va_size;
		/* setfsize needs a file handle; open temporarily if needed. */
		if ((np->n_flag & NOPEN) == 0) {
			error = smbfs_smb_open(np,
			    SMB_SM_DENYNONE|SMB_AM_OPENRW, scred);
			if (error == 0)
				doclose = 1;
		}
		if (error == 0)
			error = smbfs_smb_setfsize(np,
			    (int64_t)vap->va_size, scred);
		if (doclose)
			smbfs_smb_close(ssp, np->n_fid, NULL, scred);
		if (error) {
			/* Roll back the optimistic size change. */
			np->n_size = tsize;
			vnode_pager_setsize(vp, (u_long)tsize);
			goto out;
		}
	}
	if ((vap->va_flags != VNOVAL) || (vap->va_mode != (mode_t)VNOVAL)) {
		old_n_dosattr = np->n_dosattr;

		/* Owner-write bit maps (inverted) onto SMB_FA_RDONLY. */
		if (vap->va_mode != (mode_t)VNOVAL) {
			if (vap->va_mode & S_IWUSR)
				np->n_dosattr &= ~SMB_FA_RDONLY;
			else
				np->n_dosattr |= SMB_FA_RDONLY;
		}

		if (vap->va_flags != VNOVAL) {
			if (vap->va_flags & UF_HIDDEN)
				np->n_dosattr |= SMB_FA_HIDDEN;
			else
				np->n_dosattr &= ~SMB_FA_HIDDEN;

			if (vap->va_flags & UF_SYSTEM)
				np->n_dosattr |= SMB_FA_SYSTEM;
			else
				np->n_dosattr &= ~SMB_FA_SYSTEM;

			if (vap->va_flags & UF_ARCHIVE)
				np->n_dosattr |= SMB_FA_ARCHIVE;
			else
				np->n_dosattr &= ~SMB_FA_ARCHIVE;

			/*
			 * We only support setting the immutable / readonly
			 * bit for regular files.  According to comments in
			 * the MacOS X version of this code, supporting the
			 * readonly bit on directories doesn't do the same
			 * thing in Windows as in Unix.
			 */
			if (vp->v_type == VREG) {
				if (vap->va_flags & UF_READONLY)
					np->n_dosattr |= SMB_FA_RDONLY;
				else
					np->n_dosattr &= ~SMB_FA_RDONLY;
			}
		}

		/* Only contact the server when something actually changed. */
		if (np->n_dosattr != old_n_dosattr) {
			error = smbfs_smb_setpattr(np, np->n_dosattr, NULL,
			    scred);
			if (error)
				goto out;
		}
	}
	mtime = atime = NULL;
	if (vap->va_mtime.tv_sec != VNOVAL)
		mtime = &vap->va_mtime;
	if (vap->va_atime.tv_sec != VNOVAL)
		atime = &vap->va_atime;
	if (mtime != atime) {
		if (vap->va_vaflags & VA_UTIMES_NULL) {
			error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td);
			if (error)
				error = VOP_ACCESS(vp, VWRITE, ap->a_cred, td);
		} else
			error = VOP_ACCESS(vp, VADMIN, ap->a_cred, td);
#if 0
		if (mtime == NULL)
			mtime = &np->n_mtime;
		if (atime == NULL)
			atime = &np->n_atime;
#endif
		/*
		 * If file is opened, then we can use handle based calls.
		 * If not, use path based ones.
		 */
		if ((np->n_flag & NOPEN) == 0) {
			if (vcp->vc_flags & SMBV_WIN95) {
				error = VOP_OPEN(vp, FWRITE, ap->a_cred, td,
				    NULL);
				if (!error) {
/*					error = smbfs_smb_setfattrNT(np, 0, mtime, atime, scred);
					VOP_GETATTR(vp, &vattr, ap->a_cred); */
					if (mtime)
						np->n_mtime = *mtime;
					VOP_CLOSE(vp, FWRITE, ap->a_cred, td);
				}
			} else if ((vcp->vc_sopt.sv_caps & SMB_CAP_NT_SMBS)) {
				error = smbfs_smb_setptime2(np, mtime, atime,
				    0, scred);
/*				error = smbfs_smb_setpattrNT(np, 0, mtime, atime, scred);*/
			} else if (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN2_0) {
				error = smbfs_smb_setptime2(np, mtime, atime,
				    0, scred);
			} else {
				error = smbfs_smb_setpattr(np, 0, mtime, scred);
			}
		} else {
			if (vcp->vc_sopt.sv_caps & SMB_CAP_NT_SMBS) {
				error = smbfs_smb_setfattrNT(np, 0, mtime,
				    atime, scred);
			} else if (SMB_DIALECT(vcp) >= SMB_DIALECT_LANMAN1_0) {
				error = smbfs_smb_setftime(np, mtime, atime,
				    scred);
			} else {
				/*
				 * I have no idea how to handle this for core
				 * level servers. The possible solution is to
				 * update mtime after file is closed.
				 */
				SMBERROR("can't update times on an opened file\n");
			}
		}
	}
	/*
	 * Invalidate attribute cache in case if server doesn't set
	 * required attributes.
	 */
	smbfs_attr_cacheremove(vp);	/* invalidate cache */
	VOP_GETATTR(vp, vap, ap->a_cred);
	np->n_mtime.tv_sec = vap->va_mtime.tv_sec;
out:
	smbfs_free_scred(scred);
	return error;
}

/*
 * smbfs_read call.
 */
static int
smbfs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;

	SMBVDEBUG("\n");
	if (vp->v_type != VREG && vp->v_type != VDIR)
		return EPERM;
	return smbfs_readvnode(vp, uio, ap->a_cred);
}

/* Write to a regular file; the real work is in smbfs_writevnode(). */
static int
smbfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int  a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;

	SMBVDEBUG("%d,ofs=%jd,sz=%zd\n",vp->v_type, (intmax_t)uio->uio_offset,
	    uio->uio_resid);
	if (vp->v_type != VREG)
		return (EPERM);
	return smbfs_writevnode(vp, uio, ap->a_cred,ap->a_ioflag);
}
/*
 * smbfs_create call
 * Create a regular file. On entry the directory to contain the file being
 * created is locked.  We must release before we return. We must also free
 * the pathname buffer pointed at by cnp->cn_pnbuf, always on error, or
 * only if the SAVESTART bit in cn_flags is clear on success.
*/ static int smbfs_create(ap) struct vop_create_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *dvp = ap->a_dvp; struct vattr *vap = ap->a_vap; struct vnode **vpp=ap->a_vpp; struct componentname *cnp = ap->a_cnp; struct smbnode *dnp = VTOSMB(dvp); struct vnode *vp; struct vattr vattr; struct smbfattr fattr; struct smb_cred *scred; char *name = cnp->cn_nameptr; int nmlen = cnp->cn_namelen; int error; SMBVDEBUG("\n"); *vpp = NULL; if (vap->va_type != VREG) return EOPNOTSUPP; if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) return error; scred = smbfs_malloc_scred(); smb_makescred(scred, cnp->cn_thread, cnp->cn_cred); error = smbfs_smb_create(dnp, name, nmlen, scred); if (error) goto out; error = smbfs_smb_lookup(dnp, name, nmlen, &fattr, scred); if (error) goto out; error = smbfs_nget(VTOVFS(dvp), dvp, name, nmlen, &fattr, &vp); if (error) goto out; *vpp = vp; if (cnp->cn_flags & MAKEENTRY) cache_enter(dvp, vp, cnp); out: smbfs_free_scred(scred); return error; } static int smbfs_remove(ap) struct vop_remove_args /* { struct vnodeop_desc *a_desc; struct vnode * a_dvp; struct vnode * a_vp; struct componentname * a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; /* struct vnode *dvp = ap->a_dvp;*/ struct componentname *cnp = ap->a_cnp; struct smbnode *np = VTOSMB(vp); struct smb_cred *scred; int error; if (vp->v_type == VDIR || (np->n_flag & NOPEN) != 0 || vrefcnt(vp) != 1) return EPERM; scred = smbfs_malloc_scred(); smb_makescred(scred, cnp->cn_thread, cnp->cn_cred); error = smbfs_smb_delete(np, scred); if (error == 0) np->n_flag |= NGONE; cache_purge(vp); smbfs_free_scred(scred); return error; } /* * smbfs_file rename call */ static int smbfs_rename(ap) struct vop_rename_args /* { struct vnode *a_fdvp; struct vnode *a_fvp; struct componentname *a_fcnp; struct vnode *a_tdvp; struct vnode *a_tvp; struct componentname *a_tcnp; } */ *ap; { struct vnode *fvp = ap->a_fvp; struct vnode *tvp 
= ap->a_tvp; struct vnode *fdvp = ap->a_fdvp; struct vnode *tdvp = ap->a_tdvp; struct componentname *tcnp = ap->a_tcnp; /* struct componentname *fcnp = ap->a_fcnp;*/ struct smb_cred *scred; u_int16_t flags = 6; int error=0; scred = NULL; /* Check for cross-device rename */ if ((fvp->v_mount != tdvp->v_mount) || (tvp && (fvp->v_mount != tvp->v_mount))) { error = EXDEV; goto out; } if (tvp && vrefcnt(tvp) > 1) { error = EBUSY; goto out; } flags = 0x10; /* verify all writes */ if (fvp->v_type == VDIR) { flags |= 2; } else if (fvp->v_type == VREG) { flags |= 1; } else { return EINVAL; } scred = smbfs_malloc_scred(); smb_makescred(scred, tcnp->cn_thread, tcnp->cn_cred); /* * It seems that Samba doesn't implement SMB_COM_MOVE call... */ #ifdef notnow if (SMB_DIALECT(SSTOCN(smp->sm_share)) >= SMB_DIALECT_LANMAN1_0) { error = smbfs_smb_move(VTOSMB(fvp), VTOSMB(tdvp), tcnp->cn_nameptr, tcnp->cn_namelen, flags, scred); } else #endif { /* * We have to do the work atomicaly */ if (tvp && tvp != fvp) { error = smbfs_smb_delete(VTOSMB(tvp), scred); if (error) goto out_cacherem; VTOSMB(fvp)->n_flag |= NGONE; } error = smbfs_smb_rename(VTOSMB(fvp), VTOSMB(tdvp), tcnp->cn_nameptr, tcnp->cn_namelen, scred); } if (fvp->v_type == VDIR) { if (tvp != NULL && tvp->v_type == VDIR) cache_purge(tdvp); cache_purge(fdvp); } out_cacherem: smbfs_attr_cacheremove(fdvp); smbfs_attr_cacheremove(tdvp); out: smbfs_free_scred(scred); if (tdvp == tvp) vrele(tdvp); else vput(tdvp); if (tvp) vput(tvp); vrele(fdvp); vrele(fvp); #ifdef possible_mistake vgone(fvp); if (tvp) vgone(tvp); #endif return error; } /* * somtime it will come true... */ static int smbfs_link(ap) struct vop_link_args /* { struct vnode *a_tdvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { return EOPNOTSUPP; } /* * smbfs_symlink link create call. * Sometime it will be functional... 
*/ static int smbfs_symlink(ap) struct vop_symlink_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; char *a_target; } */ *ap; { return EOPNOTSUPP; } static int smbfs_mknod(ap) struct vop_mknod_args /* { } */ *ap; { return EOPNOTSUPP; } static int smbfs_mkdir(ap) struct vop_mkdir_args /* { struct vnode *a_dvp; struct vnode **a_vpp; struct componentname *a_cnp; struct vattr *a_vap; } */ *ap; { struct vnode *dvp = ap->a_dvp; /* struct vattr *vap = ap->a_vap;*/ struct vnode *vp; struct componentname *cnp = ap->a_cnp; struct smbnode *dnp = VTOSMB(dvp); struct vattr vattr; struct smb_cred *scred; struct smbfattr fattr; char *name = cnp->cn_nameptr; int len = cnp->cn_namelen; int error; if ((error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred))) { return error; } if ((name[0] == '.') && ((len == 1) || ((len == 2) && (name[1] == '.')))) return EEXIST; scred = smbfs_malloc_scred(); smb_makescred(scred, cnp->cn_thread, cnp->cn_cred); error = smbfs_smb_mkdir(dnp, name, len, scred); if (error) goto out; error = smbfs_smb_lookup(dnp, name, len, &fattr, scred); if (error) goto out; error = smbfs_nget(VTOVFS(dvp), dvp, name, len, &fattr, &vp); if (error) goto out; *ap->a_vpp = vp; out: smbfs_free_scred(scred); return error; } /* * smbfs_remove directory call */ static int smbfs_rmdir(ap) struct vop_rmdir_args /* { struct vnode *a_dvp; struct vnode *a_vp; struct componentname *a_cnp; } */ *ap; { struct vnode *vp = ap->a_vp; struct vnode *dvp = ap->a_dvp; struct componentname *cnp = ap->a_cnp; /* struct smbmount *smp = VTOSMBFS(vp);*/ struct smbnode *dnp = VTOSMB(dvp); struct smbnode *np = VTOSMB(vp); struct smb_cred *scred; int error; if (dvp == vp) return EINVAL; scred = smbfs_malloc_scred(); smb_makescred(scred, cnp->cn_thread, cnp->cn_cred); error = smbfs_smb_rmdir(np, scred); if (error == 0) np->n_flag |= NGONE; dnp->n_flag |= NMODIFIED; smbfs_attr_cacheremove(dvp); /* cache_purge(dvp);*/ cache_purge(vp); smbfs_free_scred(scred); 
return error; } /* * smbfs_readdir call */ static int smbfs_readdir(ap) struct vop_readdir_args /* { struct vnode *a_vp; struct uio *a_uio; struct ucred *a_cred; int *a_eofflag; u_long *a_cookies; int a_ncookies; } */ *ap; { struct vnode *vp = ap->a_vp; struct uio *uio = ap->a_uio; int error; if (vp->v_type != VDIR) return (EPERM); #ifdef notnow if (ap->a_ncookies) { printf("smbfs_readdir: no support for cookies now..."); return (EOPNOTSUPP); } #endif error = smbfs_readvnode(vp, uio, ap->a_cred); return error; } /* ARGSUSED */ static int smbfs_fsync(ap) struct vop_fsync_args /* { struct vnodeop_desc *a_desc; struct vnode * a_vp; struct ucred * a_cred; int a_waitfor; struct thread * a_td; } */ *ap; { /* return (smb_flush(ap->a_vp, ap->a_cred, ap->a_waitfor, ap->a_td, 1));*/ return (0); } static int smbfs_print (ap) struct vop_print_args /* { struct vnode *a_vp; } */ *ap; { struct vnode *vp = ap->a_vp; struct smbnode *np = VTOSMB(vp); if (np == NULL) { printf("no smbnode data\n"); return (0); } printf("\tname = %s, parent = %p, open = %d\n", np->n_name, np->n_parent ? np->n_parent : NULL, (np->n_flag & NOPEN) != 0); return (0); } static int smbfs_pathconf (ap) struct vop_pathconf_args /* { struct vnode *vp; int name; register_t *retval; } */ *ap; { struct smbmount *smp = VFSTOSMBFS(VTOVFS(ap->a_vp)); struct smb_vc *vcp = SSTOVC(smp->sm_share); register_t *retval = ap->a_retval; int error = 0; switch (ap->a_name) { case _PC_LINK_MAX: *retval = 0; break; case _PC_NAME_MAX: *retval = (vcp->vc_hflags2 & SMB_FLAGS2_KNOWS_LONG_NAMES) ? 255 : 12; break; case _PC_PATH_MAX: *retval = 800; /* XXX: a correct one ? 
*/ break; default: error = vop_stdpathconf(ap); } return error; } static int smbfs_strategy (ap) struct vop_strategy_args /* { struct buf *a_bp } */ *ap; { struct buf *bp=ap->a_bp; struct ucred *cr; struct thread *td; SMBVDEBUG("\n"); if (bp->b_flags & B_ASYNC) td = (struct thread *)0; else td = curthread; /* XXX */ if (bp->b_iocmd == BIO_READ) cr = bp->b_rcred; else cr = bp->b_wcred; if ((bp->b_flags & B_ASYNC) == 0 ) (void)smbfs_doio(ap->a_vp, bp, cr, td); return (0); } int smbfs_ioctl(ap) struct vop_ioctl_args /* { struct vnode *a_vp; u_long a_command; caddr_t a_data; int fflag; struct ucred *cred; struct thread *td; } */ *ap; { return ENOTTY; } static char smbfs_atl[] = "rhsvda"; static int smbfs_getextattr(struct vop_getextattr_args *ap) /* { IN struct vnode *a_vp; IN char *a_name; INOUT struct uio *a_uio; IN struct ucred *a_cred; IN struct thread *a_td; }; */ { struct vnode *vp = ap->a_vp; struct thread *td = ap->a_td; struct ucred *cred = ap->a_cred; struct uio *uio = ap->a_uio; const char *name = ap->a_name; struct smbnode *np = VTOSMB(vp); struct vattr vattr; char buf[10]; int i, attr, error; error = VOP_ACCESS(vp, VREAD, cred, td); if (error) return error; error = VOP_GETATTR(vp, &vattr, cred); if (error) return error; if (strcmp(name, "dosattr") == 0) { attr = np->n_dosattr; for (i = 0; i < 6; i++, attr >>= 1) buf[i] = (attr & 1) ? smbfs_atl[i] : '-'; buf[i] = 0; error = uiomove(buf, i, uio); } else error = EINVAL; return error; } /* * Since we expected to support F_GETLK (and SMB protocol has no such function), * it is necessary to use lf_advlock(). It would be nice if this function had * a callback mechanism because it will help to improve a level of consistency. 
 */
int
smbfs_advlock(ap)
	struct vop_advlock_args /* {
		struct vnode *a_vp;
		caddr_t  a_id;
		int  a_op;
		struct flock *a_fl;
		int  a_flags;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct smbnode *np = VTOSMB(vp);
	struct flock *fl = ap->a_fl;
	/* A fixed lock-owner id is used instead of ap->a_id. */
	caddr_t id = (caddr_t)1 /* ap->a_id */;
/*	int flags = ap->a_flags;*/
	struct thread *td = curthread;
	struct smb_cred *scred;
	u_quad_t size;
	off_t start, end, oadd;
	int error, lkop;

	if (vp->v_type == VDIR) {
		/*
		 * The SMB protocol has no support for directory locking.
		 * Although locks can be processed on the local machine, I
		 * don't think that this is a good idea, because some
		 * programs can work wrongly assuming the directory is
		 * locked.  So we just return 'operation not supported'.
		 */
		return EOPNOTSUPP;
	}
	size = np->n_size;
	/*
	 * Translate the struct flock into an absolute byte range
	 * [start, end], guarding every addition against off_t overflow.
	 */
	switch (fl->l_whence) {
	case SEEK_SET:
	case SEEK_CUR:
		start = fl->l_start;
		break;

	case SEEK_END:
		if (size > OFF_MAX ||
		    (fl->l_start > 0 && size > OFF_MAX - fl->l_start))
			return EOVERFLOW;
		start = size + fl->l_start;
		break;

	default:
		return EINVAL;
	}
	if (start < 0)
		return EINVAL;
	if (fl->l_len < 0) {
		/* Negative length: range extends backwards from start. */
		if (start == 0)
			return EINVAL;
		end = start - 1;
		start += fl->l_len;
		if (start < 0)
			return EINVAL;
	} else if (fl->l_len == 0)
		end = -1;	/* lock to EOF */
	else {
		oadd = fl->l_len - 1;
		if (oadd > OFF_MAX - start)
			return EOVERFLOW;
		end = start + oadd;
	}
	scred = smbfs_malloc_scred();
	smb_makescred(scred, td, td->td_ucred);
	switch (ap->a_op) {
	    case F_SETLK:
		switch (fl->l_type) {
		    case F_WRLCK:
			lkop = SMB_LOCK_EXCL;
			break;
		    case F_RDLCK:
			lkop = SMB_LOCK_SHARED;
			break;
		    case F_UNLCK:
			lkop = SMB_LOCK_RELEASE;
			break;
		    default:
			smbfs_free_scred(scred);
			return EINVAL;
		}
		/* Register the lock locally first ... */
		error = lf_advlock(ap, &vp->v_lockf, size);
		if (error)
			break;
		/*
		 * NOTE(review): the lkop value chosen above is overridden
		 * here, so every SMB-side lock is taken exclusive —
		 * presumably intentional; confirm against history.
		 */
		lkop = SMB_LOCK_EXCL;
		/* ... then over the wire. */
		error = smbfs_smb_lock(np, lkop, id, start, end, scred);
		if (error) {
			/*
			 * Server refused: roll back the local lock so the
			 * two views stay consistent.
			 */
			int oldtype = fl->l_type;
			fl->l_type = F_UNLCK;
			ap->a_op = F_UNLCK;
			lf_advlock(ap, &vp->v_lockf, size);
			fl->l_type = oldtype;
		}
		break;
	    case F_UNLCK:
		lf_advlock(ap, &vp->v_lockf, size);
		error = smbfs_smb_lock(np, SMB_LOCK_RELEASE, id, start, end,
		    scred);
		break;
	    case F_GETLK:
		/* F_GETLK is answered purely from the local lock list. */
		error = lf_advlock(ap, &vp->v_lockf, size);
		break;
	    default:
		smbfs_free_scred(scred);
		return EINVAL;
	}
	smbfs_free_scred(scred);
	return error;
}

/*
 * Validate a path component for the negotiated SMB dialect.  Rejects
 * characters the protocol forbids and, for pre-LANMAN2.0 servers,
 * enforces the 8.3 naming format.  LOOKUP operations only need the
 * backslash check; create-style operations get the full validation.
 * Returns 0 when acceptable, ENOENT/ENAMETOOLONG otherwise.
 */
static int
smbfs_pathcheck(struct smbmount *smp, const char *name, int nmlen, int nameiop)
{
	static const char *badchars = "*/:<>;?";
	static const char *badchars83 = " +|,[]=";
	const char *cp;
	int i, error;

	/*
	 * Backslash characters, being a path delimiter, are prohibited
	 * within a path component even for LOOKUP operations.
	 */
	if (strchr(name, '\\') != NULL)
		return ENOENT;

	if (nameiop == LOOKUP)
		return 0;
	error = ENOENT;
	if (SMB_DIALECT(SSTOVC(smp->sm_share)) < SMB_DIALECT_LANMAN2_0) {
		/*
		 * Name should conform to the 8.3 format: at most 12 chars,
		 * exactly one dot, no more than 8 chars before it, and none
		 * of the additional 8.3-forbidden characters.
		 */
		if (nmlen > 12)
			return ENAMETOOLONG;
		cp = strchr(name, '.');
		if (cp == NULL)
			return error;
		if (cp == name || (cp - name) > 8)
			return error;
		cp = strchr(cp + 1, '.');
		if (cp != NULL)
			return error;
		for (cp = name, i = 0; i < nmlen; i++, cp++)
			if (strchr(badchars83, *cp) != NULL)
				return error;
	}
	/* Characters forbidden in every dialect. */
	for (cp = name, i = 0; i < nmlen; i++, cp++)
		if (strchr(badchars, *cp) != NULL)
			return error;
	return 0;
}

/*
 * Things go even weird without fixed inode numbers...
 */
int
smbfs_lookup(ap)
	struct vop_lookup_args /* {
		struct vnodeop_desc *a_desc;
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	struct componentname *cnp = ap->a_cnp;
	struct thread *td = cnp->cn_thread;
	struct vnode *dvp = ap->a_dvp;
	struct vnode **vpp = ap->a_vpp;
	struct vnode *vp;
	struct smbmount *smp;
	struct mount *mp = dvp->v_mount;
	struct smbnode *dnp;
	struct smbfattr fattr, *fap;
	struct smb_cred *scred;
	char *name = cnp->cn_nameptr;
	int flags = cnp->cn_flags;
	int nameiop = cnp->cn_nameiop;
	int nmlen = cnp->cn_namelen;
	int error, islastcn, isdot;
	int killit;

	SMBVDEBUG("\n");
	if (dvp->v_type != VDIR)
		return ENOTDIR;
	/* '..' from the filesystem root makes no sense. */
	if ((flags & ISDOTDOT) && (dvp->v_vflag & VV_ROOT)) {
		SMBFSERR("invalid '..'\n");
		return EIO;
	}
	islastcn = flags & ISLASTCN;
	/* Modifying lookups on a read-only mount fail up front. */
	if (islastcn && (mp->mnt_flag & MNT_RDONLY) && (nameiop != LOOKUP))
		return EROFS;
	if ((error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, td)) != 0)
		return error;
	smp = VFSTOSMBFS(mp);
	dnp = VTOSMB(dvp);
	isdot = (nmlen == 1 && name[0] == '.');

	/* Reject names the negotiated SMB dialect cannot represent. */
	error = smbfs_pathcheck(smp, cnp->cn_nameptr, cnp->cn_namelen, nameiop);
	if (error)
		return ENOENT;

	/* First consult the name cache. */
	error = cache_lookup(dvp, vpp, cnp, NULL, NULL);
	SMBVDEBUG("cache_lookup returned %d\n", error);
	if (error > 0)
		return error;
	if (error) {		/* name was found */
		struct vattr vattr;

		killit = 0;
		vp = *vpp;
		error = VOP_GETATTR(vp, &vattr, cnp->cn_cred);
		/*
		 * If the file type on the server is inconsistent
		 * with what it was when we created the vnode,
		 * kill the bogus vnode now and fall through to
		 * the code below to create a new one with the
		 * right type.
		 */
		if (error == 0 &&
		   ((vp->v_type == VDIR &&
		   (VTOSMB(vp)->n_dosattr & SMB_FA_DIR) == 0) ||
		   (vp->v_type == VREG &&
		   (VTOSMB(vp)->n_dosattr & SMB_FA_DIR) != 0)))
			killit = 1;
		else if (error == 0
		 /* && vattr.va_ctime.tv_sec == VTOSMB(vp)->n_ctime*/) {
			if (nameiop != LOOKUP && islastcn)
				cnp->cn_flags |= SAVENAME;
			SMBVDEBUG("use cached vnode\n");
			return (0);
		}
		cache_purge(vp);
		/*
		 * XXX This is not quite right, if '.' is
		 * inconsistent, we really need to start the lookup
		 * all over again. Hopefully there is some other
		 * guarantee that prevents this case from happening.
		 */
		if (killit && vp != dvp)
			vgone(vp);
		if (vp != dvp)
			vput(vp);
		else
			vrele(vp);
		*vpp = NULLVP;
	}
	/*
	 * entry is not in the cache or has been expired
	 */
	error = 0;
	*vpp = NULLVP;
	scred = smbfs_malloc_scred();
	smb_makescred(scred, td, cnp->cn_cred);
	fap = &fattr;
	if (flags & ISDOTDOT) {
		/*
		 * In the DOTDOT case, don't go over-the-wire
		 * in order to request attributes. We already
		 * know it's a directory and subsequent call to
		 * smbfs_getattr() will restore consistency.
		 *
		 */
		SMBVDEBUG("smbfs_smb_lookup: dotdot\n");
	} else if (isdot) {
		error = smbfs_smb_lookup(dnp, NULL, 0, fap, scred);
		SMBVDEBUG("result of smbfs_smb_lookup: %d\n", error);
	} else {
		error = smbfs_smb_lookup(dnp, name, nmlen, fap, scred);
		SMBVDEBUG("result of smbfs_smb_lookup: %d\n", error);
	}
	if (error && error != ENOENT)
		goto out;
	if (error) {			/* entry not found */
		/*
		 * Handle RENAME or CREATE case...
		 */
		if ((nameiop == CREATE || nameiop == RENAME) && islastcn) {
			error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
			if (error)
				goto out;
			cnp->cn_flags |= SAVENAME;
			error = EJUSTRETURN;
			goto out;
		}
		error = ENOENT;
		goto out;
	}/*
	else {
		SMBVDEBUG("Found entry %s with id=%d\n", fap->entryName, fap->dirEntNum);
	}*/
	/*
	 * handle DELETE case ...
	 */
	if (nameiop == DELETE && islastcn) { 	/* delete last component */
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			goto out;
		if (isdot) {
			VREF(dvp);
			*vpp = dvp;
			goto out;
		}
		error = smbfs_nget(mp, dvp, name, nmlen, fap, &vp);
		if (error)
			goto out;
		*vpp = vp;
		cnp->cn_flags |= SAVENAME;
		goto out;
	}
	if (nameiop == RENAME && islastcn) {
		error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred, td);
		if (error)
			goto out;
		if (isdot) {
			error = EISDIR;
			goto out;
		}
		error = smbfs_nget(mp, dvp, name, nmlen, fap, &vp);
		if (error)
			goto out;
		*vpp = vp;
		cnp->cn_flags |= SAVENAME;
		goto out;
	}
	if (flags & ISDOTDOT) {
		/*
		 * Busy the mount and drop the dvp lock across the nget so
		 * lock order (child before parent here) cannot deadlock;
		 * re-check VI_DOOMED after every re-lock of dvp.
		 */
		mp = dvp->v_mount;
		error = vfs_busy(mp, MBF_NOWAIT);
		if (error != 0) {
			vfs_ref(mp);
			VOP_UNLOCK(dvp, 0);
			error = vfs_busy(mp, 0);
			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
			vfs_rel(mp);
			if (error) {
				error = ENOENT;
				goto out;
			}
			if ((dvp->v_iflag & VI_DOOMED) != 0) {
				vfs_unbusy(mp);
				error = ENOENT;
				goto out;
			}
		}
		VOP_UNLOCK(dvp, 0);
		error = smbfs_nget(mp, dvp, name, nmlen, NULL, &vp);
		vfs_unbusy(mp);
		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY);
		if ((dvp->v_iflag & VI_DOOMED) != 0) {
			if (error == 0)
				vput(vp);
			error = ENOENT;
		}
		if (error)
			goto out;
		*vpp = vp;
	} else if (isdot) {
		vref(dvp);
		*vpp = dvp;
	} else {
		error = smbfs_nget(mp, dvp, name, nmlen, fap, &vp);
		if (error)
			goto out;
		*vpp = vp;
		SMBVDEBUG("lookup: getnewvp!\n");
	}
	if ((cnp->cn_flags & MAKEENTRY)/* && !islastcn*/) {
/*		VTOSMB(*vpp)->n_ctime = VTOSMB(*vpp)->n_vattr.va_ctime.tv_sec;*/
		cache_enter(dvp, *vpp, cnp);
	}
out:
	smbfs_free_scred(scred);
	return (error);
}
Index: head/sys/fs/tmpfs/tmpfs.h
===================================================================
--- head/sys/fs/tmpfs/tmpfs.h	(revision 326267)
+++ head/sys/fs/tmpfs/tmpfs.h	(revision 326268)
@@ -1,540 +1,542 @@
 /*	$NetBSD: tmpfs.h,v 1.26 2007/02/22 06:37:00 thorpej Exp $	*/

 /*-
+ * SPDX-License-Identifier: BSD-2-Clause-NetBSD
+ *
  * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
  * All rights reserved.
* * This code is derived from software contributed to The NetBSD Foundation * by Julio M. Merino Vidal, developed as part of Google's Summer of Code * 2005 program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _FS_TMPFS_TMPFS_H_ #define _FS_TMPFS_TMPFS_H_ #include #include #include #include #include #include #include #include #include #include #include #include MALLOC_DECLARE(M_TMPFSMNT); MALLOC_DECLARE(M_TMPFSNAME); /* * Internal representation of a tmpfs directory entry. 
*/ LIST_HEAD(tmpfs_dir_duphead, tmpfs_dirent); struct tmpfs_dirent { /* * Depending on td_cookie flag entry can be of 3 types: * - regular -- no hash collisions, stored in RB-Tree * - duphead -- synthetic linked list head for dup entries * - dup -- stored in linked list instead of RB-Tree */ union { /* regular and duphead entry types */ RB_ENTRY(tmpfs_dirent) td_entries; /* dup entry type */ struct { LIST_ENTRY(tmpfs_dirent) entries; LIST_ENTRY(tmpfs_dirent) index_entries; } td_dup; } uh; uint32_t td_cookie; uint32_t td_hash; u_int td_namelen; /* * Pointer to the node this entry refers to. In case this field * is NULL, the node is a whiteout. */ struct tmpfs_node * td_node; union { /* * The name of the entry, allocated from a string pool. This * string is not required to be zero-terminated. */ char * td_name; /* regular, dup */ struct tmpfs_dir_duphead td_duphead; /* duphead */ } ud; }; /* * A directory in tmpfs holds a collection of directory entries, which * in turn point to other files (which can be directories themselves). * * In tmpfs, this collection is managed by a RB-Tree, whose head is * defined by the struct tmpfs_dir type. * * It is important to notice that directories do not have entries for . and * .. as other file systems do. These can be generated when requested * based on information available by other means, such as the pointer to * the node itself in the former case or the pointer to the parent directory * in the latter case. This is done to simplify tmpfs's code and, more * importantly, to remove redundancy. */ RB_HEAD(tmpfs_dir, tmpfs_dirent); /* * Each entry in a directory has a cookie that identifies it. Cookies * supersede offsets within directories because, given how tmpfs stores * directories in memory, there is no such thing as an offset. * * The '.', '..' and the end of directory markers have fixed cookies which * cannot collide with the cookies generated by other entries. 
The cookies * for the other entries are generated based on the file name hash value or * unique number in case of name hash collision. * * To preserve compatibility cookies are limited to 31 bits. */ #define TMPFS_DIRCOOKIE_DOT 0 #define TMPFS_DIRCOOKIE_DOTDOT 1 #define TMPFS_DIRCOOKIE_EOF 2 #define TMPFS_DIRCOOKIE_MASK ((off_t)0x3fffffffU) #define TMPFS_DIRCOOKIE_MIN ((off_t)0x00000004U) #define TMPFS_DIRCOOKIE_DUP ((off_t)0x40000000U) #define TMPFS_DIRCOOKIE_DUPHEAD ((off_t)0x80000000U) #define TMPFS_DIRCOOKIE_DUP_MIN TMPFS_DIRCOOKIE_DUP #define TMPFS_DIRCOOKIE_DUP_MAX \ (TMPFS_DIRCOOKIE_DUP | TMPFS_DIRCOOKIE_MASK) /* * Internal representation of a tmpfs file system node. * * This structure is splitted in two parts: one holds attributes common * to all file types and the other holds data that is only applicable to * a particular type. The code must be careful to only access those * attributes that are actually allowed by the node's type. * * Below is the key of locks used to protected the fields in the following * structures. * (v) vnode lock in exclusive mode * (vi) vnode lock in exclusive mode, or vnode lock in shared vnode and * tn_interlock * (i) tn_interlock * (m) tmpfs_mount tm_allnode_lock * (c) stable after creation */ struct tmpfs_node { /* * Doubly-linked list entry which links all existing nodes for * a single file system. This is provided to ease the removal * of all nodes during the unmount operation, and to support * the implementation of VOP_VNTOCNP(). tn_attached is false * when the node is removed from list and unlocked. */ LIST_ENTRY(tmpfs_node) tn_entries; /* (m) */ bool tn_attached; /* (m) */ /* * The node's type. Any of 'VBLK', 'VCHR', 'VDIR', 'VFIFO', * 'VLNK', 'VREG' and 'VSOCK' is allowed. The usage of vnode * types instead of a custom enumeration is to make things simpler * and faster, as we do not need to convert between two types. */ enum vtype tn_type; /* (c) */ /* Node identifier. */ ino_t tn_id; /* (c) */ /* * Node's internal status. 
This is used by several file system * operations to do modifications to the node in a delayed * fashion. */ int tn_status; /* (vi) */ #define TMPFS_NODE_ACCESSED (1 << 1) #define TMPFS_NODE_MODIFIED (1 << 2) #define TMPFS_NODE_CHANGED (1 << 3) /* * The node size. It does not necessarily match the real amount * of memory consumed by it. */ off_t tn_size; /* (v) */ /* Generic node attributes. */ uid_t tn_uid; /* (v) */ gid_t tn_gid; /* (v) */ mode_t tn_mode; /* (v) */ u_long tn_flags; /* (v) */ nlink_t tn_links; /* (v) */ struct timespec tn_atime; /* (vi) */ struct timespec tn_mtime; /* (vi) */ struct timespec tn_ctime; /* (vi) */ struct timespec tn_birthtime; /* (v) */ unsigned long tn_gen; /* (c) */ /* * As there is a single vnode for each active file within the * system, care has to be taken to avoid allocating more than one * vnode per file. In order to do this, a bidirectional association * is kept between vnodes and nodes. * * Whenever a vnode is allocated, its v_data field is updated to * point to the node it references. At the same time, the node's * tn_vnode field is modified to point to the new vnode representing * it. Further attempts to allocate a vnode for this same node will * result in returning a new reference to the value stored in * tn_vnode. * * May be NULL when the node is unused (that is, no vnode has been * allocated for it or it has been reclaimed). */ struct vnode * tn_vnode; /* (i) */ /* * Interlock to protect tn_vpstate, and tn_status under shared * vnode lock. */ struct mtx tn_interlock; /* * Identify if current node has vnode assiocate with * or allocating vnode. */ int tn_vpstate; /* (i) */ /* Transient refcounter on this node. */ u_int tn_refcount; /* (m) + (i) */ /* misc data field for different tn_type node */ union { /* Valid when tn_type == VBLK || tn_type == VCHR. */ dev_t tn_rdev; /* (c) */ /* Valid when tn_type == VDIR. */ struct tn_dir { /* * Pointer to the parent directory. 
The root * directory has a pointer to itself in this field; * this property identifies the root node. */ struct tmpfs_node * tn_parent; /* * Head of a tree that links the contents of * the directory together. */ struct tmpfs_dir tn_dirhead; /* * Head of a list the contains fake directory entries * heads, i.e. entries with TMPFS_DIRCOOKIE_DUPHEAD * flag. */ struct tmpfs_dir_duphead tn_dupindex; /* * Number and pointer of the first directory entry * returned by the readdir operation if it were * called again to continue reading data from the * same directory as before. This is used to speed * up reads of long directories, assuming that no * more than one read is in progress at a given time. * Otherwise, these values are discarded. */ off_t tn_readdir_lastn; struct tmpfs_dirent * tn_readdir_lastp; } tn_dir; /* Valid when tn_type == VLNK. */ /* The link's target, allocated from a string pool. */ char * tn_link; /* (c) */ /* Valid when tn_type == VREG. */ struct tn_reg { /* * The contents of regular files stored in a * tmpfs file system are represented by a * single anonymous memory object (aobj, for * short). The aobj provides direct access to * any position within the file. It is a task * of the memory management subsystem to issue * the required page ins or page outs whenever * a position within the file is accessed. 
*/ vm_object_t tn_aobj; /* (c) */ } tn_reg; } tn_spec; /* (v) */ }; LIST_HEAD(tmpfs_node_list, tmpfs_node); #define tn_rdev tn_spec.tn_rdev #define tn_dir tn_spec.tn_dir #define tn_link tn_spec.tn_link #define tn_reg tn_spec.tn_reg #define tn_fifo tn_spec.tn_fifo #define TMPFS_NODE_LOCK(node) mtx_lock(&(node)->tn_interlock) #define TMPFS_NODE_UNLOCK(node) mtx_unlock(&(node)->tn_interlock) #define TMPFS_NODE_MTX(node) (&(node)->tn_interlock) #define TMPFS_NODE_ASSERT_LOCKED(node) mtx_assert(TMPFS_NODE_MTX(node), \ MA_OWNED) #ifdef INVARIANTS #define TMPFS_ASSERT_LOCKED(node) do { \ MPASS((node) != NULL); \ MPASS((node)->tn_vnode != NULL); \ ASSERT_VOP_LOCKED((node)->tn_vnode, "tmpfs assert"); \ } while (0) #else #define TMPFS_ASSERT_LOCKED(node) (void)0 #endif #define TMPFS_VNODE_ALLOCATING 1 #define TMPFS_VNODE_WANT 2 #define TMPFS_VNODE_DOOMED 4 #define TMPFS_VNODE_WRECLAIM 8 /* * Internal representation of a tmpfs mount point. */ struct tmpfs_mount { /* * Maximum number of memory pages available for use by the file * system, set during mount time. This variable must never be * used directly as it may be bigger than the current amount of * free memory; in the extreme case, it will hold the ULONG_MAX * value. */ u_long tm_pages_max; /* Number of pages in use by the file system. */ u_long tm_pages_used; /* * Pointer to the node representing the root directory of this * file system. */ struct tmpfs_node * tm_root; /* * Maximum number of possible nodes for this file system; set * during mount time. We need a hard limit on the maximum number * of nodes to avoid allocating too much of them; their objects * cannot be released until the file system is unmounted. * Otherwise, we could easily run out of memory by creating lots * of empty files and then simply removing them. */ ino_t tm_nodes_max; /* unrhdr used to allocate inode numbers */ struct unrhdr * tm_ino_unr; /* Number of nodes currently that are in use. 
*/ ino_t tm_nodes_inuse; /* Refcounter on this struct tmpfs_mount. */ uint64_t tm_refcount; /* maximum representable file size */ u_int64_t tm_maxfilesize; /* * The used list contains all nodes that are currently used by * the file system; i.e., they refer to existing files. */ struct tmpfs_node_list tm_nodes_used; /* All node lock to protect the node list and tmp_pages_used. */ struct mtx tm_allnode_lock; /* Zones used to store file system meta data, per tmpfs mount. */ uma_zone_t tm_dirent_pool; uma_zone_t tm_node_pool; /* Read-only status. */ bool tm_ronly; /* Do not use namecache. */ bool tm_nonc; }; #define TMPFS_LOCK(tm) mtx_lock(&(tm)->tm_allnode_lock) #define TMPFS_UNLOCK(tm) mtx_unlock(&(tm)->tm_allnode_lock) #define TMPFS_MP_ASSERT_LOCKED(tm) mtx_assert(&(tm)->tm_allnode_lock, MA_OWNED) /* * This structure maps a file identifier to a tmpfs node. Used by the * NFS code. */ struct tmpfs_fid { uint16_t tf_len; uint16_t tf_pad; ino_t tf_id; unsigned long tf_gen; }; struct tmpfs_dir_cursor { struct tmpfs_dirent *tdc_current; struct tmpfs_dirent *tdc_tree; }; #ifdef _KERNEL /* * Prototypes for tmpfs_subr.c. 
*/ void tmpfs_ref_node(struct tmpfs_node *node); void tmpfs_ref_node_locked(struct tmpfs_node *node); int tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *, enum vtype, uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *, char *, dev_t, struct tmpfs_node **); void tmpfs_free_node(struct tmpfs_mount *, struct tmpfs_node *); bool tmpfs_free_node_locked(struct tmpfs_mount *, struct tmpfs_node *, bool); void tmpfs_free_tmp(struct tmpfs_mount *); int tmpfs_alloc_dirent(struct tmpfs_mount *, struct tmpfs_node *, const char *, u_int, struct tmpfs_dirent **); void tmpfs_free_dirent(struct tmpfs_mount *, struct tmpfs_dirent *); void tmpfs_dirent_init(struct tmpfs_dirent *, const char *, u_int); void tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj); int tmpfs_alloc_vp(struct mount *, struct tmpfs_node *, int, struct vnode **); void tmpfs_free_vp(struct vnode *); int tmpfs_alloc_file(struct vnode *, struct vnode **, struct vattr *, struct componentname *, char *); void tmpfs_check_mtime(struct vnode *); void tmpfs_dir_attach(struct vnode *, struct tmpfs_dirent *); void tmpfs_dir_detach(struct vnode *, struct tmpfs_dirent *); void tmpfs_dir_destroy(struct tmpfs_mount *, struct tmpfs_node *); struct tmpfs_dirent * tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f, struct componentname *cnp); int tmpfs_dir_getdents(struct tmpfs_node *, struct uio *, int, u_long *, int *); int tmpfs_dir_whiteout_add(struct vnode *, struct componentname *); void tmpfs_dir_whiteout_remove(struct vnode *, struct componentname *); int tmpfs_reg_resize(struct vnode *, off_t, boolean_t); int tmpfs_chflags(struct vnode *, u_long, struct ucred *, struct thread *); int tmpfs_chmod(struct vnode *, mode_t, struct ucred *, struct thread *); int tmpfs_chown(struct vnode *, uid_t, gid_t, struct ucred *, struct thread *); int tmpfs_chsize(struct vnode *, u_quad_t, struct ucred *, struct thread *); int tmpfs_chtimes(struct vnode *, struct vattr *, struct ucred *cred, struct thread 
*); void tmpfs_itimes(struct vnode *, const struct timespec *, const struct timespec *); void tmpfs_set_status(struct tmpfs_node *node, int status); void tmpfs_update(struct vnode *); int tmpfs_truncate(struct vnode *, off_t); struct tmpfs_dirent *tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc); struct tmpfs_dirent *tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc); /* * Convenience macros to simplify some logical expressions. */ #define IMPLIES(a, b) (!(a) || (b)) #define IFF(a, b) (IMPLIES(a, b) && IMPLIES(b, a)) /* * Checks that the directory entry pointed by 'de' matches the name 'name' * with a length of 'len'. */ #define TMPFS_DIRENT_MATCHES(de, name, len) \ (de->td_namelen == len && \ bcmp((de)->ud.td_name, (name), (de)->td_namelen) == 0) /* * Ensures that the node pointed by 'node' is a directory and that its * contents are consistent with respect to directories. */ #define TMPFS_VALIDATE_DIR(node) do { \ MPASS((node)->tn_type == VDIR); \ MPASS((node)->tn_size % sizeof(struct tmpfs_dirent) == 0); \ } while (0) /* * Amount of memory pages to reserve for the system (e.g., to not use by * tmpfs). */ #define TMPFS_PAGES_MINRESERVED (4 * 1024 * 1024 / PAGE_SIZE) size_t tmpfs_mem_avail(void); size_t tmpfs_pages_used(struct tmpfs_mount *tmp); #endif /* * Macros/functions to convert from generic data structures to tmpfs * specific ones. 
*/ static inline struct tmpfs_mount * VFS_TO_TMPFS(struct mount *mp) { struct tmpfs_mount *tmp; MPASS(mp != NULL && mp->mnt_data != NULL); tmp = (struct tmpfs_mount *)mp->mnt_data; return (tmp); } static inline struct tmpfs_node * VP_TO_TMPFS_NODE(struct vnode *vp) { struct tmpfs_node *node; MPASS(vp != NULL && vp->v_data != NULL); node = (struct tmpfs_node *)vp->v_data; return (node); } static inline struct tmpfs_node * VP_TO_TMPFS_DIR(struct vnode *vp) { struct tmpfs_node *node; node = VP_TO_TMPFS_NODE(vp); TMPFS_VALIDATE_DIR(node); return (node); } static inline bool tmpfs_use_nc(struct vnode *vp) { return (!(VFS_TO_TMPFS(vp->v_mount)->tm_nonc)); } #endif /* _FS_TMPFS_TMPFS_H_ */ Index: head/sys/fs/tmpfs/tmpfs_fifoops.c =================================================================== --- head/sys/fs/tmpfs/tmpfs_fifoops.c (revision 326267) +++ head/sys/fs/tmpfs/tmpfs_fifoops.c (revision 326268) @@ -1,72 +1,74 @@ /* $NetBSD: tmpfs_fifoops.c,v 1.5 2005/12/11 12:24:29 christos Exp $ */ /*- + * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * * Copyright (c) 2005 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Julio M. Merino Vidal, developed as part of Google's Summer of Code * 2005 program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. 
AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * tmpfs vnode interface for named pipes. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include static int tmpfs_fifo_close(struct vop_close_args *v) { struct tmpfs_node *node; node = VP_TO_TMPFS_NODE(v->a_vp); tmpfs_set_status(node, TMPFS_NODE_ACCESSED); tmpfs_update(v->a_vp); return (fifo_specops.vop_close(v)); } /* * vnode operations vector used for fifos stored in a tmpfs file system. */ struct vop_vector tmpfs_fifoop_entries = { .vop_default = &fifo_specops, .vop_close = tmpfs_fifo_close, .vop_reclaim = tmpfs_reclaim, .vop_access = tmpfs_access, .vop_getattr = tmpfs_getattr, .vop_setattr = tmpfs_setattr, .vop_print = tmpfs_print, }; Index: head/sys/fs/tmpfs/tmpfs_fifoops.h =================================================================== --- head/sys/fs/tmpfs/tmpfs_fifoops.h (revision 326267) +++ head/sys/fs/tmpfs/tmpfs_fifoops.h (revision 326268) @@ -1,50 +1,52 @@ /* $NetBSD: tmpfs_fifoops.h,v 1.4 2005/12/03 17:34:44 christos Exp $ */ /*- + * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * * Copyright (c) 2005 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Julio M. 
Merino Vidal, developed as part of Google's Summer of Code * 2005 program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _FS_TMPFS_TMPFS_FIFOOPS_H_ #define _FS_TMPFS_TMPFS_FIFOOPS_H_ #if !defined(_KERNEL) #error not supposed to be exposed to userland. #endif #include /* * Declarations for tmpfs_fifoops.c. 
*/ extern struct vop_vector tmpfs_fifoop_entries; #endif /* _FS_TMPFS_TMPFS_FIFOOPS_H_ */ Index: head/sys/fs/tmpfs/tmpfs_subr.c =================================================================== --- head/sys/fs/tmpfs/tmpfs_subr.c (revision 326267) +++ head/sys/fs/tmpfs/tmpfs_subr.c (revision 326268) @@ -1,1871 +1,1873 @@ /* $NetBSD: tmpfs_subr.c,v 1.35 2007/07/09 21:10:50 ad Exp $ */ /*- + * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * * Copyright (c) 2005 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Julio M. Merino Vidal, developed as part of Google's Summer of Code * 2005 program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
 */

/*
 * Efficient memory file system supporting functions.
 */
#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

SYSCTL_NODE(_vfs, OID_AUTO, tmpfs, CTLFLAG_RW, 0, "tmpfs file system");

/* Pages kept back from tmpfs use; tunable via vfs.tmpfs.memory_reserved. */
static long tmpfs_pages_reserved = TMPFS_PAGES_MINRESERVED;

/*
 * Sysctl handler for vfs.tmpfs.memory_reserved.  The knob is exposed in
 * bytes but stored in pages; values below TMPFS_PAGES_MINRESERVED are
 * rejected with EINVAL.
 */
static int
sysctl_mem_reserved(SYSCTL_HANDLER_ARGS)
{
	int error;
	long pages, bytes;

	pages = *(long *)arg1;
	bytes = pages * PAGE_SIZE;
	error = sysctl_handle_long(oidp, &bytes, 0, req);
	/* Read-only access or handler error: nothing to store. */
	if (error || !req->newptr)
		return (error);
	pages = bytes / PAGE_SIZE;
	if (pages < TMPFS_PAGES_MINRESERVED)
		return (EINVAL);
	*(long *)arg1 = pages;
	return (0);
}

SYSCTL_PROC(_vfs_tmpfs, OID_AUTO, memory_reserved, CTLTYPE_LONG|CTLFLAG_RW,
    &tmpfs_pages_reserved, 0, sysctl_mem_reserved, "L",
    "Amount of available memory and swap below which tmpfs growth stops");

static __inline int tmpfs_dirtree_cmp(struct tmpfs_dirent *a,
    struct tmpfs_dirent *b);
RB_PROTOTYPE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries,
    tmpfs_dirtree_cmp);

/*
 * Pages currently available to tmpfs: free memory plus free swap,
 * less the reserved amount; clamped at zero (the signed intermediate
 * can go negative when the reserve exceeds what is free).
 */
size_t
tmpfs_mem_avail(void)
{
	vm_ooffset_t avail;

	avail = swap_pager_avail + vm_cnt.v_free_count - tmpfs_pages_reserved;
	if (__predict_false(avail < 0))
		avail = 0;
	return (avail);
}

/*
 * Pages charged to a tmpfs mount: data pages in use plus an estimate
 * of metadata overhead (one node + one dirent per in-use node).
 */
size_t
tmpfs_pages_used(struct tmpfs_mount *tmp)
{
	const size_t node_size = sizeof(struct tmpfs_node) +
	    sizeof(struct tmpfs_dirent);
	size_t meta_pages;

	meta_pages = howmany((uintmax_t)tmp->tm_nodes_inuse * node_size,
	    PAGE_SIZE);
	return (meta_pages + tmp->tm_pages_used);
}

/*
 * Returns non-zero when 'req_pages' more pages fit within both the
 * system-wide availability and this mount's tm_pages_max limit.
 */
static size_t
tmpfs_pages_check_avail(struct tmpfs_mount *tmp, size_t req_pages)
{
	if (tmpfs_mem_avail() < req_pages)
		return (0);

	if (tmp->tm_pages_max != ULONG_MAX &&
	    tmp->tm_pages_max < req_pages + tmpfs_pages_used(tmp))
		return (0);

	return (1);
}

/*
 * Take a transient reference on a node (interlock acquired here).
 */
void
tmpfs_ref_node(struct tmpfs_node *node)
{

	TMPFS_NODE_LOCK(node);
	tmpfs_ref_node_locked(node);
	TMPFS_NODE_UNLOCK(node);
}

void
tmpfs_ref_node_locked(struct tmpfs_node *node)
{

	/* Caller must hold the node lock; refcount must already be live. */
	TMPFS_NODE_ASSERT_LOCKED(node);
	KASSERT(node->tn_refcount > 0, ("node %p zero refcount", node));
	KASSERT(node->tn_refcount < UINT_MAX, ("node %p refcount %u", node,
	    node->tn_refcount));
	node->tn_refcount++;
}

/*
 * Allocates a new node of type 'type' inside the 'tmp' mount point, with
 * its owner set to 'uid', its group to 'gid' and its mode set to 'mode',
 * using the credentials of the process 'p'.
 *
 * If the node type is set to 'VDIR', then the parent parameter must point
 * to the parent directory of the node being created.  It may only be NULL
 * while allocating the root node.
 *
 * If the node type is set to 'VBLK' or 'VCHR', then the rdev parameter
 * specifies the device the node represents.
 *
 * If the node type is set to 'VLNK', then the parameter target specifies
 * the file name of the target file for the symbolic link that is being
 * created.
 *
 * Note that new nodes are retrieved from the available list if it has
 * items or, if it is empty, from the node pool as long as there is enough
 * space to create them.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_node(struct mount *mp, struct tmpfs_mount *tmp, enum vtype type,
    uid_t uid, gid_t gid, mode_t mode, struct tmpfs_node *parent,
    char *target, dev_t rdev, struct tmpfs_node **node)
{
	struct tmpfs_node *nnode;
	vm_object_t obj;

	/* If the root directory of the 'tmp' file system is not yet
	 * allocated, this must be the request to do it.
	 */
	MPASS(IMPLIES(tmp->tm_root == NULL, parent == NULL && type == VDIR));
	KASSERT(tmp->tm_root == NULL || mp->mnt_writeopcount > 0,
	    ("creating node not under vn_start_write"));

	MPASS(IFF(type == VLNK, target != NULL));
	MPASS(IFF(type == VBLK || type == VCHR, rdev != VNOVAL));

	/* Enforce the per-mount node and page limits up front. */
	if (tmp->tm_nodes_inuse >= tmp->tm_nodes_max)
		return (ENOSPC);
	if (tmpfs_pages_check_avail(tmp, 1) == 0)
		return (ENOSPC);

	if ((mp->mnt_kern_flag & MNTK_UNMOUNT) != 0) {
		/*
		 * When a new tmpfs node is created for fully
		 * constructed mount point, there must be a parent
		 * node, which vnode is locked exclusively.  As
		 * consequence, if the unmount is executing in
		 * parallel, vflush() cannot reclaim the parent vnode.
		 * Due to this, the check for MNTK_UNMOUNT flag is not
		 * racy: if we did not see MNTK_UNMOUNT flag, then tmp
		 * cannot be destroyed until node construction is
		 * finished and the parent vnode unlocked.
		 *
		 * Tmpfs does not need to instantiate new nodes during
		 * unmount.
		 */
		return (EBUSY);
	}

	nnode = (struct tmpfs_node *)uma_zalloc_arg(tmp->tm_node_pool, tmp,
	    M_WAITOK);

	/* Generic initialization. */
	nnode->tn_type = type;
	vfs_timestamp(&nnode->tn_atime);
	nnode->tn_birthtime = nnode->tn_ctime = nnode->tn_mtime =
	    nnode->tn_atime;
	nnode->tn_uid = uid;
	nnode->tn_gid = gid;
	nnode->tn_mode = mode;
	nnode->tn_id = alloc_unr(tmp->tm_ino_unr);
	nnode->tn_refcount = 1;

	/* Type-specific initialization. */
	switch (nnode->tn_type) {
	case VBLK:
	case VCHR:
		nnode->tn_rdev = rdev;
		break;

	case VDIR:
		RB_INIT(&nnode->tn_dir.tn_dirhead);
		LIST_INIT(&nnode->tn_dir.tn_dupindex);
		MPASS(parent != nnode);
		MPASS(IMPLIES(parent == NULL, tmp->tm_root == NULL));
		/* The root directory is its own parent. */
		nnode->tn_dir.tn_parent = (parent == NULL) ? nnode : parent;
		nnode->tn_dir.tn_readdir_lastn = 0;
		nnode->tn_dir.tn_readdir_lastp = NULL;
		/* One link for "."; the parent gains one for "..". */
		nnode->tn_links++;
		TMPFS_NODE_LOCK(nnode->tn_dir.tn_parent);
		nnode->tn_dir.tn_parent->tn_links++;
		TMPFS_NODE_UNLOCK(nnode->tn_dir.tn_parent);
		break;

	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	case VLNK:
		MPASS(strlen(target) < MAXPATHLEN);
		/* tn_link is not NUL-terminated; tn_size holds the length. */
		nnode->tn_size = strlen(target);
		nnode->tn_link = malloc(nnode->tn_size, M_TMPFSNAME,
		    M_WAITOK);
		memcpy(nnode->tn_link, target, nnode->tn_size);
		break;

	case VREG:
		obj = nnode->tn_reg.tn_aobj =
		    vm_pager_allocate(OBJT_SWAP, NULL, 0, VM_PROT_DEFAULT, 0,
			NULL /* XXXKIB - tmpfs needs swap reservation */);
		VM_OBJECT_WLOCK(obj);
		/* OBJ_TMPFS is set together with the setting of vp->v_object */
		vm_object_set_flag(obj, OBJ_NOSPLIT | OBJ_TMPFS_NODE);
		vm_object_clear_flag(obj, OBJ_ONEMAPPING);
		VM_OBJECT_WUNLOCK(obj);
		break;

	default:
		panic("tmpfs_alloc_node: type %p %d", nnode,
		    (int)nnode->tn_type);
	}

	/* Publish the node on the mount's in-use list. */
	TMPFS_LOCK(tmp);
	LIST_INSERT_HEAD(&tmp->tm_nodes_used, nnode, tn_entries);
	nnode->tn_attached = true;
	tmp->tm_nodes_inuse++;
	tmp->tm_refcount++;
	TMPFS_UNLOCK(tmp);

	*node = nnode;
	return (0);
}

/*
 * Destroys the node pointed to by node from the file system 'tmp'.
 * If the node references a directory, no entries are allowed.
 */
void
tmpfs_free_node(struct tmpfs_mount *tmp, struct tmpfs_node *node)
{

	TMPFS_LOCK(tmp);
	TMPFS_NODE_LOCK(node);
	/*
	 * tmpfs_free_node_locked() drops both locks itself when it
	 * actually frees the node (returns true); otherwise we must
	 * release them here.
	 */
	if (!tmpfs_free_node_locked(tmp, node, false)) {
		TMPFS_NODE_UNLOCK(node);
		TMPFS_UNLOCK(tmp);
	}
}

/*
 * Drop a reference on 'node'; when the last reference goes (or 'detach'
 * is requested) remove it from the mount's in-use list and, on the last
 * reference, release its type-specific resources and the node itself.
 *
 * Called with both the mount and node locks held.  Returns true when the
 * node was freed -- in that case both locks have been dropped (the mount
 * lock is re-taken briefly for tmpfs_free_tmp()).  Returns false when
 * references remain, with both locks still held.
 */
bool
tmpfs_free_node_locked(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    bool detach)
{
	vm_object_t uobj;

	TMPFS_MP_ASSERT_LOCKED(tmp);
	TMPFS_NODE_ASSERT_LOCKED(node);
	KASSERT(node->tn_refcount > 0, ("node %p refcount zero", node));

	node->tn_refcount--;
	if (node->tn_attached && (detach || node->tn_refcount == 0)) {
		MPASS(tmp->tm_nodes_inuse > 0);
		tmp->tm_nodes_inuse--;
		LIST_REMOVE(node, tn_entries);
		node->tn_attached = false;
	}
	if (node->tn_refcount > 0)
		return (false);

#ifdef INVARIANTS
	MPASS(node->tn_vnode == NULL);
	MPASS((node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0);
#endif
	TMPFS_NODE_UNLOCK(node);
	TMPFS_UNLOCK(tmp);

	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VDIR:
		/* FALLTHROUGH */
	case VFIFO:
		/* FALLTHROUGH */
	case VSOCK:
		break;

	case VLNK:
		free(node->tn_link, M_TMPFSNAME);
		break;

	case VREG:
		uobj = node->tn_reg.tn_aobj;
		if (uobj != NULL) {
			/* Return the node's data pages to the accounting. */
			if (uobj->size != 0)
				atomic_subtract_long(&tmp->tm_pages_used,
				    uobj->size);
			KASSERT((uobj->flags & OBJ_TMPFS) == 0,
			    ("leaked OBJ_TMPFS node %p vm_obj %p", node,
			    uobj));
			vm_object_deallocate(uobj);
		}
		break;

	default:
		panic("tmpfs_free_node: type %p %d", node,
		    (int)node->tn_type);
	}

	/*
	 * If we are unmounting there is no need for going through the
	 * overhead of freeing the inodes from the unr individually, so free
	 * them all in one go later.
	 */
	if (!detach)
		free_unr(tmp->tm_ino_unr, node->tn_id);
	uma_zfree(tmp->tm_node_pool, node);
	TMPFS_LOCK(tmp);
	tmpfs_free_tmp(tmp);
	return (true);
}

/*
 * Hash a directory entry name into the cookie space.  FNV-1 seeded with
 * the length, masked into the cookie range and bumped above the
 * reserved "."/".."/EOF cookie values.
 */
static __inline uint32_t
tmpfs_dirent_hash(const char *name, u_int len)
{
	uint32_t hash;

	hash = fnv_32_buf(name, len, FNV1_32_INIT + len) &
	    TMPFS_DIRCOOKIE_MASK;
#ifdef TMPFS_DEBUG_DIRCOOKIE_DUP
	/* Deliberately collapse the hash to exercise duplicate handling. */
	hash &= 0xf;
#endif
	if (hash < TMPFS_DIRCOOKIE_MIN)
		hash += TMPFS_DIRCOOKIE_MIN;

	return (hash);
}

/*
 * Return the readdir cookie of 'de'; NULL maps to the EOF cookie.
 */
static __inline off_t
tmpfs_dirent_cookie(struct tmpfs_dirent *de)
{
	if (de == NULL)
		return (TMPFS_DIRCOOKIE_EOF);

	MPASS(de->td_cookie >= TMPFS_DIRCOOKIE_MIN);

	return (de->td_cookie);
}

/* True if 'de' carries a duplicate-hash cookie. */
static __inline boolean_t
tmpfs_dirent_dup(struct tmpfs_dirent *de)
{
	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUP) != 0);
}

/* True if 'de' is the head of a duplicate-hash list. */
static __inline boolean_t
tmpfs_dirent_duphead(struct tmpfs_dirent *de)
{
	return ((de->td_cookie & TMPFS_DIRCOOKIE_DUPHEAD) != 0);
}

/*
 * Fill in the name, hash, and initial cookie of a freshly allocated
 * directory entry.  ud.td_name must already point at 'namelen' bytes.
 */
void
tmpfs_dirent_init(struct tmpfs_dirent *de, const char *name, u_int namelen)
{

	de->td_hash = de->td_cookie = tmpfs_dirent_hash(name, namelen);
	memcpy(de->ud.td_name, name, namelen);
	de->td_namelen = namelen;
}

/*
 * Allocates a new directory entry for the node node with a name of name.
 * The new directory entry is returned in *de.
 *
 * The link count of node is increased by one to reflect the new object
 * referencing it.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_dirent(struct tmpfs_mount *tmp, struct tmpfs_node *node,
    const char *name, u_int len, struct tmpfs_dirent **de)
{
	struct tmpfs_dirent *nde;

	nde = uma_zalloc(tmp->tm_dirent_pool, M_WAITOK);
	nde->td_node = node;
	if (name != NULL) {
		nde->ud.td_name = malloc(len, M_TMPFSNAME, M_WAITOK);
		tmpfs_dirent_init(nde, name, len);
	} else
		nde->td_namelen = 0;
	if (node != NULL)
		node->tn_links++;

	*de = nde;

	return 0;
}

/*
 * Frees a directory entry.  It is the caller's responsibility to destroy
 * the node referenced by it if needed.
 *
 * The link count of node is decreased by one to reflect the removal of an
 * object that referenced it.  This only happens if 'node_exists' is true;
 * otherwise the function will not access the node referred to by the
 * directory entry, as it may already have been released from the outside.
 */
void
tmpfs_free_dirent(struct tmpfs_mount *tmp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *node;

	node = de->td_node;
	if (node != NULL) {
		MPASS(node->tn_links > 0);
		node->tn_links--;
	}
	/* Duphead entries share storage; only real names are freed. */
	if (!tmpfs_dirent_duphead(de) && de->ud.td_name != NULL)
		free(de->ud.td_name, M_TMPFSNAME);
	uma_zfree(tmp->tm_dirent_pool, de);
}

/*
 * Break the association between a regular file's vnode and its backing
 * VM object (clears OBJ_TMPFS and the back-pointer).  Requires the vnode
 * to be exclusively locked; no-op for non-VREG vnodes or a NULL object.
 */
void
tmpfs_destroy_vobject(struct vnode *vp, vm_object_t obj)
{

	ASSERT_VOP_ELOCKED(vp, "tmpfs_destroy_vobject");
	if (vp->v_type != VREG || obj == NULL)
		return;

	VM_OBJECT_WLOCK(obj);
	VI_LOCK(vp);
	vm_object_clear_flag(obj, OBJ_TMPFS);
	obj->un_pager.swp.swp_tmpfs = NULL;
	VI_UNLOCK(vp);
	VM_OBJECT_WUNLOCK(obj);
}

/*
 * Need to clear v_object for insmntque failure.
 */
static void
tmpfs_insmntque_dtr(struct vnode *vp, void *dtr_arg)
{

	tmpfs_destroy_vobject(vp, vp->v_object);
	vp->v_object = NULL;
	vp->v_data = NULL;
	vp->v_op = &dead_vnodeops;
	vgone(vp);
	vput(vp);
}

/*
 * Allocates a new vnode for the node node or returns a new reference to
 * an existing one if the node had already a vnode referencing it.  The
 * resulting locked vnode is returned in *vpp.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
    struct vnode **vpp)
{
	struct vnode *vp;
	struct tmpfs_mount *tm;
	vm_object_t object;
	int error;

	error = 0;
	tm = VFS_TO_TMPFS(mp);
	TMPFS_NODE_LOCK(node);
	/* Hold the node across the (possibly sleeping) allocation. */
	tmpfs_ref_node_locked(node);
loop:
	TMPFS_NODE_ASSERT_LOCKED(node);
	if ((vp = node->tn_vnode) != NULL) {
		MPASS((node->tn_vpstate & TMPFS_VNODE_DOOMED) == 0);
		VI_LOCK(vp);
		/* Unlinked directory, or doomed vnode with LK_NOWAIT. */
		if ((node->tn_type == VDIR && node->tn_dir.tn_parent ==
		    NULL) || ((vp->v_iflag & VI_DOOMED) != 0 &&
		    (lkflag & LK_NOWAIT) != 0)) {
			VI_UNLOCK(vp);
			TMPFS_NODE_UNLOCK(node);
			error = ENOENT;
			vp = NULL;
			goto out;
		}
		if ((vp->v_iflag & VI_DOOMED) != 0) {
			VI_UNLOCK(vp);
			/* Wait for the reclaim to finish, then retry. */
			node->tn_vpstate |= TMPFS_VNODE_WRECLAIM;
			while ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) !=
			    0) {
				msleep(&node->tn_vnode, TMPFS_NODE_MTX(node),
				    0, "tmpfsE", 0);
			}
			goto loop;
		}
		TMPFS_NODE_UNLOCK(node);
		error = vget(vp, lkflag | LK_INTERLOCK, curthread);
		if (error == ENOENT) {
			TMPFS_NODE_LOCK(node);
			goto loop;
		}
		if (error != 0) {
			vp = NULL;
			goto out;
		}

		/*
		 * Make sure the vnode is still there after
		 * getting the interlock to avoid racing a free.
		 */
		if (node->tn_vnode == NULL || node->tn_vnode != vp) {
			vput(vp);
			TMPFS_NODE_LOCK(node);
			goto loop;
		}

		goto out;
	}

	if ((node->tn_vpstate & TMPFS_VNODE_DOOMED) ||
	    (node->tn_type == VDIR && node->tn_dir.tn_parent == NULL)) {
		TMPFS_NODE_UNLOCK(node);
		error = ENOENT;
		vp = NULL;
		goto out;
	}

	/*
	 * otherwise lock the vp list while we call getnewvnode
	 * since that can block.
	 */
	if (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) {
		/* Another thread is allocating; wait for it and retry. */
		node->tn_vpstate |= TMPFS_VNODE_WANT;
		error = msleep((caddr_t) &node->tn_vpstate,
		    TMPFS_NODE_MTX(node), 0, "tmpfs_alloc_vp", 0);
		if (error != 0)
			goto out;
		goto loop;
	} else
		node->tn_vpstate |= TMPFS_VNODE_ALLOCATING;

	TMPFS_NODE_UNLOCK(node);

	/* Get a new vnode and associate it with our node. */
	error = getnewvnode("tmpfs", mp, VFS_TO_TMPFS(mp)->tm_nonc ?
	    &tmpfs_vnodeop_nonc_entries : &tmpfs_vnodeop_entries, &vp);
	if (error != 0)
		goto unlock;
	MPASS(vp != NULL);

	/* lkflag is ignored, the lock is exclusive */
	(void) vn_lock(vp, lkflag | LK_RETRY);

	vp->v_data = node;
	vp->v_type = node->tn_type;

	/* Type-specific initialization. */
	switch (node->tn_type) {
	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VLNK:
		/* FALLTHROUGH */
	case VSOCK:
		break;
	case VFIFO:
		vp->v_op = &tmpfs_fifoop_entries;
		break;
	case VREG:
		/* Wire up the swap object as the vnode's VM object. */
		object = node->tn_reg.tn_aobj;
		VM_OBJECT_WLOCK(object);
		VI_LOCK(vp);
		KASSERT(vp->v_object == NULL, ("Not NULL v_object in tmpfs"));
		vp->v_object = object;
		object->un_pager.swp.swp_tmpfs = vp;
		vm_object_set_flag(object, OBJ_TMPFS);
		VI_UNLOCK(vp);
		VM_OBJECT_WUNLOCK(object);
		break;
	case VDIR:
		MPASS(node->tn_dir.tn_parent != NULL);
		/* The root directory is its own parent. */
		if (node->tn_dir.tn_parent == node)
			vp->v_vflag |= VV_ROOT;
		break;

	default:
		panic("tmpfs_alloc_vp: type %p %d", node,
		    (int)node->tn_type);
	}
	if (vp->v_type != VFIFO)
		VN_LOCK_ASHARE(vp);

	error = insmntque1(vp, mp, tmpfs_insmntque_dtr, NULL);
	if (error != 0)
		vp = NULL;

unlock:
	TMPFS_NODE_LOCK(node);

	MPASS(node->tn_vpstate & TMPFS_VNODE_ALLOCATING);
	node->tn_vpstate &= ~TMPFS_VNODE_ALLOCATING;
	node->tn_vnode = vp;

	/* Wake any thread that slept waiting for us to allocate. */
	if (node->tn_vpstate & TMPFS_VNODE_WANT) {
		node->tn_vpstate &= ~TMPFS_VNODE_WANT;
		TMPFS_NODE_UNLOCK(node);
		wakeup((caddr_t) &node->tn_vpstate);
	} else
		TMPFS_NODE_UNLOCK(node);

out:
	if (error == 0) {
		*vpp = vp;

#ifdef INVARIANTS
		MPASS(*vpp != NULL && VOP_ISLOCKED(*vpp));
		TMPFS_NODE_LOCK(node);
		MPASS(*vpp == node->tn_vnode);
		TMPFS_NODE_UNLOCK(node);
#endif
	}
	/* Drop the reference taken at entry. */
	tmpfs_free_node(tm, node);

	return (error);
}

/*
 * Destroys the association between the vnode vp and the node it
 * references.
 */
void
tmpfs_free_vp(struct vnode *vp)
{
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	TMPFS_NODE_ASSERT_LOCKED(node);
	node->tn_vnode = NULL;
	/* Wake tmpfs_alloc_vp() waiters stalled on the reclaim. */
	if ((node->tn_vpstate & TMPFS_VNODE_WRECLAIM) != 0)
		wakeup(&node->tn_vnode);
	node->tn_vpstate &= ~TMPFS_VNODE_WRECLAIM;
	vp->v_data = NULL;
}

/*
 * Allocates a new file of type 'type' and adds it to the parent directory
 * 'dvp'; this addition is done using the component name given in 'cnp'.
 * The ownership of the new file is automatically assigned based on the
 * credentials of the caller (through 'cnp'), the group is set based on
 * the parent directory and the mode is determined from the 'vap' argument.
 * If successful, *vpp holds a vnode to the newly created file and zero
 * is returned.  Otherwise *vpp is NULL and the function returns an
 * appropriate error code.
 */
int
tmpfs_alloc_file(struct vnode *dvp, struct vnode **vpp, struct vattr *vap,
    struct componentname *cnp, char *target)
{
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;
	struct tmpfs_node *parent;

	ASSERT_VOP_ELOCKED(dvp, "tmpfs_alloc_file");
	MPASS(cnp->cn_flags & HASBUF);

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULL;

	/* If the entry we are creating is a directory, we cannot overflow
	 * the number of links of its parent, because it will get a new
	 * link. */
	if (vap->va_type == VDIR) {
		/* Ensure that we do not overflow the maximum number of links
		 * imposed by the system. */
		MPASS(dnode->tn_links <= LINK_MAX);
		if (dnode->tn_links == LINK_MAX) {
			return (EMLINK);
		}

		parent = dnode;
		MPASS(parent != NULL);
	} else
		parent = NULL;

	/* Allocate a node that represents the new file. */
	error = tmpfs_alloc_node(dvp->v_mount, tmp, vap->va_type,
	    cnp->cn_cred->cr_uid, dnode->tn_gid, vap->va_mode, parent,
	    target, vap->va_rdev, &node);
	if (error != 0)
		return (error);

	/* Allocate a directory entry that points to the new file. */
	error = tmpfs_alloc_dirent(tmp, node, cnp->cn_nameptr,
	    cnp->cn_namelen, &de);
	if (error != 0) {
		tmpfs_free_node(tmp, node);
		return (error);
	}

	/* Allocate a vnode for the new file. */
	error = tmpfs_alloc_vp(dvp->v_mount, node, LK_EXCLUSIVE, vpp);
	if (error != 0) {
		tmpfs_free_dirent(tmp, de);
		tmpfs_free_node(tmp, node);
		return (error);
	}

	/* Now that all required items are allocated, we can proceed to
	 * insert the new node into the directory, an operation that
	 * cannot fail. */
	if (cnp->cn_flags & ISWHITEOUT)
		tmpfs_dir_whiteout_remove(dvp, cnp);
	tmpfs_dir_attach(dvp, de);
	return (0);
}

/*
 * Position cursor 'dc' at the first entry of directory 'dnode' and
 * return it (descending into a duplicate-hash list head if needed);
 * NULL for an empty directory.
 */
struct tmpfs_dirent *
tmpfs_dir_first(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
{
	struct tmpfs_dirent *de;

	de = RB_MIN(tmpfs_dir, &dnode->tn_dir.tn_dirhead);
	dc->tdc_tree = de;
	if (de != NULL && tmpfs_dirent_duphead(de))
		de = LIST_FIRST(&de->ud.td_duphead);
	dc->tdc_current = de;

	return (dc->tdc_current);
}

/*
 * Advance cursor 'dc' to the next entry of 'dnode' and return it, or
 * NULL at the end.  Walks a duplicate list to its end before moving to
 * the next tree node.
 */
struct tmpfs_dirent *
tmpfs_dir_next(struct tmpfs_node *dnode, struct tmpfs_dir_cursor *dc)
{
	struct tmpfs_dirent *de;

	MPASS(dc->tdc_tree != NULL);
	if (tmpfs_dirent_dup(dc->tdc_current)) {
		dc->tdc_current = LIST_NEXT(dc->tdc_current,
		    uh.td_dup.entries);
		if (dc->tdc_current != NULL)
			return (dc->tdc_current);
	}
	dc->tdc_tree = dc->tdc_current = RB_NEXT(tmpfs_dir,
	    &dnode->tn_dir.tn_dirhead, dc->tdc_tree);
	if ((de = dc->tdc_current) != NULL && tmpfs_dirent_duphead(de)) {
		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
		MPASS(dc->tdc_current != NULL);
	}

	return (dc->tdc_current);
}

/* Lookup directory entry in RB-Tree. Function may return duphead entry. */
static struct tmpfs_dirent *
tmpfs_dir_xlookup_hash(struct tmpfs_node *dnode, uint32_t hash)
{
	struct tmpfs_dirent *de, dekey;

	dekey.td_hash = hash;
	de = RB_FIND(tmpfs_dir, &dnode->tn_dir.tn_dirhead, &dekey);
	return (de);
}

/* Lookup directory entry by cookie, initialize directory cursor accordingly.
 */
static struct tmpfs_dirent *
tmpfs_dir_lookup_cookie(struct tmpfs_node *node, off_t cookie,
    struct tmpfs_dir_cursor *dc)
{
	struct tmpfs_dir *dirhead = &node->tn_dir.tn_dirhead;
	struct tmpfs_dirent *de, dekey;

	MPASS(cookie >= TMPFS_DIRCOOKIE_MIN);

	/* Fast path: the per-directory readdir cache matches. */
	if (cookie == node->tn_dir.tn_readdir_lastn &&
	    (de = node->tn_dir.tn_readdir_lastp) != NULL) {
		/* Protect against possible race, tn_readdir_last[pn]
		 * may be updated with only shared vnode lock held. */
		if (cookie == tmpfs_dirent_cookie(de))
			goto out;
	}

	/* Duplicate-hash cookies live on the sorted dupindex list. */
	if ((cookie & TMPFS_DIRCOOKIE_DUP) != 0) {
		LIST_FOREACH(de, &node->tn_dir.tn_dupindex,
		    uh.td_dup.index_entries) {
			MPASS(tmpfs_dirent_dup(de));
			if (de->td_cookie == cookie)
				goto out;
			/* dupindex list is sorted. */
			if (de->td_cookie < cookie) {
				de = NULL;
				goto out;
			}
		}
		MPASS(de == NULL);
		goto out;
	}

	if ((cookie & TMPFS_DIRCOOKIE_MASK) != cookie) {
		de = NULL;
	} else {
		dekey.td_hash = cookie;
		/* Recover if direntry for cookie was removed */
		de = RB_NFIND(tmpfs_dir, dirhead, &dekey);
	}
	dc->tdc_tree = de;
	dc->tdc_current = de;
	if (de != NULL && tmpfs_dirent_duphead(de)) {
		dc->tdc_current = LIST_FIRST(&de->ud.td_duphead);
		MPASS(dc->tdc_current != NULL);
	}
	return (dc->tdc_current);

out:
	dc->tdc_tree = de;
	dc->tdc_current = de;
	/* For a dup entry, the tree position is its duphead. */
	if (de != NULL && tmpfs_dirent_dup(de))
		dc->tdc_tree = tmpfs_dir_xlookup_hash(node, de->td_hash);
	return (dc->tdc_current);
}

/*
 * Looks for a directory entry in the directory represented by node.
 * 'cnp' describes the name of the entry to look for.  Note that the .
 * and .. components are not allowed as they do not physically exist
 * within directories.
 *
 * Returns a pointer to the entry when found, otherwise NULL.
 */
struct tmpfs_dirent *
tmpfs_dir_lookup(struct tmpfs_node *node, struct tmpfs_node *f,
    struct componentname *cnp)
{
	struct tmpfs_dir_duphead *duphead;
	struct tmpfs_dirent *de;
	uint32_t hash;

	MPASS(IMPLIES(cnp->cn_namelen == 1, cnp->cn_nameptr[0] != '.'));
	MPASS(IMPLIES(cnp->cn_namelen == 2, !(cnp->cn_nameptr[0] == '.' &&
	    cnp->cn_nameptr[1] == '.')));
	TMPFS_VALIDATE_DIR(node);

	hash = tmpfs_dirent_hash(cnp->cn_nameptr, cnp->cn_namelen);
	de = tmpfs_dir_xlookup_hash(node, hash);
	if (de != NULL && tmpfs_dirent_duphead(de)) {
		/* Hash collision: scan the duplicate list by exact name. */
		duphead = &de->ud.td_duphead;
		LIST_FOREACH(de, duphead, uh.td_dup.entries) {
			if (TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
			    cnp->cn_namelen))
				break;
		}
	} else if (de != NULL) {
		if (!TMPFS_DIRENT_MATCHES(de, cnp->cn_nameptr,
		    cnp->cn_namelen))
			de = NULL;
	}
	/* If 'f' is given, the entry must reference exactly that node. */
	if (de != NULL && f != NULL && de->td_node != f)
		de = NULL;

	return (de);
}

/*
 * Attach duplicate-cookie directory entry nde to dnode and insert to dupindex
 * list, allocate new cookie value.
 */
static void
tmpfs_dir_attach_dup(struct tmpfs_node *dnode,
    struct tmpfs_dir_duphead *duphead, struct tmpfs_dirent *nde)
{
	struct tmpfs_dir_duphead *dupindex;
	struct tmpfs_dirent *de, *pde;

	dupindex = &dnode->tn_dir.tn_dupindex;
	de = LIST_FIRST(dupindex);
	if (de == NULL || de->td_cookie < TMPFS_DIRCOOKIE_DUP_MAX) {
		/* Common case: just take the next cookie above the head. */
		if (de == NULL)
			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
		else
			nde->td_cookie = de->td_cookie + 1;
		MPASS(tmpfs_dirent_dup(nde));
		LIST_INSERT_HEAD(dupindex, nde, uh.td_dup.index_entries);
		LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
		return;
	}

	/*
	 * Cookie numbers are near exhaustion. Scan dupindex list for unused
	 * numbers. dupindex list is sorted in descending order. Keep it so
	 * after inserting nde.
	 */
	while (1) {
		pde = de;
		de = LIST_NEXT(de, uh.td_dup.index_entries);
		if (de == NULL && pde->td_cookie != TMPFS_DIRCOOKIE_DUP_MIN) {
			/*
			 * Last element of the index doesn't have minimal cookie
			 * value, use it.
			 */
			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MIN;
			LIST_INSERT_AFTER(pde, nde, uh.td_dup.index_entries);
			LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
			return;
		} else if (de == NULL) {
			/*
			 * We are so lucky have 2^30 hash duplicates in single
			 * directory :) Return largest possible cookie value.
			 * It should be fine except possible issues with
			 * VOP_READDIR restart.
			 */
			nde->td_cookie = TMPFS_DIRCOOKIE_DUP_MAX;
			LIST_INSERT_HEAD(dupindex, nde,
			    uh.td_dup.index_entries);
			LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
			return;
		}
		if (de->td_cookie + 1 == pde->td_cookie ||
		    de->td_cookie >= TMPFS_DIRCOOKIE_DUP_MAX)
			continue;	/* No hole or invalid cookie. */
		/* Found a gap between de and pde; take de's cookie + 1. */
		nde->td_cookie = de->td_cookie + 1;
		MPASS(tmpfs_dirent_dup(nde));
		MPASS(pde->td_cookie > nde->td_cookie);
		MPASS(nde->td_cookie > de->td_cookie);
		LIST_INSERT_BEFORE(de, nde, uh.td_dup.index_entries);
		LIST_INSERT_HEAD(duphead, nde, uh.td_dup.entries);
		return;
	}
}

/*
 * Attaches the directory entry de to the directory represented by vp.
 * Note that this does not change the link count of the node pointed by
 * the directory entry, as this is done by tmpfs_alloc_dirent.
 */
void
tmpfs_dir_attach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_node *dnode;
	struct tmpfs_dirent *xde, *nde;

	ASSERT_VOP_ELOCKED(vp, __func__);
	MPASS(de->td_namelen > 0);
	MPASS(de->td_hash >= TMPFS_DIRCOOKIE_MIN);
	MPASS(de->td_cookie == de->td_hash);

	dnode = VP_TO_TMPFS_DIR(vp);
	/* Invalidate the readdir cache -- the tree is changing. */
	dnode->tn_dir.tn_readdir_lastn = 0;
	dnode->tn_dir.tn_readdir_lastp = NULL;

	MPASS(!tmpfs_dirent_dup(de));
	xde = RB_INSERT(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
	if (xde != NULL && tmpfs_dirent_duphead(xde))
		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
	else if (xde != NULL) {
		/*
		 * Allocate new duphead. Swap xde with duphead to avoid
		 * adding/removing elements with the same hash.
		 */
		MPASS(!tmpfs_dirent_dup(xde));
		tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), NULL, NULL, 0,
		    &nde);
		/* *nde = *xde; XXX gcc 4.2.1 may generate invalid code. */
		memcpy(nde, xde, sizeof(*xde));
		xde->td_cookie |= TMPFS_DIRCOOKIE_DUPHEAD;
		LIST_INIT(&xde->ud.td_duphead);
		xde->td_namelen = 0;
		xde->td_node = NULL;
		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, nde);
		tmpfs_dir_attach_dup(dnode, &xde->ud.td_duphead, de);
	}
	dnode->tn_size += sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
	    TMPFS_NODE_MODIFIED;
	tmpfs_update(vp);
}

/*
 * Detaches the directory entry de from the directory represented by vp.
 * Note that this does not change the link count of the node pointed by
 * the directory entry, as this is done by tmpfs_free_dirent.
 */
void
tmpfs_dir_detach(struct vnode *vp, struct tmpfs_dirent *de)
{
	struct tmpfs_mount *tmp;
	struct tmpfs_dir *head;
	struct tmpfs_node *dnode;
	struct tmpfs_dirent *xde;

	ASSERT_VOP_ELOCKED(vp, __func__);

	dnode = VP_TO_TMPFS_DIR(vp);
	head = &dnode->tn_dir.tn_dirhead;
	/* Invalidate the readdir cache -- the tree is changing. */
	dnode->tn_dir.tn_readdir_lastn = 0;
	dnode->tn_dir.tn_readdir_lastp = NULL;

	if (tmpfs_dirent_dup(de)) {
		/* Remove duphead if de was last entry. */
		if (LIST_NEXT(de, uh.td_dup.entries) == NULL) {
			xde = tmpfs_dir_xlookup_hash(dnode, de->td_hash);
			MPASS(tmpfs_dirent_duphead(xde));
		} else
			xde = NULL;
		LIST_REMOVE(de, uh.td_dup.entries);
		LIST_REMOVE(de, uh.td_dup.index_entries);
		if (xde != NULL) {
			if (LIST_EMPTY(&xde->ud.td_duphead)) {
				RB_REMOVE(tmpfs_dir, head, xde);
				tmp = VFS_TO_TMPFS(vp->v_mount);
				MPASS(xde->td_node == NULL);
				tmpfs_free_dirent(tmp, xde);
			}
		}
		de->td_cookie = de->td_hash;
	} else
		RB_REMOVE(tmpfs_dir, head, de);

	dnode->tn_size -= sizeof(struct tmpfs_dirent);
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED | \
	    TMPFS_NODE_MODIFIED;
	tmpfs_update(vp);
}

/*
 * Free every directory entry of 'dnode', including duplicate lists.
 * Entries are detached from their nodes first (td_node = NULL) so
 * tmpfs_free_dirent() does not touch possibly-destroyed nodes.
 */
void
tmpfs_dir_destroy(struct tmpfs_mount *tmp, struct tmpfs_node *dnode)
{
	struct tmpfs_dirent *de, *dde, *nde;

	RB_FOREACH_SAFE(de, tmpfs_dir, &dnode->tn_dir.tn_dirhead, nde) {
		RB_REMOVE(tmpfs_dir, &dnode->tn_dir.tn_dirhead, de);
		/* Node may already be destroyed.
		 */
		de->td_node = NULL;
		if (tmpfs_dirent_duphead(de)) {
			while ((dde = LIST_FIRST(&de->ud.td_duphead)) !=
			    NULL) {
				LIST_REMOVE(dde, uh.td_dup.entries);
				dde->td_node = NULL;
				tmpfs_free_dirent(tmp, dde);
			}
		}
		tmpfs_free_dirent(tmp, de);
	}
}

/*
 * Helper function for tmpfs_readdir.  Creates a '.' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry or an appropriate error code if another
 * error happens.
 */
static int
tmpfs_dir_getdotdent(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	struct dirent dent;

	TMPFS_VALIDATE_DIR(node);
	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOT);

	dent.d_fileno = node->tn_id;
	dent.d_type = DT_DIR;
	dent.d_namlen = 1;
	dent.d_name[0] = '.';
	dent.d_name[1] = '\0';
	dent.d_reclen = GENERIC_DIRSIZ(&dent);

	if (dent.d_reclen > uio->uio_resid)
		error = EJUSTRETURN;
	else
		error = uiomove(&dent, dent.d_reclen, uio);

	tmpfs_set_status(node, TMPFS_NODE_ACCESSED);

	return (error);
}

/*
 * Helper function for tmpfs_readdir.  Creates a '..' entry for the given
 * directory and returns it in the uio space.  The function returns 0
 * on success, -1 if there was not enough space in the uio structure to
 * hold the directory entry or an appropriate error code if another
 * error happens.
 */
static int
tmpfs_dir_getdotdotdent(struct tmpfs_node *node, struct uio *uio)
{
	int error;
	struct dirent dent;

	TMPFS_VALIDATE_DIR(node);
	MPASS(uio->uio_offset == TMPFS_DIRCOOKIE_DOTDOT);

	/*
	 * Return ENOENT if the current node is already removed.
	 */
	TMPFS_ASSERT_LOCKED(node);
	if (node->tn_dir.tn_parent == NULL)
		return (ENOENT);

	TMPFS_NODE_LOCK(node->tn_dir.tn_parent);
	dent.d_fileno = node->tn_dir.tn_parent->tn_id;
	TMPFS_NODE_UNLOCK(node->tn_dir.tn_parent);

	dent.d_type = DT_DIR;
	dent.d_namlen = 2;
	dent.d_name[0] = '.';
	dent.d_name[1] = '.';
	dent.d_name[2] = '\0';
	dent.d_reclen = GENERIC_DIRSIZ(&dent);

	if (dent.d_reclen > uio->uio_resid)
		error = EJUSTRETURN;
	else
		error = uiomove(&dent, dent.d_reclen, uio);

	tmpfs_set_status(node, TMPFS_NODE_ACCESSED);

	return (error);
}

/*
 * Helper function for tmpfs_readdir.  Returns as much directory entries
 * as can fit in the uio space.  The read starts at uio->uio_offset.
 * The function returns 0 on success, -1 if there was not enough space
 * in the uio structure to hold the directory entry or an appropriate
 * error code if another error happens.
 */
int
tmpfs_dir_getdents(struct tmpfs_node *node, struct uio *uio, int maxcookies,
    u_long *cookies, int *ncookies)
{
	struct tmpfs_dir_cursor dc;
	struct tmpfs_dirent *de;
	off_t off;
	int error;

	TMPFS_VALIDATE_DIR(node);

	off = 0;

	/*
	 * Lookup the node from the current offset.  The starting offset of
	 * 0 will lookup both '.' and '..', and then the first real entry,
	 * or EOF if there are none.  Then find all entries for the dir that
	 * fit into the buffer.  Once no more entries are found (de == NULL),
	 * the offset is set to TMPFS_DIRCOOKIE_EOF, which will cause the next
	 * call to return 0.
	 */
	switch (uio->uio_offset) {
	case TMPFS_DIRCOOKIE_DOT:
		error = tmpfs_dir_getdotdent(node, uio);
		if (error != 0)
			return (error);
		uio->uio_offset = TMPFS_DIRCOOKIE_DOTDOT;
		if (cookies != NULL)
			cookies[(*ncookies)++] = off = uio->uio_offset;
		/* FALLTHROUGH */
	case TMPFS_DIRCOOKIE_DOTDOT:
		error = tmpfs_dir_getdotdotdent(node, uio);
		if (error != 0)
			return (error);
		de = tmpfs_dir_first(node, &dc);
		uio->uio_offset = tmpfs_dirent_cookie(de);
		if (cookies != NULL)
			cookies[(*ncookies)++] = off = uio->uio_offset;
		/* EOF.
		 */
		if (de == NULL)
			return (0);
		break;
	case TMPFS_DIRCOOKIE_EOF:
		return (0);
	default:
		de = tmpfs_dir_lookup_cookie(node, uio->uio_offset, &dc);
		if (de == NULL)
			return (EINVAL);
		if (cookies != NULL)
			off = tmpfs_dirent_cookie(de);
	}

	/* Read as much entries as possible; i.e., until we reach the end of
	 * the directory or we exhaust uio space. */
	do {
		struct dirent d;

		/* Create a dirent structure representing the current
		 * tmpfs_node and fill it. */
		if (de->td_node == NULL) {
			/* Whiteout entry: no node behind it. */
			d.d_fileno = 1;
			d.d_type = DT_WHT;
		} else {
			d.d_fileno = de->td_node->tn_id;
			switch (de->td_node->tn_type) {
			case VBLK:
				d.d_type = DT_BLK;
				break;

			case VCHR:
				d.d_type = DT_CHR;
				break;

			case VDIR:
				d.d_type = DT_DIR;
				break;

			case VFIFO:
				d.d_type = DT_FIFO;
				break;

			case VLNK:
				d.d_type = DT_LNK;
				break;

			case VREG:
				d.d_type = DT_REG;
				break;

			case VSOCK:
				d.d_type = DT_SOCK;
				break;

			default:
				panic("tmpfs_dir_getdents: type %p %d",
				    de->td_node, (int)de->td_node->tn_type);
			}
		}
		d.d_namlen = de->td_namelen;
		MPASS(de->td_namelen < sizeof(d.d_name));
		(void)memcpy(d.d_name, de->ud.td_name, de->td_namelen);
		d.d_name[de->td_namelen] = '\0';
		d.d_reclen = GENERIC_DIRSIZ(&d);

		/* Stop reading if the directory entry we are treating is
		 * bigger than the amount of data that can be returned. */
		if (d.d_reclen > uio->uio_resid) {
			error = EJUSTRETURN;
			break;
		}

		/* Copy the new dirent structure into the output buffer and
		 * advance pointers. */
		error = uiomove(&d, d.d_reclen, uio);
		if (error == 0) {
			de = tmpfs_dir_next(node, &dc);
			if (cookies != NULL) {
				off = tmpfs_dirent_cookie(de);
				MPASS(*ncookies < maxcookies);
				cookies[(*ncookies)++] = off;
			}
		}
	} while (error == 0 && uio->uio_resid > 0 && de != NULL);

	/* Skip setting off when using cookies as it is already done above. */
	if (cookies == NULL)
		off = tmpfs_dirent_cookie(de);

	/* Update the offset and cache. */
	uio->uio_offset = off;
	node->tn_dir.tn_readdir_lastn = off;
	node->tn_dir.tn_readdir_lastp = de;

	tmpfs_set_status(node, TMPFS_NODE_ACCESSED);
	return error;
}

/*
 * Create a whiteout entry named by 'cnp' in directory 'dvp'.  The entry
 * has no backing node (td_node == NULL).
 */
int
tmpfs_dir_whiteout_add(struct vnode *dvp, struct componentname *cnp)
{
	struct tmpfs_dirent *de;
	int error;

	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(dvp->v_mount), NULL,
	    cnp->cn_nameptr, cnp->cn_namelen, &de);
	if (error != 0)
		return (error);
	tmpfs_dir_attach(dvp, de);
	return (0);
}

/*
 * Remove the whiteout entry named by 'cnp' from directory 'dvp'.  The
 * entry must exist and must be a whiteout (asserted).
 */
void
tmpfs_dir_whiteout_remove(struct vnode *dvp, struct componentname *cnp)
{
	struct tmpfs_dirent *de;

	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
	MPASS(de != NULL && de->td_node == NULL);
	tmpfs_dir_detach(dvp, de);
	tmpfs_free_dirent(VFS_TO_TMPFS(dvp->v_mount), de);
}

/*
 * Resizes the aobj associated with the regular file pointed to by 'vp' to
 * the size 'newsize'.  'vp' must point to a vnode that represents a regular
 * file.  'newsize' must be positive.
 *
 * Returns zero on success or an appropriate error code on failure.
 */
int
tmpfs_reg_resize(struct vnode *vp, off_t newsize, boolean_t ignerr)
{
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	vm_object_t uobj;
	vm_page_t m;
	vm_pindex_t idx, newpages, oldpages;
	off_t oldsize;
	int base, rv;

	MPASS(vp->v_type == VREG);
	MPASS(newsize >= 0);

	node = VP_TO_TMPFS_NODE(vp);
	uobj = node->tn_reg.tn_aobj;
	tmp = VFS_TO_TMPFS(vp->v_mount);

	/*
	 * Convert the old and new sizes to the number of pages needed to
	 * store them.  It may happen that we do not need to do anything
	 * because the last allocated page can accommodate the change on
	 * its own.
	 */
	oldsize = node->tn_size;
	oldpages = OFF_TO_IDX(oldsize + PAGE_MASK);
	MPASS(oldpages == uobj->size);
	newpages = OFF_TO_IDX(newsize + PAGE_MASK);
	if (__predict_true(newpages == oldpages && newsize >= oldsize)) {
		node->tn_size = newsize;
		return (0);
	}
	if (newpages > oldpages &&
	    tmpfs_pages_check_avail(tmp, newpages - oldpages) == 0)
		return (ENOSPC);

	VM_OBJECT_WLOCK(uobj);
	if (newsize < oldsize) {
		/*
		 * Zero the truncated part of the last page.
		 */
		base = newsize & PAGE_MASK;
		if (base != 0) {
			idx = OFF_TO_IDX(newsize);
retry:
			m = vm_page_lookup(uobj, idx);
			if (m != NULL) {
				/* Resident page: wait until it is unbusied. */
				if (vm_page_sleep_if_busy(m, "tmfssz"))
					goto retry;
				MPASS(m->valid == VM_PAGE_BITS_ALL);
			} else if (vm_pager_has_page(uobj, idx, NULL, NULL)) {
				/* Page is swapped out; page it in to zero. */
				m = vm_page_alloc(uobj, idx, VM_ALLOC_NORMAL |
				    VM_ALLOC_WAITFAIL);
				if (m == NULL)
					goto retry;
				rv = vm_pager_get_pages(uobj, &m, 1, NULL,
				    NULL);
				vm_page_lock(m);
				if (rv == VM_PAGER_OK) {
					/*
					 * Since the page was not resident,
					 * and therefore not recently
					 * accessed, immediately enqueue it
					 * for asynchronous laundering.  The
					 * current operation is not regarded
					 * as an access.
					 */
					vm_page_launder(m);
					vm_page_unlock(m);
					vm_page_xunbusy(m);
				} else {
					vm_page_free(m);
					vm_page_unlock(m);
					if (ignerr)
						m = NULL;
					else {
						VM_OBJECT_WUNLOCK(uobj);
						return (EIO);
					}
				}
			}
			if (m != NULL) {
				pmap_zero_page_area(m, base,
				    PAGE_SIZE - base);
				vm_page_dirty(m);
				vm_pager_page_unswapped(m);
			}
		}

		/*
		 * Release any swap space and free any whole pages.
		 */
		if (newpages < oldpages) {
			swap_pager_freespace(uobj, newpages, oldpages -
			    newpages);
			vm_object_page_remove(uobj, newpages, 0, 0);
		}
	}
	uobj->size = newpages;
	VM_OBJECT_WUNLOCK(uobj);

	atomic_add_long(&tmp->tm_pages_used, newpages - oldpages);

	node->tn_size = newsize;
	return (0);
}

/*
 * Propagate mmap-write dirtiness from the VM object to the node's
 * status flags (TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED), clearing
 * OBJ_TMPFS_DIRTY.  Requires an exclusively locked VREG vnode.
 */
void
tmpfs_check_mtime(struct vnode *vp)
{
	struct tmpfs_node *node;
	struct vm_object *obj;

	ASSERT_VOP_ELOCKED(vp, "check_mtime");
	if (vp->v_type != VREG)
		return;
	obj = vp->v_object;
	KASSERT((obj->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) ==
	    (OBJ_TMPFS_NODE | OBJ_TMPFS), ("non-tmpfs obj"));
	/* unlocked read */
	if ((obj->flags & OBJ_TMPFS_DIRTY) != 0) {
		VM_OBJECT_WLOCK(obj);
		/* Re-check under the object lock. */
		if ((obj->flags & OBJ_TMPFS_DIRTY) != 0) {
			obj->flags &= ~OBJ_TMPFS_DIRTY;
			node = VP_TO_TMPFS_NODE(vp);
			node->tn_status |= TMPFS_NODE_MODIFIED |
			    TMPFS_NODE_CHANGED;
		}
		VM_OBJECT_WUNLOCK(obj);
	}
}

/*
 * Change flags of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chflags(struct vnode *vp, u_long flags, struct ucred *cred,
    struct thread *p)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chflags");

	node = VP_TO_TMPFS_NODE(vp);

	/* Reject any flag outside the set tmpfs supports. */
	if ((flags & ~(SF_APPEND | SF_ARCHIVED | SF_IMMUTABLE | SF_NOUNLINK |
	    UF_APPEND | UF_ARCHIVE | UF_HIDDEN | UF_IMMUTABLE | UF_NODUMP |
	    UF_NOUNLINK | UF_OFFLINE | UF_OPAQUE | UF_READONLY | UF_REPARSE |
	    UF_SPARSE | UF_SYSTEM)) != 0)
		return (EOPNOTSUPP);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/*
	 * Callers may only modify the file flags on objects they
	 * have VADMIN rights for.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
		return (error);
	/*
	 * Unprivileged processes are not permitted to unset system
	 * flags, or modify flags if any system flags are set.
	 */
	if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) {
		/* Privileged: system flags may only change at securelevel 0. */
		if (node->tn_flags &
		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) {
			error = securelevel_gt(cred, 0);
			if (error)
				return (error);
		}
	} else {
		if (node->tn_flags &
		    (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND) ||
		    ((flags ^ node->tn_flags) & SF_SETTABLE))
			return (EPERM);
	}
	node->tn_flags = flags;
	node->tn_status |= TMPFS_NODE_CHANGED;

	ASSERT_VOP_ELOCKED(vp, "chflags2");

	return (0);
}

/*
 * Change access mode on the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chmod(struct vnode *vp, mode_t mode, struct ucred *cred, struct thread *p)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chmod");

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either.
	 */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/*
	 * To modify the permissions on a file, must possess VADMIN
	 * for that file.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
		return (error);

	/*
	 * Privileged processes may set the sticky bit on non-directories,
	 * as well as set the setgid bit on a file with a group that the
	 * process is not a member of.
	 */
	if (vp->v_type != VDIR && (mode & S_ISTXT)) {
		if (priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0))
			return (EFTYPE);
	}
	if (!groupmember(node->tn_gid, cred) && (mode & S_ISGID)) {
		error = priv_check_cred(cred, PRIV_VFS_SETGID, 0);
		if (error)
			return (error);
	}

	/* Replace only the permission bits; keep the file type bits. */
	node->tn_mode &= ~ALLPERMS;
	node->tn_mode |= mode & ALLPERMS;

	node->tn_status |= TMPFS_NODE_CHANGED;

	ASSERT_VOP_ELOCKED(vp, "chmod2");

	return (0);
}

/*
 * Change ownership of the given vnode.  At least one of uid or gid must
 * be different than VNOVAL.  If one is set to that value, the attribute
 * is unchanged.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chown(struct vnode *vp, uid_t uid, gid_t gid, struct ucred *cred,
    struct thread *p)
{
	int error;
	struct tmpfs_node *node;
	uid_t ouid;
	gid_t ogid;

	ASSERT_VOP_ELOCKED(vp, "chown");

	node = VP_TO_TMPFS_NODE(vp);

	/* Assign default values if they are unknown. */
	MPASS(uid != VNOVAL || gid != VNOVAL);
	if (uid == VNOVAL)
		uid = node->tn_uid;
	if (gid == VNOVAL)
		gid = node->tn_gid;
	MPASS(uid != VNOVAL && gid != VNOVAL);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	/*
	 * To modify the ownership of a file, must possess VADMIN for that
	 * file.
	 */
	if ((error = VOP_ACCESS(vp, VADMIN, cred, p)))
		return (error);

	/*
	 * To change the owner of a file, or change the group of a file to a
	 * group of which we are not a member, the caller must have
	 * privilege.
	 */
	if ((uid != node->tn_uid ||
	    (gid != node->tn_gid && !groupmember(gid, cred))) &&
	    (error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0)))
		return (error);

	ogid = node->tn_gid;
	ouid = node->tn_uid;

	node->tn_uid = uid;
	node->tn_gid = gid;

	node->tn_status |= TMPFS_NODE_CHANGED;

	/* Clear setuid/setgid on ownership change unless privileged. */
	if ((node->tn_mode & (S_ISUID | S_ISGID)) && (ouid != uid || ogid != gid)) {
		if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0))
			node->tn_mode &= ~(S_ISUID | S_ISGID);
	}

	ASSERT_VOP_ELOCKED(vp, "chown2");

	return (0);
}

/*
 * Change size of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chsize(struct vnode *vp, u_quad_t size, struct ucred *cred,
    struct thread *p)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chsize");

	node = VP_TO_TMPFS_NODE(vp);

	/* Decide whether this is a valid operation based on the file type. */
	error = 0;
	switch (vp->v_type) {
	case VDIR:
		return EISDIR;

	case VREG:
		if (vp->v_mount->mnt_flag & MNT_RDONLY)
			return EROFS;
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VFIFO:
		/* Allow modifications of special files even if in the file
		 * system is mounted read-only (we are not modifying the
		 * files themselves, but the objects they represent). */
		return 0;

	default:
		/* Anything else is unsupported. */
		return EOPNOTSUPP;
	}

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = tmpfs_truncate(vp, size);
	/* tmpfs_truncate will raise the NOTE_EXTEND and NOTE_ATTRIB kevents
	 * for us, as will update tn_status; no need to do that here. */

	ASSERT_VOP_ELOCKED(vp, "chsize2");

	return (error);
}

/*
 * Change access and modification times of the given vnode.
 * Caller should execute tmpfs_update on vp after a successful execution.
 * The vnode must be locked on entry and remain locked on exit.
 */
int
tmpfs_chtimes(struct vnode *vp, struct vattr *vap,
    struct ucred *cred, struct thread *l)
{
	int error;
	struct tmpfs_node *node;

	ASSERT_VOP_ELOCKED(vp, "chtimes");

	node = VP_TO_TMPFS_NODE(vp);

	/* Disallow this operation if the file system is mounted read-only. */
	if (vp->v_mount->mnt_flag & MNT_RDONLY)
		return EROFS;

	/* Immutable or append-only files cannot be modified, either. */
	if (node->tn_flags & (IMMUTABLE | APPEND))
		return EPERM;

	error = vn_utimes_perm(vp, vap, cred, l);
	if (error != 0)
		return (error);

	/* Mark which timestamps the caller supplied; tmpfs_itimes applies
	 * them below based on these status bits. */
	if (vap->va_atime.tv_sec != VNOVAL)
		node->tn_status |= TMPFS_NODE_ACCESSED;

	if (vap->va_mtime.tv_sec != VNOVAL)
		node->tn_status |= TMPFS_NODE_MODIFIED;

	if (vap->va_birthtime.tv_sec != VNOVAL)
		node->tn_status |= TMPFS_NODE_MODIFIED;

	tmpfs_itimes(vp, &vap->va_atime, &vap->va_mtime);

	if (vap->va_birthtime.tv_sec != VNOVAL)
		node->tn_birthtime = vap->va_birthtime;
	ASSERT_VOP_ELOCKED(vp, "chtimes2");

	return (0);
}

/*
 * OR 'status' bits into the node's tn_status under the node interlock,
 * skipping the lock when the bits are already set (unlocked pre-check).
 */
void
tmpfs_set_status(struct tmpfs_node *node, int status)
{

	if ((node->tn_status & status) == status)
		return;
	TMPFS_NODE_LOCK(node);
	node->tn_status |= status;
	TMPFS_NODE_UNLOCK(node);
}

/* Sync timestamps */
void
tmpfs_itimes(struct vnode *vp, const struct timespec *acc,
    const struct timespec *mod)
{
	struct tmpfs_node *node;
	struct timespec now;

	ASSERT_VOP_LOCKED(vp, "tmpfs_itimes");
	node = VP_TO_TMPFS_NODE(vp);

	/* Nothing pending: avoid taking the node lock. */
	if ((node->tn_status & (TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
	    TMPFS_NODE_CHANGED)) == 0)
		return;

	vfs_timestamp(&now);
	TMPFS_NODE_LOCK(node);
	if (node->tn_status & TMPFS_NODE_ACCESSED) {
		if (acc == NULL)
			acc = &now;
		node->tn_atime = *acc;
	}
	if (node->tn_status & TMPFS_NODE_MODIFIED) {
		if (mod == NULL)
			mod = &now;
		node->tn_mtime = *mod;
	}
	if (node->tn_status & TMPFS_NODE_CHANGED)
		node->tn_ctime = now;
	node->tn_status &= ~(TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED |
	    TMPFS_NODE_CHANGED);
	TMPFS_NODE_UNLOCK(node);
	/* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */
	random_harvest_queue(node, sizeof(*node), 1, RANDOM_FS_ATIME);
}

/* Flush any pending timestamp updates for 'vp' using the current time. */
void
tmpfs_update(struct vnode *vp)
{

	tmpfs_itimes(vp, NULL, NULL);
}

/*
 * Truncate (or extend) the regular file 'vp' to 'length' bytes via
 * tmpfs_reg_resize, updating status bits and timestamps.  Returns zero
 * or an errno value.
 */
int
tmpfs_truncate(struct vnode *vp, off_t length)
{
	int error;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	if (length < 0) {
		error = EINVAL;
		goto out;
	}

	if (node->tn_size == length) {
		error = 0;
		goto out;
	}

	if (length > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize)
		return (EFBIG);

	error = tmpfs_reg_resize(vp, length, FALSE);
	if (error == 0)
		node->tn_status |= TMPFS_NODE_CHANGED | TMPFS_NODE_MODIFIED;

out:
	tmpfs_update(vp);

	return (error);
}

/* Order directory entries by name hash for the per-directory RB tree. */
static __inline int
tmpfs_dirtree_cmp(struct tmpfs_dirent *a, struct tmpfs_dirent *b)
{
	if (a->td_hash > b->td_hash)
		return (1);
	else if (a->td_hash < b->td_hash)
		return (-1);
	return (0);
}

RB_GENERATE_STATIC(tmpfs_dir, tmpfs_dirent, uh.td_entries, tmpfs_dirtree_cmp);

Index: head/sys/fs/tmpfs/tmpfs_vfsops.c
===================================================================
--- head/sys/fs/tmpfs/tmpfs_vfsops.c	(revision 326267)
+++ head/sys/fs/tmpfs/tmpfs_vfsops.c	(revision 326268)
@@ -1,502 +1,504 @@
/* $NetBSD: tmpfs_vfsops.c,v 1.10 2005/12/11 12:24:29 christos Exp $ */

/*-
+ * SPDX-License-Identifier: BSD-2-Clause-NetBSD
+ *
 * Copyright (c) 2005 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /* * Efficient memory file system. * * tmpfs is a file system that uses FreeBSD's virtual memory * sub-system to store file data and metadata in an efficient way. * This means that it does not follow the structure of an on-disk file * system because it simply does not need to. Instead, it uses * memory-specific data structures and algorithms to automatically * allocate and release resources. 
 */

#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

/*
 * Default permission for root node
 */
#define TMPFS_DEFAULT_ROOT_MODE	(S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH)

MALLOC_DEFINE(M_TMPFSMNT, "tmpfs mount", "tmpfs mount structures");
MALLOC_DEFINE(M_TMPFSNAME, "tmpfs name", "tmpfs file names");

static int	tmpfs_mount(struct mount *);
static int	tmpfs_unmount(struct mount *, int);
static int	tmpfs_root(struct mount *, int flags, struct vnode **);
static int	tmpfs_fhtovp(struct mount *, struct fid *, int,
		    struct vnode **);
static int	tmpfs_statfs(struct mount *, struct statfs *);
static void	tmpfs_susp_clean(struct mount *);

/* Mount options recognized by tmpfs. */
static const char *tmpfs_opts[] = {
	"from", "size", "maxfilesize", "inodes", "uid", "gid", "mode",
	"export", "union", "nonc", NULL
};

/* Subset of options that may be changed on an MNT_UPDATE remount. */
static const char *tmpfs_updateopts[] = {
	"from", "export", NULL
};

/*
 * UMA constructor for tmpfs nodes: reset the per-use fields and bump the
 * generation so stale file handles are invalidated on reuse.
 */
static int
tmpfs_node_ctor(void *mem, int size, void *arg, int flags)
{
	struct tmpfs_node *node = (struct tmpfs_node *)mem;

	node->tn_gen++;
	node->tn_size = 0;
	node->tn_status = 0;
	node->tn_flags = 0;
	node->tn_links = 0;
	node->tn_vnode = NULL;
	node->tn_vpstate = 0;

	return (0);
}

/* UMA destructor: mark the cached node as holding no vnode type. */
static void
tmpfs_node_dtor(void *mem, int size, void *arg)
{
	struct tmpfs_node *node = (struct tmpfs_node *)mem;
	node->tn_type = VNON;
}

/* UMA init: set up the node interlock and seed a random generation. */
static int
tmpfs_node_init(void *mem, int size, int flags)
{
	struct tmpfs_node *node = (struct tmpfs_node *)mem;
	node->tn_id = 0;

	mtx_init(&node->tn_interlock, "tmpfs node interlock", NULL, MTX_DEF);
	node->tn_gen = arc4random();

	return (0);
}

/* UMA fini: tear down the node interlock. */
static void
tmpfs_node_fini(void *mem, int size)
{
	struct tmpfs_node *node = (struct tmpfs_node *)mem;

	mtx_destroy(&node->tn_interlock);
}

/*
 * VFS mount entry point: parse options, size the mount, create the UMA
 * zones and the root directory node.
 */
static int
tmpfs_mount(struct mount *mp)
{
	/* Heuristic nodes-per-page used to derive the default inode limit. */
	const size_t nodes_per_page = howmany(PAGE_SIZE,
	    sizeof(struct tmpfs_dirent) + sizeof(struct tmpfs_node));
	struct tmpfs_mount *tmp;
	struct tmpfs_node *root;
	struct thread *td = curthread;
	int error;
	bool nonc;
	/* Size counters. */
	u_quad_t pages;
	off_t nodes_max, size_max, maxfilesize;

	/* Root node attributes. */
	uid_t root_uid;
	gid_t root_gid;
	mode_t root_mode;

	struct vattr va;

	if (!prison_allow(td->td_ucred, PR_ALLOW_MOUNT_TMPFS))
		return (EPERM);

	if (vfs_filteropt(mp->mnt_optnew, tmpfs_opts))
		return (EINVAL);

	if (mp->mnt_flag & MNT_UPDATE) {
		/* Only support update mounts for certain options. */
		if (vfs_filteropt(mp->mnt_optnew, tmpfs_updateopts) != 0)
			return (EOPNOTSUPP);
		if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) !=
		    ((struct tmpfs_mount *)mp->mnt_data)->tm_ronly)
			return (EOPNOTSUPP);
		return (0);
	}

	/* Root attribute defaults come from the covered vnode. */
	vn_lock(mp->mnt_vnodecovered, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(mp->mnt_vnodecovered, &va, mp->mnt_cred);
	VOP_UNLOCK(mp->mnt_vnodecovered, 0);
	if (error)
		return (error);

	/* Non-root mounters may not override uid/gid/mode. */
	if (mp->mnt_cred->cr_ruid != 0 ||
	    vfs_scanopt(mp->mnt_optnew, "gid", "%d", &root_gid) != 1)
		root_gid = va.va_gid;
	if (mp->mnt_cred->cr_ruid != 0 ||
	    vfs_scanopt(mp->mnt_optnew, "uid", "%d", &root_uid) != 1)
		root_uid = va.va_uid;
	if (mp->mnt_cred->cr_ruid != 0 ||
	    vfs_scanopt(mp->mnt_optnew, "mode", "%ho", &root_mode) != 1)
		root_mode = va.va_mode;
	if (vfs_getopt_size(mp->mnt_optnew, "inodes", &nodes_max) != 0)
		nodes_max = 0;
	if (vfs_getopt_size(mp->mnt_optnew, "size", &size_max) != 0)
		size_max = 0;
	if (vfs_getopt_size(mp->mnt_optnew, "maxfilesize", &maxfilesize) != 0)
		maxfilesize = 0;
	nonc = vfs_getopt(mp->mnt_optnew, "nonc", NULL, NULL) == 0;

	/* Do not allow mounts if we do not have enough memory to preserve
	 * the minimum reserved pages. */
	if (tmpfs_mem_avail() < TMPFS_PAGES_MINRESERVED)
		return (ENOSPC);

	/* Get the maximum number of memory pages this file system is
	 * allowed to use, based on the maximum size the user passed in
	 * the mount structure.  A value of zero is treated as if the
	 * maximum available space was requested.
	 */
	if (size_max == 0 || size_max > OFF_MAX - PAGE_SIZE ||
	    (SIZE_MAX < OFF_MAX && size_max / PAGE_SIZE >= SIZE_MAX))
		pages = SIZE_MAX;
	else {
		size_max = roundup(size_max, PAGE_SIZE);
		pages = howmany(size_max, PAGE_SIZE);
	}
	MPASS(pages > 0);

	/* Derive a default inode limit from the page limit, clamped to
	 * INT_MAX; values <= 3 request the default. */
	if (nodes_max <= 3) {
		if (pages < INT_MAX / nodes_per_page)
			nodes_max = pages * nodes_per_page;
		else
			nodes_max = INT_MAX;
	}
	if (nodes_max > INT_MAX)
		nodes_max = INT_MAX;
	MPASS(nodes_max >= 3);

	/* Allocate the tmpfs mount structure and fill it. */
	tmp = (struct tmpfs_mount *)malloc(sizeof(struct tmpfs_mount),
	    M_TMPFSMNT, M_WAITOK | M_ZERO);

	mtx_init(&tmp->tm_allnode_lock, "tmpfs allnode lock", NULL, MTX_DEF);
	tmp->tm_nodes_max = nodes_max;
	tmp->tm_nodes_inuse = 0;
	tmp->tm_refcount = 1;
	tmp->tm_maxfilesize = maxfilesize > 0 ? maxfilesize : OFF_MAX;
	LIST_INIT(&tmp->tm_nodes_used);

	tmp->tm_pages_max = pages;
	tmp->tm_pages_used = 0;
	/* Inode numbers start at 2; the root node takes ino 2. */
	tmp->tm_ino_unr = new_unrhdr(2, INT_MAX, &tmp->tm_allnode_lock);
	tmp->tm_dirent_pool = uma_zcreate("TMPFS dirent",
	    sizeof(struct tmpfs_dirent), NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);
	tmp->tm_node_pool = uma_zcreate("TMPFS node",
	    sizeof(struct tmpfs_node), tmpfs_node_ctor, tmpfs_node_dtor,
	    tmpfs_node_init, tmpfs_node_fini, UMA_ALIGN_PTR, 0);
	tmp->tm_ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
	tmp->tm_nonc = nonc;

	/* Allocate the root node.
	 */
	error = tmpfs_alloc_node(mp, tmp, VDIR, root_uid, root_gid,
	    root_mode & ALLPERMS, NULL, NULL, VNOVAL, &root);

	if (error != 0 || root == NULL) {
		/* Undo all mount-structure setup done so far. */
		uma_zdestroy(tmp->tm_node_pool);
		uma_zdestroy(tmp->tm_dirent_pool);
		delete_unrhdr(tmp->tm_ino_unr);
		free(tmp, M_TMPFSMNT);
		return (error);
	}
	KASSERT(root->tn_id == 2,
	    ("tmpfs root with invalid ino: %ju", (uintmax_t)root->tn_id));
	tmp->tm_root = root;

	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED;
	MNT_IUNLOCK(mp);

	mp->mnt_data = tmp;
	mp->mnt_stat.f_namemax = MAXNAMLEN;
	vfs_getnewfsid(mp);
	vfs_mountedfrom(mp, "tmpfs");

	return 0;
}

/* ARGSUSED2 */
/*
 * VFS unmount entry point: suspend writes, flush all vnodes, destroy all
 * remaining nodes, and drop the mount reference on the tmpfs_mount.
 */
static int
tmpfs_unmount(struct mount *mp, int mntflags)
{
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	int error, flags;

	flags = (mntflags & MNT_FORCE) != 0 ? FORCECLOSE : 0;
	tmp = VFS_TO_TMPFS(mp);

	/* Stop writers */
	error = vfs_write_suspend_umnt(mp);
	if (error != 0)
		return (error);

	/*
	 * At this point, nodes cannot be destroyed by any other
	 * thread because write suspension is started.
	 */
	for (;;) {
		error = vflush(mp, 0, flags, curthread);
		if (error != 0) {
			vfs_write_resume(mp, VR_START_WRITE);
			return (error);
		}
		MNT_ILOCK(mp);
		if (mp->mnt_nvnodelistsize == 0) {
			MNT_IUNLOCK(mp);
			break;
		}
		MNT_IUNLOCK(mp);
		/* Without MNT_FORCE, lingering vnodes make unmount fail;
		 * with it, keep flushing until the list drains. */
		if ((mntflags & MNT_FORCE) == 0) {
			vfs_write_resume(mp, VR_START_WRITE);
			return (EBUSY);
		}
	}

	TMPFS_LOCK(tmp);
	while ((node = LIST_FIRST(&tmp->tm_nodes_used)) != NULL) {
		TMPFS_NODE_LOCK(node);
		if (node->tn_type == VDIR)
			tmpfs_dir_destroy(tmp, node);
		/* tmpfs_free_node_locked drops both locks; re-take the
		 * mount lock only if the node was actually freed. */
		if (tmpfs_free_node_locked(tmp, node, true))
			TMPFS_LOCK(tmp);
		else
			TMPFS_NODE_UNLOCK(node);
	}

	mp->mnt_data = NULL;
	tmpfs_free_tmp(tmp);
	vfs_write_resume(mp, VR_START_WRITE);

	MNT_ILOCK(mp);
	mp->mnt_flag &= ~MNT_LOCAL;
	MNT_IUNLOCK(mp);

	return (0);
}

/*
 * Drop one reference on 'tmp' (entered with TMPFS_LOCK held); on the last
 * reference, destroy the zones, unr header, lock, and the structure.
 */
void
tmpfs_free_tmp(struct tmpfs_mount *tmp)
{

	MPASS(tmp->tm_refcount > 0);
	tmp->tm_refcount--;
	if (tmp->tm_refcount > 0) {
		TMPFS_UNLOCK(tmp);
		return;
	}
	TMPFS_UNLOCK(tmp);

	uma_zdestroy(tmp->tm_dirent_pool);
	uma_zdestroy(tmp->tm_node_pool);
	clear_unrhdr(tmp->tm_ino_unr);
	delete_unrhdr(tmp->tm_ino_unr);

	mtx_destroy(&tmp->tm_allnode_lock);
	MPASS(tmp->tm_pages_used == 0);
	MPASS(tmp->tm_nodes_inuse == 0);

	free(tmp, M_TMPFSMNT);
}

/* VFS root entry point: return a locked vnode for the root node. */
static int
tmpfs_root(struct mount *mp, int flags, struct vnode **vpp)
{
	int error;

	error = tmpfs_alloc_vp(mp, VFS_TO_TMPFS(mp)->tm_root, flags, vpp);
	if (error == 0)
		(*vpp)->v_vflag |= VV_ROOT;

	return (error);
}

/*
 * VFS fhtovp entry point: translate an NFS-style file handle (inode id +
 * generation) into a locked, referenced vnode.
 */
static int
tmpfs_fhtovp(struct mount *mp, struct fid *fhp, int flags,
    struct vnode **vpp)
{
	struct tmpfs_fid *tfhp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;
	int error;

	tmp = VFS_TO_TMPFS(mp);

	tfhp = (struct tmpfs_fid *)fhp;
	if (tfhp->tf_len != sizeof(struct tmpfs_fid))
		return (EINVAL);

	if (tfhp->tf_id >= tmp->tm_nodes_max)
		return (EINVAL);

	TMPFS_LOCK(tmp);
	LIST_FOREACH(node, &tmp->tm_nodes_used, tn_entries) {
		if (node->tn_id == tfhp->tf_id &&
		    node->tn_gen == tfhp->tf_gen) {
			/* Hold the node across the unlock below. */
			tmpfs_ref_node(node);
			break;
		}
	}
	TMPFS_UNLOCK(tmp);

	if (node != NULL) {
		error = tmpfs_alloc_vp(mp, node, LK_EXCLUSIVE, vpp);
		tmpfs_free_node(tmp,
		    node);
	} else
		error = EINVAL;
	return (error);
}

/* ARGSUSED2 */
/* VFS statfs entry point: report block/inode totals and free counts. */
static int
tmpfs_statfs(struct mount *mp, struct statfs *sbp)
{
	struct tmpfs_mount *tmp;
	size_t used;

	tmp = VFS_TO_TMPFS(mp);

	sbp->f_iosize = PAGE_SIZE;
	sbp->f_bsize = PAGE_SIZE;

	used = tmpfs_pages_used(tmp);
	/* An unlimited mount reports currently-available memory instead. */
	if (tmp->tm_pages_max != ULONG_MAX)
		sbp->f_blocks = tmp->tm_pages_max;
	else
		sbp->f_blocks = used + tmpfs_mem_avail();
	if (sbp->f_blocks <= used)
		sbp->f_bavail = 0;
	else
		sbp->f_bavail = sbp->f_blocks - used;
	sbp->f_bfree = sbp->f_bavail;
	used = tmp->tm_nodes_inuse;
	sbp->f_files = tmp->tm_nodes_max;
	if (sbp->f_files <= used)
		sbp->f_ffree = 0;
	else
		sbp->f_ffree = sbp->f_files - used;
	/* sbp->f_owner = tmp->tn_uid; */

	return 0;
}

/*
 * VFS sync entry point.  MNT_SUSPEND marks the mount suspended; MNT_LAZY
 * propagates mmap-write dirtiness into node timestamps.
 */
static int
tmpfs_sync(struct mount *mp, int waitfor)
{
	struct vnode *vp, *mvp;
	struct vm_object *obj;

	if (waitfor == MNT_SUSPEND) {
		MNT_ILOCK(mp);
		mp->mnt_kern_flag |= MNTK_SUSPEND2 | MNTK_SUSPENDED;
		MNT_IUNLOCK(mp);
	} else if (waitfor == MNT_LAZY) {
		/*
		 * Handle lazy updates of mtime from writes to mmaped
		 * regions.  Use MNT_VNODE_FOREACH_ALL instead of
		 * MNT_VNODE_FOREACH_ACTIVE, since unmap of the
		 * tmpfs-backed vnode does not call vinactive(), due
		 * to vm object type is OBJT_SWAP.
		 */
		MNT_VNODE_FOREACH_ALL(vp, mp, mvp) {
			if (vp->v_type != VREG) {
				VI_UNLOCK(vp);
				continue;
			}
			obj = vp->v_object;
			KASSERT((obj->flags & (OBJ_TMPFS_NODE | OBJ_TMPFS)) ==
			    (OBJ_TMPFS_NODE | OBJ_TMPFS), ("non-tmpfs obj"));

			/*
			 * Unlocked read, avoid taking vnode lock if
			 * not needed.  Lost update will be handled on
			 * the next call.
			 */
			if ((obj->flags & OBJ_TMPFS_DIRTY) == 0) {
				VI_UNLOCK(vp);
				continue;
			}
			if (vget(vp, LK_EXCLUSIVE | LK_RETRY | LK_INTERLOCK,
			    curthread) != 0)
				continue;
			tmpfs_check_mtime(vp);
			vput(vp);
		}
	}
	return (0);
}

/*
 * The presence of a susp_clean method tells the VFS to track writes.
 */
static void
tmpfs_susp_clean(struct mount *mp __unused)
{
}

/*
 * tmpfs vfs operations.
 */

struct vfsops tmpfs_vfsops = {
	.vfs_mount =			tmpfs_mount,
	.vfs_unmount =			tmpfs_unmount,
	.vfs_root =			tmpfs_root,
	.vfs_statfs =			tmpfs_statfs,
	.vfs_fhtovp =			tmpfs_fhtovp,
	.vfs_sync =			tmpfs_sync,
	.vfs_susp_clean =		tmpfs_susp_clean,
};
VFS_SET(tmpfs_vfsops, tmpfs, VFCF_JAIL);

Index: head/sys/fs/tmpfs/tmpfs_vnops.c
===================================================================
--- head/sys/fs/tmpfs/tmpfs_vnops.c	(revision 326267)
+++ head/sys/fs/tmpfs/tmpfs_vnops.c	(revision 326268)
@@ -1,1572 +1,1574 @@
/* $NetBSD: tmpfs_vnops.c,v 1.39 2007/07/23 15:41:01 jmmv Exp $ */

/*-
+ * SPDX-License-Identifier: BSD-2-Clause-NetBSD
+ *
 * Copyright (c) 2005, 2006 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Julio M. Merino Vidal, developed as part of Google's Summer of Code
 * 2005 program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * tmpfs vnode interface.
 */
#include
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

SYSCTL_DECL(_vfs_tmpfs);

static volatile int tmpfs_rename_restarts;
SYSCTL_INT(_vfs_tmpfs, OID_AUTO, rename_restarts, CTLFLAG_RD,
    __DEVOLATILE(int *, &tmpfs_rename_restarts), 0,
    "Times rename had to restart due to lock contention");

/* vn_vget_ino_gen() callback: allocate a vnode for the node in 'arg'. */
static int
tmpfs_vn_get_ino_alloc(struct mount *mp, void *arg, int lkflags,
    struct vnode **rvp)
{

	return (tmpfs_alloc_vp(mp, arg, lkflags, rvp));
}

/*
 * Common lookup routine shared by tmpfs_lookup and tmpfs_cached_lookup.
 * Resolves 'cnp' within directory 'dvp' and returns a locked vnode in
 * *vpp, implementing the standard VOP_LOOKUP contract (EJUSTRETURN for
 * creatable last components, SAVENAME, whiteout handling, sticky-bit
 * checks for delete/rename).
 */
static int
tmpfs_lookup1(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp)
{
	struct tmpfs_dirent *de;
	struct tmpfs_node *dnode, *pnode;
	struct tmpfs_mount *tm;
	int error;

	dnode = VP_TO_TMPFS_DIR(dvp);
	*vpp = NULLVP;

	/* Check accessibility of requested node as a first step. */
	error = VOP_ACCESS(dvp, VEXEC, cnp->cn_cred, cnp->cn_thread);
	if (error != 0)
		goto out;

	/* We cannot be requesting the parent directory of the root node.
	 */
	MPASS(IMPLIES(dnode->tn_type == VDIR &&
	    dnode->tn_dir.tn_parent == dnode,
	    !(cnp->cn_flags & ISDOTDOT)));

	TMPFS_ASSERT_LOCKED(dnode);
	/* A NULL parent means the directory itself was removed. */
	if (dnode->tn_dir.tn_parent == NULL) {
		error = ENOENT;
		goto out;
	}

	if (cnp->cn_flags & ISDOTDOT) {
		tm = VFS_TO_TMPFS(dvp->v_mount);
		pnode = dnode->tn_dir.tn_parent;
		/* Hold the parent node across the relock dance below. */
		tmpfs_ref_node(pnode);
		error = vn_vget_ino_gen(dvp, tmpfs_vn_get_ino_alloc,
		    pnode, cnp->cn_lkflags, vpp);
		tmpfs_free_node(tm, pnode);
		if (error != 0)
			goto out;
	} else if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.') {
		VREF(dvp);
		*vpp = dvp;
		error = 0;
	} else {
		de = tmpfs_dir_lookup(dnode, NULL, cnp);
		if (de != NULL && de->td_node == NULL)
			cnp->cn_flags |= ISWHITEOUT;
		if (de == NULL || de->td_node == NULL) {
			/*
			 * The entry was not found in the directory.
			 * This is OK if we are creating or renaming an
			 * entry and are working on the last component of
			 * the path name.
			 */
			if ((cnp->cn_flags & ISLASTCN) &&
			    (cnp->cn_nameiop == CREATE || \
			    cnp->cn_nameiop == RENAME ||
			    (cnp->cn_nameiop == DELETE &&
			    cnp->cn_flags & DOWHITEOUT &&
			    cnp->cn_flags & ISWHITEOUT))) {
				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
				    cnp->cn_thread);
				if (error != 0)
					goto out;

				/*
				 * Keep the component name in the buffer for
				 * future uses.
				 */
				cnp->cn_flags |= SAVENAME;

				error = EJUSTRETURN;
			} else
				error = ENOENT;
		} else {
			struct tmpfs_node *tnode;

			/*
			 * The entry was found, so get its associated
			 * tmpfs_node.
			 */
			tnode = de->td_node;

			/*
			 * If we are not at the last path component and
			 * found a non-directory or non-link entry (which
			 * may itself be pointing to a directory), raise
			 * an error.
			 */
			if ((tnode->tn_type != VDIR &&
			    tnode->tn_type != VLNK) &&
			    !(cnp->cn_flags & ISLASTCN)) {
				error = ENOTDIR;
				goto out;
			}

			/*
			 * If we are deleting or renaming the entry, keep
			 * track of its tmpfs_dirent so that it can be
			 * easily deleted later.
			 */
			if ((cnp->cn_flags & ISLASTCN) &&
			    (cnp->cn_nameiop == DELETE ||
			    cnp->cn_nameiop == RENAME)) {
				error = VOP_ACCESS(dvp, VWRITE, cnp->cn_cred,
				    cnp->cn_thread);
				if (error != 0)
					goto out;

				/* Allocate a new vnode on the matching entry. */
				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
				    cnp->cn_lkflags, vpp);
				if (error != 0)
					goto out;

				/* Sticky directory: only the owner of the
				 * directory or of the entry may remove it. */
				if ((dnode->tn_mode & S_ISTXT) &&
				  VOP_ACCESS(dvp, VADMIN, cnp->cn_cred,
				  cnp->cn_thread) && VOP_ACCESS(*vpp, VADMIN,
				  cnp->cn_cred, cnp->cn_thread)) {
					error = EPERM;
					vput(*vpp);
					*vpp = NULL;
					goto out;
				}
				cnp->cn_flags |= SAVENAME;
			} else {
				error = tmpfs_alloc_vp(dvp->v_mount, tnode,
				    cnp->cn_lkflags, vpp);
				if (error != 0)
					goto out;
			}
		}
	}

	/*
	 * Store the result of this lookup in the cache.  Avoid this if the
	 * request was for creation, as it does not improve timings on
	 * empirical tests.
	 */
	if ((cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
		cache_enter(dvp, *vpp, cnp);

out:
	/*
	 * If there were no errors, *vpp cannot be null and it must be
	 * locked.
	 */
	MPASS(IFF(error == 0, *vpp != NULLVP && VOP_ISLOCKED(*vpp)));

	return (error);
}

/* VOP_CACHEDLOOKUP: lookup on a namecache miss. */
static int
tmpfs_cached_lookup(struct vop_cachedlookup_args *v)
{

	return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
}

/* VOP_LOOKUP: uncached lookup (used when the namecache is disabled). */
static int
tmpfs_lookup(struct vop_lookup_args *v)
{

	return (tmpfs_lookup1(v->a_dvp, v->a_vpp, v->a_cnp));
}

/* VOP_CREATE: create a regular file or socket node. */
static int
tmpfs_create(struct vop_create_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode **vpp = v->a_vpp;
	struct componentname *cnp = v->a_cnp;
	struct vattr *vap = v->a_vap;
	int error;

	MPASS(vap->va_type == VREG || vap->va_type == VSOCK);

	error = tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
	if (error == 0 && (cnp->cn_flags & MAKEENTRY) != 0 && tmpfs_use_nc(dvp))
		cache_enter(dvp, *vpp, cnp);
	return (error);
}

/* VOP_MKNOD: create a block, character, or FIFO special file. */
static int
tmpfs_mknod(struct vop_mknod_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode **vpp = v->a_vpp;
	struct componentname *cnp = v->a_cnp;
	struct vattr *vap = v->a_vap;

	if (vap->va_type != VBLK && vap->va_type != VCHR &&
	    vap->va_type != VFIFO)
		return EINVAL;

	return
	    tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
}

/* VOP_OPEN: validate open mode against node state and flags. */
static int
tmpfs_open(struct vop_open_args *v)
{
	struct vnode *vp = v->a_vp;
	int mode = v->a_mode;
	int error;
	struct tmpfs_node *node;

	MPASS(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	/* The file is still active but all its names have been removed
	 * (e.g. by a "rmdir $(pwd)").  It cannot be opened any more as
	 * it is about to die. */
	if (node->tn_links < 1)
		return (ENOENT);

	/* If the file is marked append-only, deny write requests. */
	if (node->tn_flags & APPEND && (mode & (FWRITE | O_APPEND)) == FWRITE)
		error = EPERM;
	else {
		error = 0;
		/* For regular files, the call below is nop. */
		KASSERT(vp->v_type != VREG || (node->tn_reg.tn_aobj->flags &
		    OBJ_DEAD) == 0, ("dead object"));
		vnode_create_vobject(vp, node->tn_size, v->a_td);
	}

	MPASS(VOP_ISLOCKED(vp));
	return error;
}

/* VOP_CLOSE: flush pending timestamp updates. */
static int
tmpfs_close(struct vop_close_args *v)
{
	struct vnode *vp = v->a_vp;

	/* Update node times. */
	tmpfs_update(vp);

	return (0);
}

/* VOP_ACCESS: check permissions for 'accmode' against the node. */
int
tmpfs_access(struct vop_access_args *v)
{
	struct vnode *vp = v->a_vp;
	accmode_t accmode = v->a_accmode;
	struct ucred *cred = v->a_cred;
	int error;
	struct tmpfs_node *node;

	MPASS(VOP_ISLOCKED(vp));

	node = VP_TO_TMPFS_NODE(vp);

	switch (vp->v_type) {
	case VDIR:
		/* FALLTHROUGH */
	case VLNK:
		/* FALLTHROUGH */
	case VREG:
		/* Writes to these types are refused on read-only mounts. */
		if (accmode & VWRITE && vp->v_mount->mnt_flag & MNT_RDONLY) {
			error = EROFS;
			goto out;
		}
		break;

	case VBLK:
		/* FALLTHROUGH */
	case VCHR:
		/* FALLTHROUGH */
	case VSOCK:
		/* FALLTHROUGH */
	case VFIFO:
		/* Special files may be written even on read-only mounts. */
		break;

	default:
		error = EINVAL;
		goto out;
	}

	if (accmode & VWRITE && node->tn_flags & IMMUTABLE) {
		error = EPERM;
		goto out;
	}

	error = vaccess(vp->v_type, node->tn_mode, node->tn_uid,
	    node->tn_gid, accmode, cred, NULL);

out:
	MPASS(VOP_ISLOCKED(vp));

	return error;
}

/* VOP_GETATTR: copy the node's attributes into 'vap'. */
int
tmpfs_getattr(struct vop_getattr_args *v)
{
	struct vnode *vp = v->a_vp;
	struct vattr *vap = v->a_vap;
	vm_object_t obj;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	tmpfs_update(vp);

	vap->va_type = vp->v_type;
	vap->va_mode = node->tn_mode;
	vap->va_nlink = node->tn_links;
	vap->va_uid = node->tn_uid;
	vap->va_gid = node->tn_gid;
	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
	vap->va_fileid = node->tn_id;
	vap->va_size = node->tn_size;
	vap->va_blocksize = PAGE_SIZE;
	vap->va_atime = node->tn_atime;
	vap->va_mtime = node->tn_mtime;
	vap->va_ctime = node->tn_ctime;
	vap->va_birthtime = node->tn_birthtime;
	vap->va_gen = node->tn_gen;
	vap->va_flags = node->tn_flags;
	vap->va_rdev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
		node->tn_rdev : NODEV;
	if (vp->v_type == VREG) {
		/* Report only resident pages as bytes used. */
		obj = node->tn_reg.tn_aobj;
		vap->va_bytes = (u_quad_t)obj->resident_page_count * PAGE_SIZE;
	} else
		vap->va_bytes = node->tn_size;
	vap->va_filerev = 0;

	return 0;
}

/* VOP_SETATTR: dispatch attribute changes to the tmpfs_ch*() helpers. */
int
tmpfs_setattr(struct vop_setattr_args *v)
{
	struct vnode *vp = v->a_vp;
	struct vattr *vap = v->a_vap;
	struct ucred *cred = v->a_cred;
	struct thread *td = curthread;

	int error;

	MPASS(VOP_ISLOCKED(vp));

	error = 0;

	/* Abort if any unsettable attribute is given. */
	if (vap->va_type != VNON ||
	    vap->va_nlink != VNOVAL ||
	    vap->va_fsid != VNOVAL ||
	    vap->va_fileid != VNOVAL ||
	    vap->va_blocksize != VNOVAL ||
	    vap->va_gen != VNOVAL ||
	    vap->va_rdev != VNOVAL ||
	    vap->va_bytes != VNOVAL)
		error = EINVAL;

	if (error == 0 && (vap->va_flags != VNOVAL))
		error = tmpfs_chflags(vp, vap->va_flags, cred, td);

	if (error == 0 && (vap->va_size != VNOVAL))
		error = tmpfs_chsize(vp, vap->va_size, cred, td);

	if (error == 0 && (vap->va_uid != VNOVAL || vap->va_gid != VNOVAL))
		error = tmpfs_chown(vp, vap->va_uid, vap->va_gid, cred, td);

	if (error == 0 && (vap->va_mode != (mode_t)VNOVAL))
		error = tmpfs_chmod(vp, vap->va_mode, cred, td);

	if (error == 0 && ((vap->va_atime.tv_sec != VNOVAL &&
	    vap->va_atime.tv_nsec != VNOVAL) ||
	    (vap->va_mtime.tv_sec != VNOVAL &&
	    vap->va_mtime.tv_nsec != VNOVAL) ||
	    (vap->va_birthtime.tv_sec != VNOVAL &&
	    vap->va_birthtime.tv_nsec != VNOVAL)))
		error = tmpfs_chtimes(vp, vap, cred, td);

	/* Update the node times.
We give preference to the error codes * generated by this function rather than the ones that may arise * from tmpfs_update. */ tmpfs_update(vp); MPASS(VOP_ISLOCKED(vp)); return error; } static int tmpfs_read(struct vop_read_args *v) { struct vnode *vp; struct uio *uio; struct tmpfs_node *node; vp = v->a_vp; if (vp->v_type != VREG) return (EISDIR); uio = v->a_uio; if (uio->uio_offset < 0) return (EINVAL); node = VP_TO_TMPFS_NODE(vp); tmpfs_set_status(node, TMPFS_NODE_ACCESSED); return (uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio)); } static int tmpfs_write(struct vop_write_args *v) { struct vnode *vp; struct uio *uio; struct tmpfs_node *node; off_t oldsize; int error, ioflag; vp = v->a_vp; uio = v->a_uio; ioflag = v->a_ioflag; error = 0; node = VP_TO_TMPFS_NODE(vp); oldsize = node->tn_size; if (uio->uio_offset < 0 || vp->v_type != VREG) return (EINVAL); if (uio->uio_resid == 0) return (0); if (ioflag & IO_APPEND) uio->uio_offset = node->tn_size; if (uio->uio_offset + uio->uio_resid > VFS_TO_TMPFS(vp->v_mount)->tm_maxfilesize) return (EFBIG); if (vn_rlimit_fsize(vp, uio, uio->uio_td)) return (EFBIG); if (uio->uio_offset + uio->uio_resid > node->tn_size) { error = tmpfs_reg_resize(vp, uio->uio_offset + uio->uio_resid, FALSE); if (error != 0) goto out; } error = uiomove_object(node->tn_reg.tn_aobj, node->tn_size, uio); node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_MODIFIED | TMPFS_NODE_CHANGED; if (node->tn_mode & (S_ISUID | S_ISGID)) { if (priv_check_cred(v->a_cred, PRIV_VFS_RETAINSUGID, 0)) node->tn_mode &= ~(S_ISUID | S_ISGID); } if (error != 0) (void)tmpfs_reg_resize(vp, oldsize, TRUE); out: MPASS(IMPLIES(error == 0, uio->uio_resid == 0)); MPASS(IMPLIES(error != 0, oldsize == node->tn_size)); return (error); } static int tmpfs_fsync(struct vop_fsync_args *v) { struct vnode *vp = v->a_vp; MPASS(VOP_ISLOCKED(vp)); tmpfs_check_mtime(vp); tmpfs_update(vp); return 0; } static int tmpfs_remove(struct vop_remove_args *v) { struct vnode *dvp = v->a_dvp; 
	struct vnode *vp = v->a_vp;

	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;

	MPASS(VOP_ISLOCKED(dvp));
	MPASS(VOP_ISLOCKED(vp));

	/* Directories are removed with rmdir, not remove. */
	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto out;
	}

	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);
	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
	MPASS(de != NULL);

	/* Files marked as immutable or append-only cannot be deleted. */
	if ((node->tn_flags & (IMMUTABLE | APPEND | NOUNLINK)) ||
	    (dnode->tn_flags & APPEND)) {
		error = EPERM;
		goto out;
	}

	/* Remove the entry from the directory; as it is a file, we do not
	 * have to change the number of hard links of the directory. */
	tmpfs_dir_detach(dvp, de);
	if (v->a_cnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_whiteout_add(dvp, v->a_cnp);

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED;
	error = 0;

out:
	return error;
}

/*
 * VOP_LINK for tmpfs: create a new hard link to vp under the name in
 * cnp inside directory dvp.  Fails with EMLINK at the LINK_MAX ceiling
 * and EPERM for immutable/append-only targets.
 */
static int
tmpfs_link(struct vop_link_args *v)
{
	struct vnode *dvp = v->a_tdvp;
	struct vnode *vp = v->a_vp;
	struct componentname *cnp = v->a_cnp;

	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_node *node;

	MPASS(VOP_ISLOCKED(dvp));
	MPASS(cnp->cn_flags & HASBUF);
	MPASS(dvp != vp); /* XXX When can this be false? */
	node = VP_TO_TMPFS_NODE(vp);

	/* Ensure that we do not overflow the maximum number of links imposed
	 * by the system. */
	MPASS(node->tn_links <= LINK_MAX);
	if (node->tn_links == LINK_MAX) {
		error = EMLINK;
		goto out;
	}

	/* We cannot create links of files marked immutable or append-only. */
	if (node->tn_flags & (IMMUTABLE | APPEND)) {
		error = EPERM;
		goto out;
	}

	/* Allocate a new directory entry to represent the node. */
	error = tmpfs_alloc_dirent(VFS_TO_TMPFS(vp->v_mount), node,
	    cnp->cn_nameptr, cnp->cn_namelen, &de);
	if (error != 0)
		goto out;

	/* Insert the new directory entry into the appropriate directory. */
	if (cnp->cn_flags & ISWHITEOUT)
		tmpfs_dir_whiteout_remove(dvp, cnp);
	tmpfs_dir_attach(dvp, de);

	/* vp link count has changed, so update node times. */
	node->tn_status |= TMPFS_NODE_CHANGED;
	tmpfs_update(vp);

	error = 0;

out:
	return error;
}

/*
 * We acquire all but fdvp locks using non-blocking acquisitions.  If we
 * fail to acquire any lock in the path we will drop all held locks,
 * acquire the new lock in a blocking fashion, and then release it and
 * restart the rename.  This acquire/release step ensures that we do not
 * spin on a lock waiting for release.  On error release all vnode locks
 * and decrement references the way tmpfs_rename() would do.
 */
static int
tmpfs_rename_relock(struct vnode *fdvp, struct vnode **fvpp,
    struct vnode *tdvp, struct vnode **tvpp,
    struct componentname *fcnp, struct componentname *tcnp)
{
	struct vnode *nvp;
	struct mount *mp;
	struct tmpfs_dirent *de;
	int error, restarts = 0;

	VOP_UNLOCK(tdvp, 0);
	if (*tvpp != NULL && *tvpp != tdvp)
		VOP_UNLOCK(*tvpp, 0);
	mp = fdvp->v_mount;

relock:
	restarts += 1;
	/* fdvp is taken blocking; everything after it must not block. */
	error = vn_lock(fdvp, LK_EXCLUSIVE);
	if (error)
		goto releout;
	if (vn_lock(tdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
		VOP_UNLOCK(fdvp, 0);
		error = vn_lock(tdvp, LK_EXCLUSIVE);
		if (error)
			goto releout;
		VOP_UNLOCK(tdvp, 0);
		goto relock;
	}
	/*
	 * Re-resolve fvp to be certain it still exists and fetch the
	 * correct vnode.
	 */
	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(fdvp), NULL, fcnp);
	if (de == NULL) {
		VOP_UNLOCK(fdvp, 0);
		VOP_UNLOCK(tdvp, 0);
		/* '.' and '..' are rejected as rename sources. */
		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
			error = EINVAL;
		else
			error = ENOENT;
		goto releout;
	}
	error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE | LK_NOWAIT, &nvp);
	if (error != 0) {
		VOP_UNLOCK(fdvp, 0);
		VOP_UNLOCK(tdvp, 0);
		if (error != EBUSY)
			goto releout;
		/*
		 * Blocking acquire, then release and restart so that all
		 * locks are taken in the proper order on the next pass.
		 */
		error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE, &nvp);
		if (error != 0)
			goto releout;
		VOP_UNLOCK(nvp, 0);
		/*
		 * Concurrent rename race.
		 */
		if (nvp == tdvp) {
			vrele(nvp);
			error = EINVAL;
			goto releout;
		}
		vrele(*fvpp);
		*fvpp = nvp;
		goto relock;
	}
	vrele(*fvpp);
	*fvpp = nvp;
	VOP_UNLOCK(*fvpp, 0);
	/*
	 * Re-resolve tvp and acquire the vnode lock if present.
	 */
	de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(tdvp), NULL, tcnp);
	/*
	 * If tvp disappeared we just carry on.
	 */
	if (de == NULL && *tvpp != NULL) {
		vrele(*tvpp);
		*tvpp = NULL;
	}
	/*
	 * Get the tvp ino if the lookup succeeded.  We may have to restart
	 * if the non-blocking acquire fails.
	 */
	if (de != NULL) {
		nvp = NULL;
		error = tmpfs_alloc_vp(mp, de->td_node,
		    LK_EXCLUSIVE | LK_NOWAIT, &nvp);
		if (*tvpp != NULL)
			vrele(*tvpp);
		*tvpp = nvp;
		if (error != 0) {
			VOP_UNLOCK(fdvp, 0);
			VOP_UNLOCK(tdvp, 0);
			if (error != EBUSY)
				goto releout;
			error = tmpfs_alloc_vp(mp, de->td_node, LK_EXCLUSIVE,
			    &nvp);
			if (error != 0)
				goto releout;
			VOP_UNLOCK(nvp, 0);
			/*
			 * fdvp contains fvp, thus tvp (=fdvp) is not empty.
			 */
			if (nvp == fdvp) {
				error = ENOTEMPTY;
				goto releout;
			}
			goto relock;
		}
	}
	tmpfs_rename_restarts += restarts;

	return (0);
releout:
	vrele(fdvp);
	vrele(*fvpp);
	vrele(tdvp);
	if (*tvpp != NULL)
		vrele(*tvpp);
	tmpfs_rename_restarts += restarts;

	return (error);
}

/*
 * VOP_RENAME for tmpfs (continues below this chunk): move fvp from
 * fdvp/fcnp to tdvp/tcnp, possibly replacing tvp.  On exit all vnode
 * references passed in are released regardless of the outcome.
 */
static int
tmpfs_rename(struct vop_rename_args *v)
{
	struct vnode *fdvp = v->a_fdvp;
	struct vnode *fvp = v->a_fvp;
	struct componentname *fcnp = v->a_fcnp;
	struct vnode *tdvp = v->a_tdvp;
	struct vnode *tvp = v->a_tvp;
	struct componentname *tcnp = v->a_tcnp;
	struct mount *mp = NULL;

	char *newname;
	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *fdnode;
	struct tmpfs_node *fnode;
	struct tmpfs_node *tnode;
	struct tmpfs_node *tdnode;

	MPASS(VOP_ISLOCKED(tdvp));
	MPASS(IMPLIES(tvp != NULL, VOP_ISLOCKED(tvp)));
	MPASS(fcnp->cn_flags & HASBUF);
	MPASS(tcnp->cn_flags & HASBUF);

	/* Disallow cross-device renames.
	 * XXX Why isn't this done by the caller? */
	if (fvp->v_mount != tdvp->v_mount ||
	    (tvp != NULL && fvp->v_mount != tvp->v_mount)) {
		error = EXDEV;
		goto out;
	}

	/* If source and target are the same file, there is nothing to do. */
	if (fvp == tvp) {
		error = 0;
		goto out;
	}

	/* If we need to move the directory between entries, lock the
	 * source so that we can safely operate on it. */
	if (fdvp != tdvp && fdvp != tvp) {
		if (vn_lock(fdvp, LK_EXCLUSIVE | LK_NOWAIT) != 0) {
			/* Deadlock-avoiding slow path; see
			 * tmpfs_rename_relock() above. */
			mp = tdvp->v_mount;
			error = vfs_busy(mp, 0);
			if (error != 0) {
				mp = NULL;
				goto out;
			}
			error = tmpfs_rename_relock(fdvp, &fvp, tdvp, &tvp,
			    fcnp, tcnp);
			if (error != 0) {
				vfs_unbusy(mp);
				return (error);
			}
			ASSERT_VOP_ELOCKED(fdvp,
			    "tmpfs_rename: fdvp not locked");
			ASSERT_VOP_ELOCKED(tdvp,
			    "tmpfs_rename: tdvp not locked");
			if (tvp != NULL)
				ASSERT_VOP_ELOCKED(tvp,
				    "tmpfs_rename: tvp not locked");
			if (fvp == tvp) {
				error = 0;
				goto out_locked;
			}
		}
	}

	tmp = VFS_TO_TMPFS(tdvp->v_mount);
	tdnode = VP_TO_TMPFS_DIR(tdvp);
	tnode = (tvp == NULL) ?
	    NULL : VP_TO_TMPFS_NODE(tvp);
	fdnode = VP_TO_TMPFS_DIR(fdvp);
	fnode = VP_TO_TMPFS_NODE(fvp);
	de = tmpfs_dir_lookup(fdnode, fnode, fcnp);

	/* Entry can disappear before we lock fdvp,
	 * also avoid manipulating '.' and '..' entries. */
	if (de == NULL) {
		if ((fcnp->cn_flags & ISDOTDOT) != 0 ||
		    (fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.'))
			error = EINVAL;
		else
			error = ENOENT;
		goto out_locked;
	}
	MPASS(de->td_node == fnode);

	/* If re-naming a directory to another preexisting directory
	 * ensure that the target directory is empty so that its
	 * removal causes no side effects.
	 * Kern_rename guarantees the destination to be a directory
	 * if the source is one. */
	if (tvp != NULL) {
		MPASS(tnode != NULL);

		if ((tnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
		    (tdnode->tn_flags & (APPEND | IMMUTABLE))) {
			error = EPERM;
			goto out_locked;
		}

		if (fnode->tn_type == VDIR && tnode->tn_type == VDIR) {
			if (tnode->tn_size > 0) {
				error = ENOTEMPTY;
				goto out_locked;
			}
		} else if (fnode->tn_type == VDIR && tnode->tn_type != VDIR) {
			error = ENOTDIR;
			goto out_locked;
		} else if (fnode->tn_type != VDIR && tnode->tn_type == VDIR) {
			error = EISDIR;
			goto out_locked;
		} else {
			MPASS(fnode->tn_type != VDIR &&
			    tnode->tn_type != VDIR);
		}
	}

	if ((fnode->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) ||
	    (fdnode->tn_flags & (APPEND | IMMUTABLE))) {
		error = EPERM;
		goto out_locked;
	}

	/* Ensure that we have enough memory to hold the new name, if it
	 * has to be changed. */
	if (fcnp->cn_namelen != tcnp->cn_namelen ||
	    bcmp(fcnp->cn_nameptr, tcnp->cn_nameptr, fcnp->cn_namelen) != 0) {
		newname = malloc(tcnp->cn_namelen, M_TMPFSNAME, M_WAITOK);
	} else
		newname = NULL;

	/* If the node is being moved to another directory, we have to do
	 * the move. */
	if (fdnode != tdnode) {
		/* In case we are moving a directory, we have to adjust its
		 * parent to point to the new parent. */
		if (de->td_node->tn_type == VDIR) {
			struct tmpfs_node *n;

			/* Ensure the target directory is not a child of the
			 * directory being moved.  Otherwise, we'd end up
			 * with stale nodes. */
			n = tdnode;
			/* TMPFS_LOCK guarantees that no nodes are freed
			 * while traversing the list.  Nodes can only be
			 * marked as removed: tn_parent == NULL. */
			TMPFS_LOCK(tmp);
			TMPFS_NODE_LOCK(n);
			while (n != n->tn_dir.tn_parent) {
				struct tmpfs_node *parent;

				if (n == fnode) {
					/* Target is inside the source:
					 * refuse the rename. */
					TMPFS_NODE_UNLOCK(n);
					TMPFS_UNLOCK(tmp);
					error = EINVAL;
					if (newname != NULL)
						free(newname, M_TMPFSNAME);
					goto out_locked;
				}
				parent = n->tn_dir.tn_parent;
				TMPFS_NODE_UNLOCK(n);
				if (parent == NULL) {
					n = NULL;
					break;
				}
				TMPFS_NODE_LOCK(parent);
				if (parent->tn_dir.tn_parent == NULL) {
					TMPFS_NODE_UNLOCK(parent);
					n = NULL;
					break;
				}
				n = parent;
			}
			TMPFS_UNLOCK(tmp);
			if (n == NULL) {
				/* Walk hit a removed node; bail out. */
				error = EINVAL;
				if (newname != NULL)
					free(newname, M_TMPFSNAME);
				goto out_locked;
			}
			TMPFS_NODE_UNLOCK(n);

			/* Adjust the parent pointer. */
			TMPFS_VALIDATE_DIR(fnode);
			TMPFS_NODE_LOCK(de->td_node);
			de->td_node->tn_dir.tn_parent = tdnode;
			TMPFS_NODE_UNLOCK(de->td_node);

			/* As a result of changing the target of the '..'
			 * entry, the link count of the source and target
			 * directories has to be adjusted. */
			TMPFS_NODE_LOCK(tdnode);
			TMPFS_ASSERT_LOCKED(tdnode);
			tdnode->tn_links++;
			TMPFS_NODE_UNLOCK(tdnode);

			TMPFS_NODE_LOCK(fdnode);
			TMPFS_ASSERT_LOCKED(fdnode);
			fdnode->tn_links--;
			TMPFS_NODE_UNLOCK(fdnode);
		}
	}

	/* Do the move: just remove the entry from the source directory
	 * and insert it into the target one. */
	tmpfs_dir_detach(fdvp, de);

	if (fcnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_whiteout_add(fdvp, fcnp);
	if (tcnp->cn_flags & ISWHITEOUT)
		tmpfs_dir_whiteout_remove(tdvp, tcnp);

	/* If the name has changed, we need to make it effective by changing
	 * it in the directory entry. */
	if (newname != NULL) {
		MPASS(tcnp->cn_namelen <= MAXNAMLEN);

		free(de->ud.td_name, M_TMPFSNAME);
		de->ud.td_name = newname;
		tmpfs_dirent_init(de, tcnp->cn_nameptr, tcnp->cn_namelen);

		fnode->tn_status |= TMPFS_NODE_CHANGED;
		tdnode->tn_status |= TMPFS_NODE_MODIFIED;
	}

	/* If we are overwriting an entry, we have to remove the old one
	 * from the target directory. */
	if (tvp != NULL) {
		struct tmpfs_dirent *tde;

		/* Remove the old entry from the target directory. */
		tde = tmpfs_dir_lookup(tdnode, tnode, tcnp);
		tmpfs_dir_detach(tdvp, tde);

		/* Free the directory entry we just deleted.  Note that the
		 * node referred by it will not be removed until the vnode is
		 * really reclaimed. */
		tmpfs_free_dirent(VFS_TO_TMPFS(tvp->v_mount), tde);
	}

	tmpfs_dir_attach(tdvp, de);

	if (tmpfs_use_nc(fvp)) {
		cache_purge(fvp);
		if (tvp != NULL)
			cache_purge(tvp);
		cache_purge_negative(tdvp);
	}

	error = 0;

out_locked:
	if (fdvp != tdvp && fdvp != tvp)
		VOP_UNLOCK(fdvp, 0);

out:
	/* Release target nodes. */
	/* XXX: I don't understand when tdvp can be the same as tvp, but
	 * other code takes care of this... */
	if (tdvp == tvp)
		vrele(tdvp);
	else
		vput(tdvp);
	if (tvp != NULL)
		vput(tvp);

	/* Release source nodes. */
	vrele(fdvp);
	vrele(fvp);

	if (mp != NULL)
		vfs_unbusy(mp);

	return error;
}

/*
 * VOP_MKDIR for tmpfs: thin wrapper that delegates directory creation
 * to the generic file-allocation helper.
 */
static int
tmpfs_mkdir(struct vop_mkdir_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode **vpp = v->a_vpp;
	struct componentname *cnp = v->a_cnp;
	struct vattr *vap = v->a_vap;

	MPASS(vap->va_type == VDIR);

	return tmpfs_alloc_file(dvp, vpp, vap, cnp, NULL);
}

/*
 * VOP_RMDIR for tmpfs (continues below this chunk): remove an empty
 * directory entry vp from its parent dvp.
 */
static int
tmpfs_rmdir(struct vop_rmdir_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode *vp = v->a_vp;

	int error;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *dnode;
	struct tmpfs_node *node;

	MPASS(VOP_ISLOCKED(dvp));
	MPASS(VOP_ISLOCKED(vp));

	tmp = VFS_TO_TMPFS(dvp->v_mount);
	dnode = VP_TO_TMPFS_DIR(dvp);
	node = VP_TO_TMPFS_DIR(vp);

	/* Directories with more than two entries ('.' and '..') cannot be
	 * removed.
	 */
	if (node->tn_size > 0) {
		error = ENOTEMPTY;
		goto out;
	}

	if ((dnode->tn_flags & APPEND)
	    || (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND))) {
		error = EPERM;
		goto out;
	}

	/* This invariant holds only if we are not trying to remove "..".
	 * We checked for that above so this is safe now. */
	MPASS(node->tn_dir.tn_parent == dnode);

	/* Get the directory entry associated with node (vp).  This was
	 * filled by tmpfs_lookup while looking up the entry. */
	de = tmpfs_dir_lookup(dnode, node, v->a_cnp);
	MPASS(TMPFS_DIRENT_MATCHES(de,
	    v->a_cnp->cn_nameptr,
	    v->a_cnp->cn_namelen));

	/* Check flags to see if we are allowed to remove the directory.
	 * NOTE(review): this repeats the flag check done above verbatim. */
	if ((dnode->tn_flags & APPEND) != 0 ||
	    (node->tn_flags & (NOUNLINK | IMMUTABLE | APPEND)) != 0) {
		error = EPERM;
		goto out;
	}

	/* Detach the directory entry from the directory (dnode). */
	tmpfs_dir_detach(dvp, de);
	if (v->a_cnp->cn_flags & DOWHITEOUT)
		tmpfs_dir_whiteout_add(dvp, v->a_cnp);

	/* No vnode should be allocated for this entry from this point */
	TMPFS_NODE_LOCK(node);
	node->tn_links--;
	node->tn_dir.tn_parent = NULL;
	node->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
	    TMPFS_NODE_MODIFIED;
	TMPFS_NODE_UNLOCK(node);

	TMPFS_NODE_LOCK(dnode);
	dnode->tn_links--;
	dnode->tn_status |= TMPFS_NODE_ACCESSED | TMPFS_NODE_CHANGED |
	    TMPFS_NODE_MODIFIED;
	TMPFS_NODE_UNLOCK(dnode);

	if (tmpfs_use_nc(dvp)) {
		cache_purge(dvp);
		cache_purge(vp);
	}

	/* Free the directory entry we just deleted.  Note that the node
	 * referred by it will not be removed until the vnode is really
	 * reclaimed. */
	tmpfs_free_dirent(tmp, de);

	/* Release the deleted vnode (will destroy the node, notify
	 * interested parties and clean it from the cache). */
	dnode->tn_status |= TMPFS_NODE_CHANGED;
	tmpfs_update(dvp);

	error = 0;

out:
	return error;
}

/*
 * VOP_SYMLINK for tmpfs: create a symbolic link pointing at a_target,
 * delegating allocation to the generic file-creation helper.
 */
static int
tmpfs_symlink(struct vop_symlink_args *v)
{
	struct vnode *dvp = v->a_dvp;
	struct vnode **vpp = v->a_vpp;
	struct componentname *cnp = v->a_cnp;
	struct vattr *vap = v->a_vap;
	char *target = v->a_target;

#ifdef notyet /* XXX FreeBSD BUG: kern_symlink is not setting VLNK */
	MPASS(vap->va_type == VLNK);
#else
	vap->va_type = VLNK;
#endif

	return tmpfs_alloc_file(dvp, vpp, vap, cnp, target);
}

/*
 * VOP_READDIR for tmpfs (continues below this chunk): emit directory
 * entries into the uio, allocating NFS-style cookies when requested.
 */
static int
tmpfs_readdir(struct vop_readdir_args *v)
{
	struct vnode *vp = v->a_vp;
	struct uio *uio = v->a_uio;
	int *eofflag = v->a_eofflag;
	u_long **cookies = v->a_cookies;
	int *ncookies = v->a_ncookies;

	int error;
	ssize_t startresid;
	int maxcookies;
	struct tmpfs_node *node;

	/* This operation only makes sense on directory nodes. */
	if (vp->v_type != VDIR)
		return ENOTDIR;

	maxcookies = 0;
	node = VP_TO_TMPFS_DIR(vp);

	startresid = uio->uio_resid;

	/* Allocate cookies for NFS and compat modules. */
	if (cookies != NULL && ncookies != NULL) {
		/* Worst case: one cookie per dirent plus '.' and '..'. */
		maxcookies = howmany(node->tn_size,
		    sizeof(struct tmpfs_dirent)) + 2;
		*cookies = malloc(maxcookies * sizeof(**cookies), M_TEMP,
		    M_WAITOK);
		*ncookies = 0;
	}

	if (cookies == NULL)
		error = tmpfs_dir_getdents(node, uio, 0, NULL, NULL);
	else
		error = tmpfs_dir_getdents(node, uio, maxcookies, *cookies,
		    ncookies);

	/* Buffer was filled without hitting EOF. */
	if (error == EJUSTRETURN)
		error = (uio->uio_resid != startresid) ?
	    /* EJUSTRETURN with no progress at all means a too-small
	     * buffer. */
	    0 : EINVAL;

	if (error != 0 && cookies != NULL && ncookies != NULL) {
		free(*cookies, M_TEMP);
		*cookies = NULL;
		*ncookies = 0;
	}

	if (eofflag != NULL)
		*eofflag =
		    (error == 0 && uio->uio_offset == TMPFS_DIRCOOKIE_EOF);

	return error;
}

/*
 * VOP_READLINK for tmpfs: copy the link target stored in the node out
 * to the caller's uio and mark the node accessed.
 */
static int
tmpfs_readlink(struct vop_readlink_args *v)
{
	struct vnode *vp = v->a_vp;
	struct uio *uio = v->a_uio;

	int error;
	struct tmpfs_node *node;

	MPASS(uio->uio_offset == 0);
	MPASS(vp->v_type == VLNK);

	node = VP_TO_TMPFS_NODE(vp);

	error = uiomove(node->tn_link, MIN(node->tn_size, uio->uio_resid),
	    uio);
	tmpfs_set_status(node, TMPFS_NODE_ACCESSED);

	return (error);
}

/*
 * VOP_INACTIVE for tmpfs: recycle the vnode immediately once the last
 * link is gone; otherwise just reconcile the mtime.
 */
static int
tmpfs_inactive(struct vop_inactive_args *v)
{
	struct vnode *vp;
	struct tmpfs_node *node;

	vp = v->a_vp;
	node = VP_TO_TMPFS_NODE(vp);
	if (node->tn_links == 0)
		vrecycle(vp);
	else
		tmpfs_check_mtime(vp);
	return (0);
}

/*
 * VOP_RECLAIM for tmpfs: dissociate the vnode from its tmpfs node,
 * destroying the backing VM object and, if the node was already
 * unlinked, freeing the node itself.
 */
int
tmpfs_reclaim(struct vop_reclaim_args *v)
{
	struct vnode *vp = v->a_vp;
	struct tmpfs_mount *tmp;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);
	tmp = VFS_TO_TMPFS(vp->v_mount);

	if (vp->v_type == VREG)
		tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
	else
		vnode_destroy_vobject(vp);
	vp->v_object = NULL;
	if (tmpfs_use_nc(vp))
		cache_purge(vp);

	TMPFS_NODE_LOCK(node);
	tmpfs_free_vp(vp);

	/* If the node referenced by this vnode was deleted by the user,
	 * we must free its associated data structures (now that the vnode
	 * is being reclaimed). */
	if (node->tn_links == 0 &&
	    (node->tn_vpstate & TMPFS_VNODE_ALLOCATING) == 0) {
		node->tn_vpstate = TMPFS_VNODE_DOOMED;
		TMPFS_NODE_UNLOCK(node);
		tmpfs_free_node(tmp, node);
	} else
		TMPFS_NODE_UNLOCK(node);

	MPASS(vp->v_data == NULL);
	return 0;
}

/*
 * VOP_PRINT for tmpfs: dump node identity, flags, ownership and status
 * to the console for debugging.
 */
int
tmpfs_print(struct vop_print_args *v)
{
	struct vnode *vp = v->a_vp;
	struct tmpfs_node *node;

	node = VP_TO_TMPFS_NODE(vp);

	printf("tag VT_TMPFS, tmpfs_node %p, flags 0x%lx, links %jd\n",
	    node, node->tn_flags, (uintmax_t)node->tn_links);
	printf("\tmode 0%o, owner %d, group %d, size %jd, status 0x%x\n",
	    node->tn_mode, node->tn_uid, node->tn_gid,
	    (intmax_t)node->tn_size, node->tn_status);

	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);

	printf("\n");

	return 0;
}

/*
 * VOP_PATHCONF for tmpfs: answer the pathconf variables tmpfs defines
 * itself; everything else falls through to vop_stdpathconf().
 */
static int
tmpfs_pathconf(struct vop_pathconf_args *v)
{
	int name = v->a_name;
	register_t *retval = v->a_retval;

	int error;

	error = 0;

	switch (name) {
	case _PC_NO_TRUNC:
		*retval = 1;
		break;

	case _PC_SYNC_IO:
		*retval = 1;
		break;

	case _PC_FILESIZEBITS:
		*retval = 64;
		break;

	default:
		error = vop_stdpathconf(v);
	}

	return error;
}

/*
 * VOP_VPTOFH for tmpfs: build an NFS file handle from the node's id and
 * generation number.
 */
static int
tmpfs_vptofh(struct vop_vptofh_args *ap)
{
	struct tmpfs_fid *tfhp;
	struct tmpfs_node *node;

	tfhp = (struct tmpfs_fid *)ap->a_fhp;
	node = VP_TO_TMPFS_NODE(ap->a_vp);

	tfhp->tf_len = sizeof(struct tmpfs_fid);
	tfhp->tf_id = node->tn_id;
	tfhp->tf_gen = node->tn_gen;

	return (0);
}

/*
 * VOP_WHITEOUT for tmpfs (continues below this chunk): manage whiteout
 * entries for union mounts (LOOKUP/CREATE/DELETE ops).
 */
static int
tmpfs_whiteout(struct vop_whiteout_args *ap)
{
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	struct tmpfs_dirent *de;

	switch (ap->a_flags) {
	case LOOKUP:
		return (0);
	case CREATE:
		de = tmpfs_dir_lookup(VP_TO_TMPFS_DIR(dvp), NULL, cnp);
		if (de != NULL)
			return (de->td_node == NULL ?
			    /* An existing whiteout is fine; a real entry
			     * is a collision. */
			    0 : EEXIST);
		return (tmpfs_dir_whiteout_add(dvp, cnp));
	case DELETE:
		tmpfs_dir_whiteout_remove(dvp, cnp);
		return (0);
	default:
		panic("tmpfs_whiteout: unknown op");
	}
}

/*
 * Scan directory tnp for an entry whose node is tn.  On success store
 * the dirent in *pde and return 0; otherwise return ENOENT.
 */
static int
tmpfs_vptocnp_dir(struct tmpfs_node *tn, struct tmpfs_node *tnp,
    struct tmpfs_dirent **pde)
{
	struct tmpfs_dir_cursor dc;
	struct tmpfs_dirent *de;

	for (de = tmpfs_dir_first(tnp, &dc); de != NULL;
	     de = tmpfs_dir_next(tnp, &dc)) {
		if (de->td_node == tn) {
			*pde = de;
			return (0);
		}
	}
	return (ENOENT);
}

/*
 * Helper for vptocnp: get a locked vnode for directory tnp, look up
 * tn's name inside it and prepend that name into buf (filled from the
 * end, *buflen is decremented).  On success *dvp is returned unlocked
 * but referenced; on failure all acquired references are dropped.
 */
static int
tmpfs_vptocnp_fill(struct vnode *vp, struct tmpfs_node *tn,
    struct tmpfs_node *tnp, char *buf, int *buflen, struct vnode **dvp)
{
	struct tmpfs_dirent *de;
	int error, i;

	error = vn_vget_ino_gen(vp, tmpfs_vn_get_ino_alloc, tnp, LK_SHARED,
	    dvp);
	if (error != 0)
		return (error);
	error = tmpfs_vptocnp_dir(tn, tnp, &de);
	if (error == 0) {
		i = *buflen;
		i -= de->td_namelen;
		if (i < 0) {
			error = ENOMEM;
		} else {
			bcopy(de->ud.td_name, buf + i, de->td_namelen);
			*buflen = i;
		}
	}
	if (error == 0) {
		if (vp != *dvp)
			VOP_UNLOCK(*dvp, 0);
	} else {
		if (vp != *dvp)
			vput(*dvp);
		else
			vrele(vp);
	}
	return (error);
}

/*
 * VOP_VPTOCNP for tmpfs: translate vnode vp back into a (parent vnode,
 * name) pair.  Directories use their tn_parent pointer directly; for
 * other node types every directory on the mount is scanned for an entry
 * referencing the node.
 */
static int
tmpfs_vptocnp(struct vop_vptocnp_args *ap)
{
	struct vnode *vp, **dvp;
	struct tmpfs_node *tn, *tnp, *tnp1;
	struct tmpfs_dirent *de;
	struct tmpfs_mount *tm;
	char *buf;
	int *buflen;
	int error;

	vp = ap->a_vp;
	dvp = ap->a_vpp;
	buf = ap->a_buf;
	buflen = ap->a_buflen;

	tm = VFS_TO_TMPFS(vp->v_mount);
	tn = VP_TO_TMPFS_NODE(vp);
	if (tn->tn_type == VDIR) {
		tnp = tn->tn_dir.tn_parent;
		if (tnp == NULL)
			return (ENOENT);
		tmpfs_ref_node(tnp);
		error = tmpfs_vptocnp_fill(vp, tn, tn->tn_dir.tn_parent, buf,
		    buflen, dvp);
		tmpfs_free_node(tm, tnp);
		return (error);
	}
restart:
	TMPFS_LOCK(tm);
	LIST_FOREACH_SAFE(tnp, &tm->tm_nodes_used, tn_entries, tnp1) {
		if (tnp->tn_type != VDIR)
			continue;
		TMPFS_NODE_LOCK(tnp);
		tmpfs_ref_node_locked(tnp);

		/*
		 * tn_vnode cannot be instantiated while we hold the
		 * node lock, so the directory cannot be changed while
		 * we iterate over it.  Do this to avoid instantiating
		 * vnode for directories which cannot point to our
		 * node.
		 */
		error = tnp->tn_vnode == NULL ? tmpfs_vptocnp_dir(tn, tnp,
		    &de) : 0;

		if (error == 0) {
			TMPFS_NODE_UNLOCK(tnp);
			TMPFS_UNLOCK(tm);
			error = tmpfs_vptocnp_fill(vp, tn, tnp, buf, buflen,
			    dvp);
			if (error == 0) {
				tmpfs_free_node(tm, tnp);
				return (0);
			}
			if ((vp->v_iflag & VI_DOOMED) != 0) {
				tmpfs_free_node(tm, tnp);
				return (ENOENT);
			}
			TMPFS_LOCK(tm);
			TMPFS_NODE_LOCK(tnp);
		}
		if (tmpfs_free_node_locked(tm, tnp, false)) {
			/* Our reference was the last; the list may have
			 * changed, start over. */
			goto restart;
		} else {
			KASSERT(tnp->tn_refcount > 0,
			    ("node %p refcount zero", tnp));
			tnp1 = LIST_NEXT(tnp, tn_entries);
			TMPFS_NODE_UNLOCK(tnp);
		}
	}
	TMPFS_UNLOCK(tm);
	return (ENOENT);
}

/*
 * Vnode operations vector used for files stored in a tmpfs file system.
 */
struct vop_vector tmpfs_vnodeop_entries = {
	.vop_default =			&default_vnodeops,
	.vop_lookup =			vfs_cache_lookup,
	.vop_cachedlookup =		tmpfs_cached_lookup,
	.vop_create =			tmpfs_create,
	.vop_mknod =			tmpfs_mknod,
	.vop_open =			tmpfs_open,
	.vop_close =			tmpfs_close,
	.vop_access =			tmpfs_access,
	.vop_getattr =			tmpfs_getattr,
	.vop_setattr =			tmpfs_setattr,
	.vop_read =			tmpfs_read,
	.vop_write =			tmpfs_write,
	.vop_fsync =			tmpfs_fsync,
	.vop_remove =			tmpfs_remove,
	.vop_link =			tmpfs_link,
	.vop_rename =			tmpfs_rename,
	.vop_mkdir =			tmpfs_mkdir,
	.vop_rmdir =			tmpfs_rmdir,
	.vop_symlink =			tmpfs_symlink,
	.vop_readdir =			tmpfs_readdir,
	.vop_readlink =			tmpfs_readlink,
	.vop_inactive =			tmpfs_inactive,
	.vop_reclaim =			tmpfs_reclaim,
	.vop_print =			tmpfs_print,
	.vop_pathconf =			tmpfs_pathconf,
	.vop_vptofh =			tmpfs_vptofh,
	.vop_whiteout =			tmpfs_whiteout,
	.vop_bmap =			VOP_EOPNOTSUPP,
	.vop_vptocnp =			tmpfs_vptocnp,
};

/*
 * Same vector for mounts which do not use namecache.
*/ struct vop_vector tmpfs_vnodeop_nonc_entries = { .vop_default = &tmpfs_vnodeop_entries, .vop_lookup = tmpfs_lookup, }; Index: head/sys/fs/tmpfs/tmpfs_vnops.h =================================================================== --- head/sys/fs/tmpfs/tmpfs_vnops.h (revision 326267) +++ head/sys/fs/tmpfs/tmpfs_vnops.h (revision 326268) @@ -1,55 +1,57 @@ /* $NetBSD: tmpfs_vnops.h,v 1.7 2005/12/03 17:34:44 christos Exp $ */ /*- + * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * * Copyright (c) 2005 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Julio M. Merino Vidal, developed as part of Google's Summer of Code * 2005 program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef _FS_TMPFS_TMPFS_VNOPS_H_ #define _FS_TMPFS_TMPFS_VNOPS_H_ #if !defined(_KERNEL) #error not supposed to be exposed to userland. #endif /* * Declarations for tmpfs_vnops.c. */ extern struct vop_vector tmpfs_vnodeop_entries; extern struct vop_vector tmpfs_vnodeop_nonc_entries; vop_access_t tmpfs_access; vop_getattr_t tmpfs_getattr; vop_setattr_t tmpfs_setattr; vop_print_t tmpfs_print; vop_reclaim_t tmpfs_reclaim; #endif /* _FS_TMPFS_TMPFS_VNOPS_H_ */ Index: head/sys/fs/udf/ecma167-udf.h =================================================================== --- head/sys/fs/udf/ecma167-udf.h (revision 326267) +++ head/sys/fs/udf/ecma167-udf.h (revision 326268) @@ -1,386 +1,388 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2001, 2002 Scott Long * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* ecma167-udf.h */ /* Structure/definitions/constants a la ECMA 167 rev. 3 */ /* Tag identifiers */ enum { TAGID_PRI_VOL = 1, TAGID_ANCHOR = 2, TAGID_VOL = 3, TAGID_IMP_VOL = 4, TAGID_PARTITION = 5, TAGID_LOGVOL = 6, TAGID_UNALLOC_SPACE = 7, TAGID_TERM = 8, TAGID_LOGVOL_INTEGRITY = 9, TAGID_FSD = 256, TAGID_FID = 257, TAGID_FENTRY = 261 }; /* Descriptor tag [3/7.2] */ struct desc_tag { uint16_t id; uint16_t descriptor_ver; uint8_t cksum; uint8_t reserved; uint16_t serial_num; uint16_t desc_crc; uint16_t desc_crc_len; uint32_t tag_loc; } __packed; /* Recorded Address [4/7.1] */ struct lb_addr { uint32_t lb_num; uint16_t part_num; } __packed; /* Extent Descriptor [3/7.1] */ struct extent_ad { uint32_t len; uint32_t loc; } __packed; /* Short Allocation Descriptor [4/14.14.1] */ struct short_ad { uint32_t len; uint32_t pos; } __packed; /* Long Allocation Descriptor [4/14.14.2] */ struct long_ad { uint32_t len; struct lb_addr loc; uint16_t ad_flags; uint32_t ad_id; } __packed; /* Extended Allocation Descriptor [4/14.14.3] */ struct ext_ad { uint32_t ex_len; uint32_t rec_len; uint32_t inf_len; struct lb_addr ex_loc; uint8_t reserved[2]; } __packed; union icb { struct short_ad s_ad; struct long_ad l_ad; struct ext_ad e_ad; }; /* Character set spec [1/7.2.1] */ struct charspec { uint8_t type; uint8_t inf[63]; } __packed; /* Timestamp [1/7.3] */ struct timestamp { uint16_t type_tz; uint16_t year; uint8_t month; uint8_t day; uint8_t hour; uint8_t 
minute; uint8_t second; uint8_t centisec; uint8_t hund_usec; uint8_t usec; } __packed; /* Entity Identifier [1/7.4] */ #define UDF_REGID_ID_SIZE 23 struct regid { uint8_t flags; uint8_t id[UDF_REGID_ID_SIZE]; uint8_t id_suffix[8]; } __packed; /* ICB Tag [4/14.6] */ struct icb_tag { uint32_t prev_num_dirs; uint16_t strat_type; uint8_t strat_param[2]; uint16_t max_num_entries; uint8_t reserved; uint8_t file_type; struct lb_addr parent_icb; uint16_t flags; } __packed; #define UDF_ICB_TAG_FLAGS_SETUID 0x40 #define UDF_ICB_TAG_FLAGS_SETGID 0x80 #define UDF_ICB_TAG_FLAGS_STICKY 0x100 /* Anchor Volume Descriptor Pointer [3/10.2] */ struct anchor_vdp { struct desc_tag tag; struct extent_ad main_vds_ex; struct extent_ad reserve_vds_ex; } __packed; /* Volume Descriptor Pointer [3/10.3] */ struct vol_desc_ptr { struct desc_tag tag; uint32_t vds_number; struct extent_ad next_vds_ex; } __packed; /* Primary Volume Descriptor [3/10.1] */ struct pri_vol_desc { struct desc_tag tag; uint32_t seq_num; uint32_t pdv_num; char vol_id[32]; uint16_t vds_num; uint16_t max_vol_seq; uint16_t ichg_lvl; uint16_t max_ichg_lvl; uint32_t charset_list; uint32_t max_charset_list; char volset_id[128]; struct charspec desc_charset; struct charspec explanatory_charset; struct extent_ad vol_abstract; struct extent_ad vol_copyright; struct regid app_id; struct timestamp time; struct regid imp_id; uint8_t imp_use[64]; uint32_t prev_vds_lov; uint16_t flags; uint8_t reserved[22]; } __packed; /* Logical Volume Descriptor [3/10.6] */ struct logvol_desc { struct desc_tag tag; uint32_t seq_num; struct charspec desc_charset; char logvol_id[128]; uint32_t lb_size; struct regid domain_id; union { struct long_ad fsd_loc; uint8_t logvol_content_use[16]; } _lvd_use; uint32_t mt_l; /* Partition map length */ uint32_t n_pm; /* Number of partition maps */ struct regid imp_id; uint8_t imp_use[128]; struct extent_ad integrity_seq_id; uint8_t maps[1]; } __packed; /* Type 1 Partition Map [3/10.7.2] */ struct part_map_1 { 
uint8_t type; uint8_t len; uint16_t vol_seq_num; uint16_t part_num; } __packed; #define UDF_PMAP_TYPE1_SIZE 6 /* Type 2 Partition Map [3/10.7.3] */ struct part_map_2 { uint8_t type; uint8_t len; uint8_t part_id[62]; } __packed; #define UDF_PMAP_TYPE2_SIZE 64 /* Virtual Partition Map [UDF 2.01/2.2.8] */ struct part_map_virt { uint8_t type; uint8_t len; uint8_t reserved[2]; struct regid id; uint16_t vol_seq_num; uint16_t part_num; uint8_t reserved1[24]; } __packed; /* Sparable Partition Map [UDF 2.01/2.2.9] */ struct part_map_spare { uint8_t type; uint8_t len; uint8_t reserved[2]; struct regid id; uint16_t vol_seq_num; uint16_t part_num; uint16_t packet_len; uint8_t n_st; /* Number of Sparing Tables */ uint8_t reserved1; uint32_t st_size; uint32_t st_loc[1]; } __packed; union udf_pmap { struct part_map_1 pm1; struct part_map_2 pm2; struct part_map_virt pmv; struct part_map_spare pms; }; /* Sparing Map Entry [UDF 2.01/2.2.11] */ struct spare_map_entry { uint32_t org; uint32_t map; } __packed; /* Sparing Table [UDF 2.01/2.2.11] */ struct udf_sparing_table { struct desc_tag tag; struct regid id; uint16_t rt_l; /* Relocation Table len */ uint8_t reserved[2]; uint32_t seq_num; struct spare_map_entry entries[1]; } __packed; /* Partition Descriptor [3/10.5] */ struct part_desc { struct desc_tag tag; uint32_t seq_num; uint16_t flags; uint16_t part_num; struct regid contents; uint8_t contents_use[128]; uint32_t access_type; uint32_t start_loc; uint32_t part_len; struct regid imp_id; uint8_t imp_use[128]; uint8_t reserved[156]; } __packed; /* File Set Descriptor [4/14.1] */ struct fileset_desc { struct desc_tag tag; struct timestamp time; uint16_t ichg_lvl; uint16_t max_ichg_lvl; uint32_t charset_list; uint32_t max_charset_list; uint32_t fileset_num; uint32_t fileset_desc_num; struct charspec logvol_id_charset; char logvol_id[128]; struct charspec fileset_charset; char fileset_id[32]; char copyright_file_id[32]; char abstract_file_id[32]; struct long_ad rootdir_icb; struct 
regid domain_id; struct long_ad next_ex; struct long_ad streamdir_icb; uint8_t reserved[32]; } __packed; /* File Identifier Descriptor [4/14.4] */ struct fileid_desc { struct desc_tag tag; uint16_t file_num; uint8_t file_char; uint8_t l_fi; /* Length of file identifier area */ struct long_ad icb; uint16_t l_iu; /* Length of implementation use area */ uint8_t data[1]; } __packed; #define UDF_FID_SIZE 38 #define UDF_FILE_CHAR_VIS (1 << 0) /* Visible */ #define UDF_FILE_CHAR_DIR (1 << 1) /* Directory */ #define UDF_FILE_CHAR_DEL (1 << 2) /* Deleted */ #define UDF_FILE_CHAR_PAR (1 << 3) /* Parent Directory */ #define UDF_FILE_CHAR_META (1 << 4) /* Stream metadata */ /* File Entry [4/14.9] */ struct file_entry { struct desc_tag tag; struct icb_tag icbtag; uint32_t uid; uint32_t gid; uint32_t perm; uint16_t link_cnt; uint8_t rec_format; uint8_t rec_disp_attr; uint32_t rec_len; uint64_t inf_len; uint64_t logblks_rec; struct timestamp atime; struct timestamp mtime; struct timestamp attrtime; uint32_t ckpoint; struct long_ad ex_attr_icb; struct regid imp_id; uint64_t unique_id; uint32_t l_ea; /* Length of extended attribute area */ uint32_t l_ad; /* Length of allocation descriptors */ uint8_t data[1]; } __packed; #define UDF_FENTRY_SIZE 176 #define UDF_FENTRY_PERM_USER_MASK 0x07 #define UDF_FENTRY_PERM_GRP_MASK 0xE0 #define UDF_FENTRY_PERM_OWNER_MASK 0x1C00 /* Path Component [4/14.16.1] */ struct path_component { uint8_t type; uint8_t length; uint16_t version; uint8_t identifier[1]; } __packed; #define UDF_PATH_ROOT 2 #define UDF_PATH_DOTDOT 3 #define UDF_PATH_DOT 4 #define UDF_PATH_PATH 5 union dscrptr { struct desc_tag tag; struct anchor_vdp avdp; struct vol_desc_ptr vdp; struct pri_vol_desc pvd; struct logvol_desc lvd; struct part_desc pd; struct fileset_desc fsd; struct fileid_desc fid; struct file_entry fe; }; /* Useful defines */ #define GETICB(ad_type, fentry, offset) \ (struct ad_type *)&fentry->data[offset] #define GETICBLEN(ad_type, icb) le32toh(((struct ad_type 
*)(icb))->len) Index: head/sys/fs/udf/udf.h =================================================================== --- head/sys/fs/udf/udf.h (revision 326267) +++ head/sys/fs/udf/udf.h (revision 326268) @@ -1,126 +1,128 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2001, 2002 Scott Long * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #define UDF_HASHTBLSIZE 100 struct udf_node { struct vnode *i_vnode; struct udf_mnt *udfmp; ino_t hash_id; long diroff; struct file_entry *fentry; }; struct udf_mnt { int im_flags; struct mount *im_mountp; struct g_consumer *im_cp; struct bufobj *im_bo; struct cdev *im_dev; struct vnode *im_devvp; int bsize; int bshift; int bmask; uint32_t part_start; uint32_t part_len; uint64_t root_id; struct long_ad root_icb; int p_sectors; int s_table_entries; struct udf_sparing_table *s_table; void *im_d2l; /* disk->local iconv handle */ #if 0 void *im_l2d; /* local->disk iconv handle */ #endif }; struct udf_dirstream { struct udf_node *node; struct udf_mnt *udfmp; struct buf *bp; uint8_t *data; uint8_t *buf; int fsize; int off; int this_off; int offset; int size; int error; int fid_fragment; }; struct ifid { u_short ifid_len; u_short ifid_pad; int ifid_ino; long ifid_start; }; #define VFSTOUDFFS(mp) ((struct udf_mnt *)((mp)->mnt_data)) #define VTON(vp) ((struct udf_node *)((vp)->v_data)) /* * The block layer refers to things in terms of 512 byte blocks by default. * btodb() is expensive, so speed things up. * XXX Can the block layer be forced to use a different block size? */ #define RDSECTOR(devvp, sector, size, bp) \ bread(devvp, sector << (udfmp->bshift - DEV_BSHIFT), size, NOCRED, bp) MALLOC_DECLARE(M_UDFFENTRY); static __inline int udf_readdevblks(struct udf_mnt *udfmp, int sector, int size, struct buf **bp) { return (RDSECTOR(udfmp->im_devvp, sector, (size + udfmp->bmask) & ~udfmp->bmask, bp)); } /* * Produce a suitable file number from an ICB. The passed in ICB is expected * to be in little endian (meaning that it hasn't been swapped for big * endian machines yet). * XXX If the fileno resolves to 0, we might be in big trouble. * XXX Assumes the ICB is a long_ad. This struct is compatible with short_ad, * but not ext_ad. 
*/ static __inline ino_t udf_getid(struct long_ad *icb) { return (le32toh(icb->loc.lb_num)); } int udf_allocv(struct mount *, struct vnode **, struct thread *); int udf_checktag(struct desc_tag *, uint16_t); int udf_vget(struct mount *, ino_t, int, struct vnode **); extern uma_zone_t udf_zone_trans; extern uma_zone_t udf_zone_node; extern uma_zone_t udf_zone_ds; extern struct vop_vector udf_fifoops; Index: head/sys/fs/udf/udf_iconv.c =================================================================== --- head/sys/fs/udf/udf_iconv.c (revision 326267) +++ head/sys/fs/udf/udf_iconv.c (revision 326268) @@ -1,36 +1,38 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2003 Ryuichiro Imura * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include VFS_DECLARE_ICONV(udf); Index: head/sys/fs/udf/udf_mount.h =================================================================== --- head/sys/fs/udf/udf_mount.h (revision 326267) +++ head/sys/fs/udf/udf_mount.h (revision 326268) @@ -1,29 +1,31 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2003 Ryuichiro Imura * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #define UDFMNT_KICONV 0x00000001 Index: head/sys/fs/udf/udf_vfsops.c =================================================================== --- head/sys/fs/udf/udf_vfsops.c (revision 326267) +++ head/sys/fs/udf/udf_vfsops.c (revision 326268) @@ -1,823 +1,825 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2001, 2002 Scott Long * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* udf_vfsops.c */ /* Implement the VFS side of things */ /* * Ok, here's how it goes. The UDF specs are pretty clear on how each data * structure is made up, but not very clear on how they relate to each other. * Here is the skinny... This demostrates a filesystem with one file in the * root directory. 
Subdirectories are treated just as normal files, but they * have File Id Descriptors of their children as their file data. As for the * Anchor Volume Descriptor Pointer, it can exist in two of the following three * places: sector 256, sector n (the max sector of the disk), or sector * n - 256. It's a pretty good bet that one will exist at sector 256 though. * One caveat is unclosed CD media. For that, sector 256 cannot be written, * so the Anchor Volume Descriptor Pointer can exist at sector 512 until the * media is closed. * * Sector: * 256: * n: Anchor Volume Descriptor Pointer * n - 256: | * | * |-->Main Volume Descriptor Sequence * | | * | | * | |-->Logical Volume Descriptor * | | * |-->Partition Descriptor | * | | * | | * |-->Fileset Descriptor * | * | * |-->Root Dir File Entry * | * | * |-->File data: * File Id Descriptor * | * | * |-->File Entry * | * | * |-->File data */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_UDFMOUNT, "udf_mount", "UDF mount structure"); MALLOC_DEFINE(M_UDFFENTRY, "udf_fentry", "UDF file entry structure"); struct iconv_functions *udf_iconv = NULL; /* Zones */ uma_zone_t udf_zone_trans = NULL; uma_zone_t udf_zone_node = NULL; uma_zone_t udf_zone_ds = NULL; static vfs_init_t udf_init; static vfs_uninit_t udf_uninit; static vfs_mount_t udf_mount; static vfs_root_t udf_root; static vfs_statfs_t udf_statfs; static vfs_unmount_t udf_unmount; static vfs_fhtovp_t udf_fhtovp; static int udf_find_partmaps(struct udf_mnt *, struct logvol_desc *); static struct vfsops udf_vfsops = { .vfs_fhtovp = udf_fhtovp, .vfs_init = udf_init, .vfs_mount = udf_mount, .vfs_root = udf_root, .vfs_statfs = udf_statfs, .vfs_uninit = udf_uninit, .vfs_unmount = udf_unmount, .vfs_vget = udf_vget, }; VFS_SET(udf_vfsops, udf, VFCF_READONLY); MODULE_VERSION(udf, 
1);

static int udf_mountfs(struct vnode *, struct mount *);

/*
 * VFS init hook: create the UMA zones shared by all UDF mounts
 * (filename-translation buffers, in-core nodes, directory streams).
 * Returns 0 on success or ENOMEM if any zone cannot be created.
 */
static int
udf_init(struct vfsconf *foo)
{
	/*
	 * This code used to pre-allocate a certain number of pages for each
	 * pool, reducing the need to grow the zones later on.  UMA doesn't
	 * advertise any such functionality, unfortunately =-<
	 */
	udf_zone_trans = uma_zcreate("UDF translation buffer, zone",
	    MAXNAMLEN * sizeof(unicode_t), NULL, NULL, NULL, NULL, 0, 0);

	udf_zone_node = uma_zcreate("UDF Node zone", sizeof(struct udf_node),
	    NULL, NULL, NULL, NULL, 0, 0);

	udf_zone_ds = uma_zcreate("UDF Dirstream zone",
	    sizeof(struct udf_dirstream), NULL, NULL, NULL, NULL, 0, 0);

	if ((udf_zone_node == NULL) || (udf_zone_trans == NULL) ||
	    (udf_zone_ds == NULL)) {
		printf("Cannot create allocation zones.\n");
		return (ENOMEM);
	}

	return 0;
}

/*
 * VFS uninit hook: tear down the zones created by udf_init().
 * Each pointer is NULLed after destruction so a repeated call is safe.
 */
static int
udf_uninit(struct vfsconf *foo)
{
	if (udf_zone_trans != NULL) {
		uma_zdestroy(udf_zone_trans);
		udf_zone_trans = NULL;
	}

	if (udf_zone_node != NULL) {
		uma_zdestroy(udf_zone_node);
		udf_zone_node = NULL;
	}

	if (udf_zone_ds != NULL) {
		uma_zdestroy(udf_zone_ds);
		udf_zone_ds = NULL;
	}

	return (0);
}

/*
 * VFS_MOUNT entry point.  Parses the "from", "flags", "cs_disk" and
 * "cs_local" mount options, looks up and validates the backing disk
 * device, and hands the real work to udf_mountfs().  UDF is mounted
 * read-only unconditionally, and neither root mounts nor MNT_UPDATE
 * remounts are supported.
 */
static int
udf_mount(struct mount *mp)
{
	struct vnode *devvp;	/* vnode of the mount device */
	struct thread *td;
	struct udf_mnt *imp = NULL;
	struct vfsoptlist *opts;
	char *fspec, *cs_disk, *cs_local;
	int error, len, *udf_flags;
	struct nameidata nd, *ndp = &nd;

	td = curthread;
	opts = mp->mnt_optnew;

	/*
	 * Unconditionally mount as read-only.
	 */
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_RDONLY;
	MNT_IUNLOCK(mp);

	/*
	 * No root filesystem support.  Probably not a big deal, since the
	 * bootloader doesn't understand UDF.
	 */
	if (mp->mnt_flag & MNT_ROOTFS)
		return (ENOTSUP);

	/* "from" must be a NUL-terminated device path if present. */
	fspec = NULL;
	error = vfs_getopt(opts, "from", (void **)&fspec, &len);
	if (!error && fspec[len - 1] != '\0')
		return (EINVAL);

	if (mp->mnt_flag & MNT_UPDATE) {
		return (0);
	}

	/* Check that the mount device exists */
	if (fspec == NULL)
		return (EINVAL);
	NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td);
	if ((error = namei(ndp)))
		return (error);
	NDFREE(ndp, NDF_ONLY_PNBUF);
	devvp = ndp->ni_vp;

	if (vn_isdisk(devvp, &error) == 0) {
		vput(devvp);
		return (error);
	}

	/* Check the access rights on the mount device */
	error = VOP_ACCESS(devvp, VREAD, td->td_ucred, td);
	if (error)
		error = priv_check(td, PRIV_VFS_MOUNT_PERM);
	if (error) {
		vput(devvp);
		return (error);
	}

	if ((error = udf_mountfs(devvp, mp))) {
		vrele(devvp);
		return (error);
	}

	imp = VFSTOUDFFS(mp);

	/*
	 * NOTE(review): from here on udf_mountfs() has already succeeded
	 * (geom consumer opened, mp->mnt_data set), yet the EINVAL returns
	 * below bail out without any teardown — this looks like it leaks
	 * the half-constructed mount.  TODO: confirm and add cleanup.
	 */
	udf_flags = NULL;
	error = vfs_getopt(opts, "flags", (void **)&udf_flags, &len);
	if (error || len != sizeof(int))
		return (EINVAL);
	imp->im_flags = *udf_flags;

	/* Optionally set up kiconv charset translation (disk <-> local). */
	if (imp->im_flags & UDFMNT_KICONV && udf_iconv) {
		cs_disk = NULL;
		error = vfs_getopt(opts, "cs_disk", (void **)&cs_disk, &len);
		if (!error && cs_disk[len - 1] != '\0')
			return (EINVAL);
		cs_local = NULL;
		error = vfs_getopt(opts, "cs_local", (void **)&cs_local, &len);
		if (!error && cs_local[len - 1] != '\0')
			return (EINVAL);
		udf_iconv->open(cs_local, cs_disk, &imp->im_d2l);
#if 0
		udf_iconv->open(cs_disk, cs_local, &imp->im_l2d);
#endif
	}

	vfs_mountedfrom(mp, fspec);
	return 0;
};

/*
 * Check the descriptor tag for both the correct id and correct checksum.
 * Return zero if all is good, EINVAL if not.
*/
int
udf_checktag(struct desc_tag *tag, uint16_t id)
{
	uint8_t *itag;
	uint8_t i, cksum = 0;

	itag = (uint8_t *)tag;

	if (le16toh(tag->id) != id)
		return (EINVAL);

	/*
	 * Per ECMA-167 the tag checksum is the modulo-256 sum of bytes
	 * 0-15 of the tag, excluding byte 4 (the checksum field itself).
	 */
	for (i = 0; i < 16; i++)
		cksum = cksum + itag[i];

	cksum = cksum - itag[4];

	if (cksum == tag->cksum)
		return (0);

	return (EINVAL);
}

/*
 * Do the actual work of mounting: open the device through geom, locate
 * the Anchor Volume Descriptor Pointer at sector 256, walk the Main
 * Volume Descriptor Sequence for the Partition and Logical Volume
 * Descriptors, then find the Fileset Descriptor and verify the root
 * directory's File Entry.  On failure everything acquired so far is
 * released via the "bail" path.
 */
static int
udf_mountfs(struct vnode *devvp, struct mount *mp)
{
	struct buf *bp = NULL;
	struct cdev *dev;
	struct anchor_vdp avdp;
	struct udf_mnt *udfmp = NULL;
	struct part_desc *pd;
	struct logvol_desc *lvd;
	struct fileset_desc *fsd;
	struct file_entry *root_fentry;
	uint32_t sector, size, mvds_start, mvds_end;
	uint32_t logical_secsize;
	uint32_t fsd_offset = 0;
	uint16_t part_num = 0, fsd_part = 0;
	int error = EINVAL;
	int logvol_found = 0, part_found = 0, fsd_found = 0;
	int bsize;
	struct g_consumer *cp;
	struct bufobj *bo;

	dev = devvp->v_rdev;
	dev_ref(dev);
	g_topology_lock();
	error = g_vfs_open(devvp, &cp, "udf", 0);
	g_topology_unlock();
	VOP_UNLOCK(devvp, 0);
	if (error)
		goto bail;

	bo = &devvp->v_bufobj;

	if (devvp->v_rdev->si_iosize_max != 0)
		mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max;
	if (mp->mnt_iosize_max > MAXPHYS)
		mp->mnt_iosize_max = MAXPHYS;

	/* XXX: should be M_WAITOK */
	udfmp = malloc(sizeof(struct udf_mnt), M_UDFMOUNT,
	    M_NOWAIT | M_ZERO);
	if (udfmp == NULL) {
		printf("Cannot allocate UDF mount struct\n");
		error = ENOMEM;
		goto bail;
	}

	/*
	 * NOTE(review): mp->mnt_data is set here but the bail path below
	 * frees udfmp without clearing it, leaving mp->mnt_data dangling
	 * on failure — verify no caller dereferences it after an error.
	 */
	mp->mnt_data = udfmp;
	mp->mnt_stat.f_fsid.val[0] = dev2udev(devvp->v_rdev);
	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
	MNT_ILOCK(mp);
	mp->mnt_flag |= MNT_LOCAL;
	mp->mnt_kern_flag |= MNTK_LOOKUP_SHARED | MNTK_EXTENDED_SHARED;
	MNT_IUNLOCK(mp);
	udfmp->im_mountp = mp;
	udfmp->im_dev = dev;
	udfmp->im_devvp = devvp;
	udfmp->im_d2l = NULL;
	udfmp->im_cp = cp;
	udfmp->im_bo = bo;

#if 0
	udfmp->im_l2d = NULL;
#endif

	/*
	 * The UDF specification defines a logical sectorsize of 2048
	 * for DVD media.
	 */
	logical_secsize = 2048;

	if (((logical_secsize % cp->provider->sectorsize) != 0) ||
	    (logical_secsize < cp->provider->sectorsize)) {
		error = EINVAL;
		goto bail;
	}

	bsize = cp->provider->sectorsize;

	/*
	 * Get the Anchor Volume Descriptor Pointer from sector 256.
	 * XXX Should also check sector n - 256, n, and 512.
	 */
	sector = 256;
	if ((error = bread(devvp, sector * btodb(logical_secsize), bsize,
	    NOCRED, &bp)) != 0)
		goto bail;
	if ((error = udf_checktag((struct desc_tag *)bp->b_data,
	    TAGID_ANCHOR)))
		goto bail;

	bcopy(bp->b_data, &avdp, sizeof(struct anchor_vdp));
	brelse(bp);
	bp = NULL;

	/*
	 * Extract the Partition Descriptor and Logical Volume Descriptor
	 * from the Volume Descriptor Sequence.
	 * XXX Should we care about the partition type right now?
	 * XXX What about multiple partitions?
	 */
	mvds_start = le32toh(avdp.main_vds_ex.loc);
	mvds_end = mvds_start + (le32toh(avdp.main_vds_ex.len) - 1) / bsize;
	for (sector = mvds_start; sector < mvds_end; sector++) {
		if ((error = bread(devvp, sector * btodb(logical_secsize),
		    bsize, NOCRED, &bp)) != 0) {
			printf("Can't read sector %d of VDS\n", sector);
			goto bail;
		}
		lvd = (struct logvol_desc *)bp->b_data;
		if (!udf_checktag(&lvd->tag, TAGID_LOGVOL)) {
			udfmp->bsize = le32toh(lvd->lb_size);
			udfmp->bmask = udfmp->bsize - 1;
			udfmp->bshift = ffs(udfmp->bsize) - 1;
			fsd_part = le16toh(lvd->_lvd_use.fsd_loc.loc.part_num);
			fsd_offset = le32toh(lvd->_lvd_use.fsd_loc.loc.lb_num);
			if (udf_find_partmaps(udfmp, lvd))
				break;
			logvol_found = 1;
		}
		pd = (struct part_desc *)bp->b_data;
		if (!udf_checktag(&pd->tag, TAGID_PARTITION)) {
			part_found = 1;
			part_num = le16toh(pd->part_num);
			udfmp->part_len = le32toh(pd->part_len);
			udfmp->part_start = le32toh(pd->start_loc);
		}

		brelse(bp);
		bp = NULL;
		if ((part_found) && (logvol_found))
			break;
	}

	if (!part_found || !logvol_found) {
		error = EINVAL;
		goto bail;
	}

	if (fsd_part != part_num) {
		printf("FSD does not lie within the partition!\n");
		error = EINVAL;
		goto bail;
	}

	/*
	 * Grab the Fileset Descriptor
	 * Thanks to Chuck McCrobie for pointing
	 * me in the right direction here.
	 */
	sector = udfmp->part_start + fsd_offset;
	if ((error = RDSECTOR(devvp, sector, udfmp->bsize, &bp)) != 0) {
		printf("Cannot read sector %d of FSD\n", sector);
		goto bail;
	}
	fsd = (struct fileset_desc *)bp->b_data;
	if (!udf_checktag(&fsd->tag, TAGID_FSD)) {
		fsd_found = 1;
		bcopy(&fsd->rootdir_icb, &udfmp->root_icb,
		    sizeof(struct long_ad));
	}

	brelse(bp);
	bp = NULL;
	if (!fsd_found) {
		printf("Couldn't find the fsd\n");
		error = EINVAL;
		goto bail;
	}

	/*
	 * Find the file entry for the root directory.
	 */
	sector = le32toh(udfmp->root_icb.loc.lb_num) + udfmp->part_start;
	size = le32toh(udfmp->root_icb.len);
	if ((error = udf_readdevblks(udfmp, sector, size, &bp)) != 0) {
		printf("Cannot read sector %d\n", sector);
		goto bail;
	}

	root_fentry = (struct file_entry *)bp->b_data;
	if ((error = udf_checktag(&root_fentry->tag, TAGID_FENTRY))) {
		printf("Invalid root file entry!\n");
		goto bail;
	}

	brelse(bp);
	bp = NULL;

	return 0;

bail:
	/* Release everything acquired so far, in reverse order. */
	if (udfmp != NULL)
		free(udfmp, M_UDFMOUNT);
	if (bp != NULL)
		brelse(bp);
	if (cp != NULL) {
		g_topology_lock();
		g_vfs_close(cp);
		g_topology_unlock();
	}
	dev_rel(dev);
	return error;
};

/*
 * VFS_UNMOUNT: flush all vnodes, close the iconv handles and the geom
 * consumer, drop the device references and free the per-mount state.
 */
static int
udf_unmount(struct mount *mp, int mntflags)
{
	struct udf_mnt *udfmp;
	int error, flags = 0;

	udfmp = VFSTOUDFFS(mp);

	if (mntflags & MNT_FORCE)
		flags |= FORCECLOSE;

	if ((error = vflush(mp, 0, flags, curthread)))
		return (error);

	if (udfmp->im_flags & UDFMNT_KICONV && udf_iconv) {
		if (udfmp->im_d2l)
			udf_iconv->close(udfmp->im_d2l);
#if 0
		if (udfmp->im_l2d)
			udf_iconv->close(udfmp->im_l2d);
#endif
	}

	g_topology_lock();
	g_vfs_close(udfmp->im_cp);
	g_topology_unlock();
	vrele(udfmp->im_devvp);
	dev_rel(udfmp->im_dev);

	if (udfmp->s_table != NULL)
		free(udfmp->s_table, M_UDFMOUNT);

	free(udfmp, M_UDFMOUNT);

	mp->mnt_data = NULL;
	MNT_ILOCK(mp);
	mp->mnt_flag &= ~MNT_LOCAL;
	MNT_IUNLOCK(mp);

	return (0);
}

/*
 * VFS_ROOT: return a vnode for the root directory, identified by the
 * inode number derived from the root ICB recorded at mount time.
 */
static int
udf_root(struct mount *mp, int flags, struct vnode **vpp)
{
	struct udf_mnt *udfmp;
	ino_t id;

	udfmp = VFSTOUDFFS(mp);

	id =
udf_getid(&udfmp->root_icb);

	return (udf_vget(mp, id, flags, vpp));
}

/*
 * VFS_STATFS: report block size and partition length; UDF is read-only
 * here, so free/available counts are always zero.
 */
static int
udf_statfs(struct mount *mp, struct statfs *sbp)
{
	struct udf_mnt *udfmp;

	udfmp = VFSTOUDFFS(mp);

	sbp->f_bsize = udfmp->bsize;
	sbp->f_iosize = udfmp->bsize;
	sbp->f_blocks = udfmp->part_len;
	sbp->f_bfree = 0;
	sbp->f_bavail = 0;
	sbp->f_files = 0;
	sbp->f_ffree = 0;
	return 0;
}

/*
 * VFS_VGET: return the (possibly shared) vnode for inode 'ino',
 * creating it and reading its File Entry from disk on first use.
 * Racing creators are resolved through vfs_hash_insert().
 */
int
udf_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
{
	struct buf *bp;
	struct vnode *devvp;
	struct udf_mnt *udfmp;
	struct thread *td;
	struct vnode *vp;
	struct udf_node *unode;
	struct file_entry *fe;
	int error, sector, size;

	error = vfs_hash_get(mp, ino, flags, curthread, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * We must promote to an exclusive lock for vnode creation.  This
	 * can happen if lookup is passed LOCKSHARED.
	 */
	if ((flags & LK_TYPE_MASK) == LK_SHARED) {
		flags &= ~LK_TYPE_MASK;
		flags |= LK_EXCLUSIVE;
	}

	/*
	 * We do not lock vnode creation as it is believed to be too
	 * expensive for such rare case as simultaneous creation of vnode
	 * for same ino by different processes. We just allow them to race
	 * and check later to decide who wins. Let the race begin!
	 */

	td = curthread;
	udfmp = VFSTOUDFFS(mp);

	unode = uma_zalloc(udf_zone_node, M_WAITOK | M_ZERO);

	if ((error = udf_allocv(mp, &vp, td))) {
		printf("Error from udf_allocv\n");
		uma_zfree(udf_zone_node, unode);
		return (error);
	}

	unode->i_vnode = vp;
	unode->hash_id = ino;
	unode->udfmp = udfmp;
	vp->v_data = unode;

	lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL);
	error = insmntque(vp, mp);
	if (error != 0) {
		uma_zfree(udf_zone_node, unode);
		return (error);
	}
	/* If another thread won the race, *vpp is its vnode; use that. */
	error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL);
	if (error || *vpp != NULL)
		return (error);

	/*
	 * Copy in the file entry.  Per the spec, the size can only be
	 * 1 block.
	 */
	sector = ino + udfmp->part_start;
	devvp = udfmp->im_devvp;
	if ((error = RDSECTOR(devvp, sector, udfmp->bsize, &bp)) != 0) {
		printf("Cannot read sector %d\n", sector);
		vgone(vp);
		vput(vp);
		brelse(bp);
		*vpp = NULL;
		return (error);
	}

	fe = (struct file_entry *)bp->b_data;
	if (udf_checktag(&fe->tag, TAGID_FENTRY)) {
		/*
		 * NOTE(review): ENOMEM for a bad descriptor tag looks
		 * wrong — EINVAL would match udf_checktag().  Confirm
		 * before changing; callers may not care.
		 */
		printf("Invalid file entry!\n");
		vgone(vp);
		vput(vp);
		brelse(bp);
		*vpp = NULL;
		return (ENOMEM);
	}

	size = UDF_FENTRY_SIZE + le32toh(fe->l_ea) + le32toh(fe->l_ad);
	unode->fentry = malloc(size, M_UDFFENTRY, M_NOWAIT | M_ZERO);
	if (unode->fentry == NULL) {
		printf("Cannot allocate file entry block\n");
		vgone(vp);
		vput(vp);
		brelse(bp);
		*vpp = NULL;
		return (ENOMEM);
	}

	bcopy(bp->b_data, unode->fentry, size);

	brelse(bp);
	bp = NULL;

	/* Map the ICB file type to a vnode type (values per ECMA-167). */
	switch (unode->fentry->icbtag.file_type) {
	default:
		vp->v_type = VBAD;
		break;
	case 4:
		vp->v_type = VDIR;
		break;
	case 5:
		vp->v_type = VREG;
		break;
	case 6:
		vp->v_type = VBLK;
		break;
	case 7:
		vp->v_type = VCHR;
		break;
	case 9:
		vp->v_type = VFIFO;
		vp->v_op = &udf_fifoops;
		break;
	case 10:
		vp->v_type = VSOCK;
		break;
	case 12:
		vp->v_type = VLNK;
		break;
	}

	if (vp->v_type != VFIFO)
		VN_LOCK_ASHARE(vp);

	if (ino == udf_getid(&udfmp->root_icb))
		vp->v_vflag |= VV_ROOT;

	*vpp = vp;

	return (0);
}

/*
 * VFS_FHTOVP: translate an NFS-style file handle (struct ifid) back
 * into a vnode via VFS_VGET, and ensure it has a VM object.
 */
static int
udf_fhtovp(struct mount *mp, struct fid *fhp, int flags,
    struct vnode **vpp)
{
	struct ifid *ifhp;
	struct vnode *nvp;
	struct udf_node *np;
	off_t fsize;
	int error;

	ifhp = (struct ifid *)fhp;

	if ((error = VFS_VGET(mp, ifhp->ifid_ino, LK_EXCLUSIVE, &nvp)) != 0) {
		*vpp = NULLVP;
		return (error);
	}

	np = VTON(nvp);
	fsize = le64toh(np->fentry->inf_len);
	*vpp = nvp;
	vnode_create_vobject(*vpp, fsize, curthread);
	return (0);
}

/*
 * Walk the partition maps in the Logical Volume Descriptor.  Type 1
 * (physical) maps are skipped; for a Type 2 map only "*UDF Sparable
 * Partition" is supported, whose Sparing Table is read into
 * udfmp->s_table.
 *
 * NOTE(review): the return convention is mixed — 1 for unsupported
 * maps but real errnos (ENOMEM/EINVAL) elsewhere; the caller only
 * tests for non-zero, so this works, but it is worth confirming.
 */
static int
udf_find_partmaps(struct udf_mnt *udfmp, struct logvol_desc *lvd)
{
	struct part_map_spare *pms;
	struct regid *pmap_id;
	struct buf *bp;
	unsigned char regid_id[UDF_REGID_ID_SIZE + 1];
	int i, k, ptype, psize, error;
	uint8_t *pmap = (uint8_t *) &lvd->maps[0];

	for (i = 0; i < le32toh(lvd->n_pm); i++) {
		ptype = pmap[0];
		psize = pmap[1];

		if (((ptype != 1) && (ptype != 2)) ||
		    ((psize != UDF_PMAP_TYPE1_SIZE) &&
		    (psize != UDF_PMAP_TYPE2_SIZE))) {
			printf("Invalid partition map found\n");
			return (1);
		}

		if (ptype == 1) {
			/* Type 1 map. We don't care */
			pmap += UDF_PMAP_TYPE1_SIZE;
			continue;
		}

		/* Type 2 map. Gotta find out the details */
		pmap_id = (struct regid *)&pmap[4];
		bzero(&regid_id[0], UDF_REGID_ID_SIZE);
		bcopy(&pmap_id->id[0], &regid_id[0], UDF_REGID_ID_SIZE);

		if (bcmp(&regid_id[0], "*UDF Sparable Partition",
		    UDF_REGID_ID_SIZE)) {
			printf("Unsupported partition map: %s\n", &regid_id[0]);
			return (1);
		}

		pms = (struct part_map_spare *)pmap;
		pmap += UDF_PMAP_TYPE2_SIZE;
		udfmp->s_table = malloc(le32toh(pms->st_size), M_UDFMOUNT,
		    M_NOWAIT | M_ZERO);
		if (udfmp->s_table == NULL)
			return (ENOMEM);

		/* Calculate the number of sectors per packet. */
		/* XXX Logical or physical? */
		udfmp->p_sectors = le16toh(pms->packet_len) / udfmp->bsize;

		/*
		 * XXX If reading the first Sparing Table fails, should look
		 * for another table.
		 */
		if ((error = udf_readdevblks(udfmp, le32toh(pms->st_loc[0]),
		    le32toh(pms->st_size), &bp)) != 0) {
			if (bp != NULL)
				brelse(bp);
			printf("Failed to read Sparing Table at sector %d\n",
			    le32toh(pms->st_loc[0]));
			free(udfmp->s_table, M_UDFMOUNT);
			return (error);
		}
		bcopy(bp->b_data, udfmp->s_table, le32toh(pms->st_size));
		brelse(bp);

		if (udf_checktag(&udfmp->s_table->tag, 0)) {
			printf("Invalid sparing table found\n");
			free(udfmp->s_table, M_UDFMOUNT);
			return (EINVAL);
		}

		/* See how many valid entries there are here. The list is
		 * supposed to be sorted.
0xfffffff0 and higher are not valid */ for (k = 0; k < le16toh(udfmp->s_table->rt_l); k++) { udfmp->s_table_entries = k; if (le32toh(udfmp->s_table->entries[k].org) >= 0xfffffff0) break; } } return (0); } Index: head/sys/fs/udf/udf_vnops.c =================================================================== --- head/sys/fs/udf/udf_vnops.c (revision 326267) +++ head/sys/fs/udf/udf_vnops.c (revision 326268) @@ -1,1483 +1,1485 @@ /*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * * Copyright (c) 2001, 2002 Scott Long * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ /* udf_vnops.c */ /* Take care of the vnode side of things */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern struct iconv_functions *udf_iconv; static vop_access_t udf_access; static vop_getattr_t udf_getattr; static vop_open_t udf_open; static vop_ioctl_t udf_ioctl; static vop_pathconf_t udf_pathconf; static vop_print_t udf_print; static vop_read_t udf_read; static vop_readdir_t udf_readdir; static vop_readlink_t udf_readlink; static vop_setattr_t udf_setattr; static vop_strategy_t udf_strategy; static vop_bmap_t udf_bmap; static vop_cachedlookup_t udf_lookup; static vop_reclaim_t udf_reclaim; static vop_vptofh_t udf_vptofh; static int udf_readatoffset(struct udf_node *node, int *size, off_t offset, struct buf **bp, uint8_t **data); static int udf_bmap_internal(struct udf_node *node, off_t offset, daddr_t *sector, uint32_t *max_size); static struct vop_vector udf_vnodeops = { .vop_default = &default_vnodeops, .vop_access = udf_access, .vop_bmap = udf_bmap, .vop_cachedlookup = udf_lookup, .vop_getattr = udf_getattr, .vop_ioctl = udf_ioctl, .vop_lookup = vfs_cache_lookup, .vop_open = udf_open, .vop_pathconf = udf_pathconf, .vop_print = udf_print, .vop_read = udf_read, .vop_readdir = udf_readdir, .vop_readlink = udf_readlink, .vop_reclaim = udf_reclaim, .vop_setattr = udf_setattr, .vop_strategy = udf_strategy, .vop_vptofh = udf_vptofh, }; struct vop_vector udf_fifoops = { .vop_default = &fifo_specops, .vop_access = udf_access, .vop_getattr = udf_getattr, .vop_print = udf_print, .vop_reclaim = udf_reclaim, .vop_setattr = udf_setattr, .vop_vptofh = udf_vptofh, }; static MALLOC_DEFINE(M_UDFFID, "udf_fid", "UDF FileId structure"); static MALLOC_DEFINE(M_UDFDS, "udf_ds", "UDF Dirstream structure"); #define UDF_INVALID_BMAP -1 int udf_allocv(struct mount *mp, struct vnode **vpp, struct thread *td) { int error; 
	struct vnode *vp;

	error = getnewvnode("udf", mp, &udf_vnodeops, &vp);
	if (error) {
		printf("udf_allocv: failed to allocate new vnode\n");
		return (error);
	}

	*vpp = vp;
	return (0);
}

/* Convert file entry permission (5 bits per owner/group/user) to a mode_t */
static mode_t
udf_permtomode(struct udf_node *node)
{
	uint32_t perm;
	uint16_t flags;
	mode_t mode;

	perm = le32toh(node->fentry->perm);
	flags = le16toh(node->fentry->icbtag.flags);

	/*
	 * On-disk layout is owner/group/user from high to low bits, five
	 * bits each; shift each group down into the POSIX rwx positions,
	 * then fold the sticky/setgid/setuid ICB flags into the high bits.
	 */
	mode = perm & UDF_FENTRY_PERM_USER_MASK;
	mode |= ((perm & UDF_FENTRY_PERM_GRP_MASK) >> 2);
	mode |= ((perm & UDF_FENTRY_PERM_OWNER_MASK) >> 4);
	mode |= ((flags & UDF_ICB_TAG_FLAGS_STICKY) << 4);
	mode |= ((flags & UDF_ICB_TAG_FLAGS_SETGID) << 6);
	mode |= ((flags & UDF_ICB_TAG_FLAGS_SETUID) << 8);

	return (mode);
}

/*
 * VOP_ACCESS: deny writes to anything on this read-only filesystem,
 * otherwise defer to vaccess() with the mode derived from the file entry.
 */
static int
udf_access(struct vop_access_args *a)
{
	struct vnode *vp;
	struct udf_node *node;
	accmode_t accmode;
	mode_t mode;

	vp = a->a_vp;
	node = VTON(vp);
	accmode = a->a_accmode;

	if (accmode & VWRITE) {
		switch (vp->v_type) {
		case VDIR:
		case VLNK:
		case VREG:
			return (EROFS);
			/* NOT REACHED */
		default:
			break;
		}
	}

	mode = udf_permtomode(node);

	return (vaccess(vp->v_type, mode, node->fentry->uid,
	    node->fentry->gid, accmode, a->a_cred, NULL));
}

/* VOP_OPEN: just make sure a VM object exists for the file's size. */
static int
udf_open(struct vop_open_args *ap)
{
	struct udf_node *np = VTON(ap->a_vp);
	off_t fsize;

	fsize = le64toh(np->fentry->inf_len);
	vnode_create_vobject(ap->a_vp, fsize, ap->a_td);
	return 0;
}

/* Cumulative days before each month, for normal and leap years. */
static const int mon_lens[2][12] = {
	{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334},
	{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335}
};

/* Return 1 if 'year' is a leap year, 0 otherwise (Gregorian rules). */
static int
udf_isaleapyear(int year)
{
	int i;

	i = (year % 4) ? 0 : 1;
	i &= (year % 100) ? 1 : 0;
	i |= (year % 400) ? 0 : 1;

	return i;
}

/*
 * Timezone calculation compliments of Julian Elischer .
 */
static void
udf_timetotimespec(struct timestamp *time, struct timespec *t)
{
	int i, lpyear, daysinyear, year, startyear;
	union {
		uint16_t	u_tz_offset;
		int16_t		s_tz_offset;
	} tz;

	/*
	 * DirectCD seems to like using bogus year values.
* Don't trust time->month as it will be used for an array index. */ year = le16toh(time->year); if (year < 1970 || time->month < 1 || time->month > 12) { t->tv_sec = 0; t->tv_nsec = 0; return; } /* Calculate the time and day */ t->tv_sec = time->second; t->tv_sec += time->minute * 60; t->tv_sec += time->hour * 3600; t->tv_sec += (time->day - 1) * 3600 * 24; /* Calculate the month */ lpyear = udf_isaleapyear(year); t->tv_sec += mon_lens[lpyear][time->month - 1] * 3600 * 24; /* Speed up the calculation */ startyear = 1970; if (year > 2009) { t->tv_sec += 1262304000; startyear += 40; } else if (year > 1999) { t->tv_sec += 946684800; startyear += 30; } else if (year > 1989) { t->tv_sec += 631152000; startyear += 20; } else if (year > 1979) { t->tv_sec += 315532800; startyear += 10; } daysinyear = (year - startyear) * 365; for (i = startyear; i < year; i++) daysinyear += udf_isaleapyear(i); t->tv_sec += daysinyear * 3600 * 24; /* Calculate microseconds */ t->tv_nsec = time->centisec * 10000 + time->hund_usec * 100 + time->usec; /* * Calculate the time zone. The timezone is 12 bit signed 2's * complement, so we gotta do some extra magic to handle it right. */ tz.u_tz_offset = le16toh(time->type_tz); tz.u_tz_offset &= 0x0fff; if (tz.u_tz_offset & 0x0800) tz.u_tz_offset |= 0xf000; /* extend the sign to 16 bits */ if ((le16toh(time->type_tz) & 0x1000) && (tz.s_tz_offset != -2047)) t->tv_sec -= tz.s_tz_offset * 60; return; } static int udf_getattr(struct vop_getattr_args *a) { struct vnode *vp; struct udf_node *node; struct vattr *vap; struct file_entry *fentry; struct timespec ts; ts.tv_sec = 0; vp = a->a_vp; vap = a->a_vap; node = VTON(vp); fentry = node->fentry; vap->va_fsid = dev2udev(node->udfmp->im_dev); vap->va_fileid = node->hash_id; vap->va_mode = udf_permtomode(node); vap->va_nlink = le16toh(fentry->link_cnt); /* * XXX The spec says that -1 is valid for uid/gid and indicates an * invalid uid/gid. How should this be represented? 
*/ vap->va_uid = (le32toh(fentry->uid) == -1) ? 0 : le32toh(fentry->uid); vap->va_gid = (le32toh(fentry->gid) == -1) ? 0 : le32toh(fentry->gid); udf_timetotimespec(&fentry->atime, &vap->va_atime); udf_timetotimespec(&fentry->mtime, &vap->va_mtime); vap->va_ctime = vap->va_mtime; /* XXX Stored as an Extended Attribute */ vap->va_rdev = NODEV; if (vp->v_type & VDIR) { /* * Directories that are recorded within their ICB will show * as having 0 blocks recorded. Since tradition dictates * that directories consume at least one logical block, * make it appear so. */ if (fentry->logblks_rec != 0) { vap->va_size = le64toh(fentry->logblks_rec) * node->udfmp->bsize; } else { vap->va_size = node->udfmp->bsize; } } else { vap->va_size = le64toh(fentry->inf_len); } vap->va_flags = 0; vap->va_gen = 1; vap->va_blocksize = node->udfmp->bsize; vap->va_bytes = le64toh(fentry->inf_len); vap->va_type = vp->v_type; vap->va_filerev = 0; /* XXX */ return (0); } static int udf_setattr(struct vop_setattr_args *a) { struct vnode *vp; struct vattr *vap; vp = a->a_vp; vap = a->a_vap; if (vap->va_flags != (u_long)VNOVAL || vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) return (EROFS); if (vap->va_size != (u_quad_t)VNOVAL) { switch (vp->v_type) { case VDIR: return (EISDIR); case VLNK: case VREG: return (EROFS); case VCHR: case VBLK: case VSOCK: case VFIFO: case VNON: case VBAD: case VMARKER: return (0); } } return (0); } /* * File specific ioctls. */ static int udf_ioctl(struct vop_ioctl_args *a) { printf("%s called\n", __func__); return (ENOTTY); } /* * I'm not sure that this has much value in a read-only filesystem, but * cd9660 has it too. 
 */
static int
udf_pathconf(struct vop_pathconf_args *a)
{

	switch (a->a_name) {
	case _PC_FILESIZEBITS:
		*a->a_retval = 64;
		return (0);
	case _PC_LINK_MAX:
		*a->a_retval = 65535;
		return (0);
	case _PC_NAME_MAX:
		*a->a_retval = NAME_MAX;
		return (0);
	case _PC_SYMLINK_MAX:
		*a->a_retval = MAXPATHLEN;
		return (0);
	case _PC_NO_TRUNC:
		*a->a_retval = 1;
		return (0);
	default:
		return (vop_stdpathconf(a));
	}
}

/* VOP_PRINT: dump a one-line description of the vnode for debugging. */
static int
udf_print(struct vop_print_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct udf_node *node = VTON(vp);

	printf("    ino %lu, on dev %s", (u_long)node->hash_id,
	    devtoname(node->udfmp->im_dev));
	if (vp->v_type == VFIFO)
		fifo_printinfo(vp);
	printf("\n");
	return (0);
}

/* Logical block number / offset-in-block / block-to-byte helpers. */
#define	lblkno(udfmp, loc)	((loc) >> (udfmp)->bshift)
#define	blkoff(udfmp, loc)	((loc) & (udfmp)->bmask)
#define	lblktosize(udfmp, blk)	((blk) << (udfmp)->bshift)

/*
 * True if the file's data is embedded in the allocation-descriptor area
 * of the file entry itself (ICB tag flags low bits == 3).
 */
static inline int
is_data_in_fentry(const struct udf_node *node)
{
	const struct file_entry *fentry = node->fentry;

	return ((le16toh(fentry->icbtag.flags) & 0x7) == 3);
}

/*
 * VOP_READ: copy file data into uio, either straight out of the file
 * entry (embedded data) or block by block through the buffer cache,
 * using cluster_read() when read clustering is enabled.
 */
static int
udf_read(struct vop_read_args *ap)
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct udf_node *node = VTON(vp);
	struct udf_mnt *udfmp;
	struct file_entry *fentry;
	struct buf *bp;
	uint8_t *data;
	daddr_t lbn, rablock;
	off_t diff, fsize;
	ssize_t n;
	int error = 0;
	long size, on;

	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);

	if (is_data_in_fentry(node)) {
		/* Embedded data: copy directly from the file entry. */
		fentry = node->fentry;
		data = &fentry->data[le32toh(fentry->l_ea)];
		fsize = le32toh(fentry->l_ad);

		n = uio->uio_resid;
		diff = fsize - uio->uio_offset;
		if (diff <= 0)
			return (0);
		if (diff < n)
			n = diff;

		error = uiomove(data + uio->uio_offset, (int)n, uio);
		return (error);
	}

	fsize = le64toh(node->fentry->inf_len);
	udfmp = node->udfmp;
	do {
		lbn = lblkno(udfmp, uio->uio_offset);
		on = blkoff(udfmp, uio->uio_offset);
		n = min((u_int)(udfmp->bsize - on),
			uio->uio_resid);
		diff = fsize - uio->uio_offset;
		if (diff <= 0)
			return (0);
		if (diff < n)
			n = diff;
		size = udfmp->bsize;
		rablock = lbn
			uma_zfree(udf_zone_trans, transname);
			return 0;
		}

		/* Fold each 16-bit character down to 8 bits. */
		for (i = 0; i < unilen ; i++) {
			if (transname[i] & 0xff00) {
				destname[i] = '.';	/* Fudge the 16bit chars */
			} else {
				destname[i] = transname[i] & 0xff;
			}
		}
		uma_zfree(udf_zone_trans, transname);
		destname[unilen] = 0;
		destlen = (int)unilen;
	}

	return (destlen);
}

/*
 * Compare a CS0 dstring with a name passed in from the VFS layer. Return
 * 0 on a successful match, nonzero otherwise. Unicode work may need to be done
 * here also.
 */
static int
udf_cmpname(char *cs0string, char *cmpname, int cs0len, int cmplen, struct udf_mnt *udfmp)
{
	char *transname;
	int error = 0;

	/* This is overkill, but not worth creating a new zone */
	transname = uma_zalloc(udf_zone_trans, M_WAITOK);

	cs0len = udf_transname(cs0string, transname, cs0len, udfmp);

	/* Easy check.  If they aren't the same length, they aren't equal */
	if ((cs0len == 0) || (cs0len != cmplen))
		error = -1;
	else
		error = bcmp(transname, cmpname, cmplen);

	uma_zfree(udf_zone_trans, transname);
	return (error);
}

/* State carried while copying directory entries out to a uio. */
struct udf_uiodir {
	struct dirent *dirent;
	u_long *cookies;
	int ncookies;
	int acookies;
	int eofflag;
};

/*
 * Emit one directory entry (and its cookie) into the caller's uio.
 * Returns -1 with eofflag cleared when the uio or cookie space runs out.
 */
static int
udf_uiodir(struct udf_uiodir *uiodir, int de_size, struct uio *uio, long cookie)
{
	if (uiodir->cookies != NULL) {
		if (++uiodir->acookies > uiodir->ncookies) {
			uiodir->eofflag = 0;
			return (-1);
		}
		*uiodir->cookies++ = cookie;
	}

	if (uio->uio_resid < de_size) {
		uiodir->eofflag = 0;
		return (-1);
	}

	return (uiomove(uiodir->dirent, de_size, uio));
}

/* Allocate and initialize a directory stream starting at 'offset'. */
static struct udf_dirstream *
udf_opendir(struct udf_node *node, int offset, int fsize, struct udf_mnt *udfmp)
{
	struct udf_dirstream *ds;

	ds = uma_zalloc(udf_zone_ds, M_WAITOK | M_ZERO);

	ds->node = node;
	ds->offset = offset;
	ds->udfmp = udfmp;
	ds->fsize = fsize;

	return (ds);
}

/*
 * Return the next File ID descriptor in the directory stream, reassembling
 * descriptors that straddle an extent boundary into ds->buf.  Returns NULL
 * at end of directory or on error (with ds->error set).
 */
static struct fileid_desc *
udf_getfid(struct udf_dirstream *ds)
{
	struct fileid_desc *fid;
	int error, frag_size = 0, total_fid_size;

	/* End of directory? */
	if (ds->offset + ds->off >= ds->fsize) {
		ds->error = 0;
		return (NULL);
	}

	/* Grab the first extent of the directory */
	if (ds->off == 0) {
		ds->size = 0;
		error = udf_readatoffset(ds->node, &ds->size, ds->offset,
		    &ds->bp, &ds->data);
		if (error) {
			ds->error = error;
			if (ds->bp != NULL)
				brelse(ds->bp);
			return (NULL);
		}
	}

	/*
	 * Clean up from a previous fragmented FID.
	 * XXX Is this the right place for this?
	 */
	if (ds->fid_fragment && ds->buf != NULL) {
		ds->fid_fragment = 0;
		free(ds->buf, M_UDFFID);
	}

	fid = (struct fileid_desc*)&ds->data[ds->off];

	/*
	 * Check to see if the fid is fragmented. The first test
	 * ensures that we don't wander off the end of the buffer
	 * looking for the l_iu and l_fi fields.
	 */
	if (ds->off + UDF_FID_SIZE > ds->size ||
	    ds->off + le16toh(fid->l_iu) + fid->l_fi + UDF_FID_SIZE > ds->size){

		/* Copy what we have of the fid into a buffer */
		frag_size = ds->size - ds->off;
		if (frag_size >= ds->udfmp->bsize) {
			printf("udf: invalid FID fragment\n");
			ds->error = EINVAL;
			return (NULL);
		}

		/*
		 * File ID descriptors can only be at most one
		 * logical sector in size.
		 */
		ds->buf = malloc(ds->udfmp->bsize, M_UDFFID,
		    M_WAITOK | M_ZERO);
		bcopy(fid, ds->buf, frag_size);

		/* Reduce all of the casting magic */
		fid = (struct fileid_desc*)ds->buf;

		if (ds->bp != NULL)
			brelse(ds->bp);

		/* Fetch the next allocation */
		ds->offset += ds->size;
		ds->size = 0;
		error = udf_readatoffset(ds->node, &ds->size, ds->offset,
		    &ds->bp, &ds->data);
		if (error) {
			ds->error = error;
			return (NULL);
		}

		/*
		 * If the fragment was so small that we didn't get
		 * the l_iu and l_fi fields, copy those in.
		 */
		if (frag_size < UDF_FID_SIZE)
			bcopy(ds->data, &ds->buf[frag_size],
			    UDF_FID_SIZE - frag_size);

		/*
		 * Now that we have enough of the fid to work with,
		 * copy in the rest of the fid from the new
		 * allocation.
		 */
		total_fid_size = UDF_FID_SIZE + le16toh(fid->l_iu) + fid->l_fi;
		if (total_fid_size > ds->udfmp->bsize) {
			printf("udf: invalid FID\n");
			ds->error = EIO;
			return (NULL);
		}
		bcopy(ds->data, &ds->buf[frag_size],
		    total_fid_size - frag_size);

		ds->fid_fragment = 1;
	} else {
		total_fid_size = le16toh(fid->l_iu) + fid->l_fi + UDF_FID_SIZE;
	}

	/*
	 * Update the offset. Align on a 4 byte boundary because the
	 * UDF spec says so.
	 */
	ds->this_off = ds->offset + ds->off;
	if (!ds->fid_fragment) {
		ds->off += (total_fid_size + 3) & ~0x03;
	} else {
		ds->off = (total_fid_size - frag_size + 3) & ~0x03;
	}

	return (fid);
}

/* Release the buffers and state held by a directory stream. */
static void
udf_closedir(struct udf_dirstream *ds)
{

	if (ds->bp != NULL)
		brelse(ds->bp);

	if (ds->fid_fragment && ds->buf != NULL)
		free(ds->buf, M_UDFFID);

	uma_zfree(udf_zone_ds, ds);
}

/*
 * VOP_READDIR: walk the File ID descriptors of a directory and emit
 * dirent structures (plus NFS cookies) into the caller's uio.
 */
static int
udf_readdir(struct vop_readdir_args *a)
{
	struct vnode *vp;
	struct uio *uio;
	struct dirent dir;
	struct udf_node *node;
	struct udf_mnt *udfmp;
	struct fileid_desc *fid;
	struct udf_uiodir uiodir;
	struct udf_dirstream *ds;
	u_long *cookies = NULL;
	int ncookies;
	int error = 0;

	vp = a->a_vp;
	uio = a->a_uio;
	node = VTON(vp);
	udfmp = node->udfmp;
	uiodir.eofflag = 1;

	if (a->a_ncookies != NULL) {
		/*
		 * Guess how many entries are needed. If we run out, this
		 * function will be called again and thing will pick up were
		 * it left off.
		 */
		ncookies = uio->uio_resid / 8;
		/* NOTE: M_WAITOK malloc never returns NULL; check is vestigial. */
		cookies = malloc(sizeof(u_long) * ncookies, M_TEMP, M_WAITOK);
		if (cookies == NULL)
			return (ENOMEM);
		uiodir.ncookies = ncookies;
		uiodir.cookies = cookies;
		uiodir.acookies = 0;
	} else {
		uiodir.cookies = NULL;
	}

	/*
	 * Iterate through the file id descriptors.  Give the parent dir
	 * entry special attention.
	 */
	ds = udf_opendir(node, uio->uio_offset, le64toh(node->fentry->inf_len),
	    node->udfmp);

	while ((fid = udf_getfid(ds)) != NULL) {
		/* XXX Should we return an error on a bad fid? */
		if (udf_checktag(&fid->tag, TAGID_FID)) {
			printf("Invalid FID tag\n");
			hexdump(fid, UDF_FID_SIZE, NULL, 0);
			error = EIO;
			break;
		}

		/* Is this a deleted file?
*/ if (fid->file_char & UDF_FILE_CHAR_DEL) continue; if ((fid->l_fi == 0) && (fid->file_char & UDF_FILE_CHAR_PAR)) { /* Do up the '.' and '..' entries. Dummy values are * used for the cookies since the offset here is * usually zero, and NFS doesn't like that value */ dir.d_fileno = node->hash_id; dir.d_type = DT_DIR; dir.d_name[0] = '.'; dir.d_name[1] = '\0'; dir.d_namlen = 1; dir.d_reclen = GENERIC_DIRSIZ(&dir); uiodir.dirent = &dir; error = udf_uiodir(&uiodir, dir.d_reclen, uio, 1); if (error) break; dir.d_fileno = udf_getid(&fid->icb); dir.d_type = DT_DIR; dir.d_name[0] = '.'; dir.d_name[1] = '.'; dir.d_name[2] = '\0'; dir.d_namlen = 2; dir.d_reclen = GENERIC_DIRSIZ(&dir); uiodir.dirent = &dir; error = udf_uiodir(&uiodir, dir.d_reclen, uio, 2); } else { dir.d_namlen = udf_transname(&fid->data[fid->l_iu], &dir.d_name[0], fid->l_fi, udfmp); dir.d_fileno = udf_getid(&fid->icb); dir.d_type = (fid->file_char & UDF_FILE_CHAR_DIR) ? DT_DIR : DT_UNKNOWN; dir.d_reclen = GENERIC_DIRSIZ(&dir); uiodir.dirent = &dir; error = udf_uiodir(&uiodir, dir.d_reclen, uio, ds->this_off); } if (error) break; uio->uio_offset = ds->offset + ds->off; } /* tell the calling layer whether we need to be called again */ *a->a_eofflag = uiodir.eofflag; if (error < 0) error = 0; if (!error) error = ds->error; udf_closedir(ds); if (a->a_ncookies != NULL) { if (error) free(cookies, M_TEMP); else { *a->a_ncookies = uiodir.acookies; *a->a_cookies = cookies; } } return (error); } static int udf_readlink(struct vop_readlink_args *ap) { struct path_component *pc, *end; struct vnode *vp; struct uio uio; struct iovec iov[1]; struct udf_node *node; void *buf; char *cp; int error, len, root; /* * A symbolic link in UDF is a list of variable-length path * component structures. We build a pathname in the caller's * uio by traversing this list. 
*/ vp = ap->a_vp; node = VTON(vp); len = le64toh(node->fentry->inf_len); buf = malloc(len, M_DEVBUF, M_WAITOK); iov[0].iov_len = len; iov[0].iov_base = buf; uio.uio_iov = iov; uio.uio_iovcnt = 1; uio.uio_offset = 0; uio.uio_resid = iov[0].iov_len; uio.uio_segflg = UIO_SYSSPACE; uio.uio_rw = UIO_READ; uio.uio_td = curthread; error = VOP_READ(vp, &uio, 0, ap->a_cred); if (error) goto error; pc = buf; end = (void *)((char *)buf + len); root = 0; while (pc < end) { switch (pc->type) { case UDF_PATH_ROOT: /* Only allow this at the beginning of a path. */ if ((void *)pc != buf) { error = EINVAL; goto error; } cp = "/"; len = 1; root = 1; break; case UDF_PATH_DOT: cp = "."; len = 1; break; case UDF_PATH_DOTDOT: cp = ".."; len = 2; break; case UDF_PATH_PATH: if (pc->length == 0) { error = EINVAL; goto error; } /* * XXX: We only support CS8 which appears to map * to ASCII directly. */ switch (pc->identifier[0]) { case 8: cp = pc->identifier + 1; len = pc->length - 1; break; default: error = EOPNOTSUPP; goto error; } break; default: error = EINVAL; goto error; } /* * If this is not the first component, insert a path * separator. */ if (pc != buf) { /* If we started with root we already have a "/". */ if (root) goto skipslash; root = 0; if (ap->a_uio->uio_resid < 1) { error = ENAMETOOLONG; goto error; } error = uiomove("/", 1, ap->a_uio); if (error) break; } skipslash: /* Append string at 'cp' of length 'len' to our path. */ if (len > ap->a_uio->uio_resid) { error = ENAMETOOLONG; goto error; } error = uiomove(cp, len, ap->a_uio); if (error) break; /* Advance to next component. 
*/ pc = (void *)((char *)pc + 4 + pc->length); } error: free(buf, M_DEVBUF); return (error); } static int udf_strategy(struct vop_strategy_args *a) { struct buf *bp; struct vnode *vp; struct udf_node *node; struct bufobj *bo; off_t offset; uint32_t maxsize; daddr_t sector; int error; bp = a->a_bp; vp = a->a_vp; node = VTON(vp); if (bp->b_blkno == bp->b_lblkno) { offset = lblktosize(node->udfmp, bp->b_lblkno); error = udf_bmap_internal(node, offset, §or, &maxsize); if (error) { clrbuf(bp); bp->b_blkno = -1; bufdone(bp); return (0); } /* bmap gives sector numbers, bio works with device blocks */ bp->b_blkno = sector << (node->udfmp->bshift - DEV_BSHIFT); } bo = node->udfmp->im_bo; bp->b_iooffset = dbtob(bp->b_blkno); BO_STRATEGY(bo, bp); return (0); } static int udf_bmap(struct vop_bmap_args *a) { struct udf_node *node; uint32_t max_size; daddr_t lsector; int nblk; int error; node = VTON(a->a_vp); if (a->a_bop != NULL) *a->a_bop = &node->udfmp->im_devvp->v_bufobj; if (a->a_bnp == NULL) return (0); if (a->a_runb) *a->a_runb = 0; /* * UDF_INVALID_BMAP means data embedded into fentry, this is an internal * error that should not be propagated to calling code. * Most obvious mapping for this error is EOPNOTSUPP as we can not truly * translate block numbers in this case. * Incidentally, this return code will make vnode pager to use VOP_READ * to get data for mmap-ed pages and udf_read knows how to do the right * thing for this kind of files. */ error = udf_bmap_internal(node, a->a_bn << node->udfmp->bshift, &lsector, &max_size); if (error == UDF_INVALID_BMAP) return (EOPNOTSUPP); if (error) return (error); /* Translate logical to physical sector number */ *a->a_bnp = lsector << (node->udfmp->bshift - DEV_BSHIFT); /* * Determine maximum number of readahead blocks following the * requested block. 
	 */
	if (a->a_runp) {
		nblk = (max_size >> node->udfmp->bshift) - 1;
		if (nblk <= 0)
			*a->a_runp = 0;
		else if (nblk >= (MAXBSIZE >> node->udfmp->bshift))
			*a->a_runp = (MAXBSIZE >> node->udfmp->bshift) - 1;
		else
			*a->a_runp = nblk;
	}

	if (a->a_runb) {
		*a->a_runb = 0;
	}

	return (0);
}

/*
 * The all powerful VOP_LOOKUP().
 */
static int
udf_lookup(struct vop_cachedlookup_args *a)
{
	struct vnode *dvp;
	struct vnode *tdp = NULL;
	struct vnode **vpp = a->a_vpp;
	struct udf_node *node;
	struct udf_mnt *udfmp;
	struct fileid_desc *fid = NULL;
	struct udf_dirstream *ds;
	u_long nameiop;
	u_long flags;
	char *nameptr;
	long namelen;
	ino_t id = 0;
	int offset, error = 0;
	int fsize, lkflags, ltype, numdirpasses;

	dvp = a->a_dvp;
	node = VTON(dvp);
	udfmp = node->udfmp;
	nameiop = a->a_cnp->cn_nameiop;
	flags = a->a_cnp->cn_flags;
	lkflags = a->a_cnp->cn_lkflags;
	nameptr = a->a_cnp->cn_nameptr;
	namelen = a->a_cnp->cn_namelen;
	fsize = le64toh(node->fentry->inf_len);

	/*
	 * If this is a LOOKUP and we've already partially searched through
	 * the directory, pick up where we left off and flag that the
	 * directory may need to be searched twice.  For a full description,
	 * see /sys/fs/cd9660/cd9660_lookup.c:cd9660_lookup()
	 */
	if (nameiop != LOOKUP || node->diroff == 0 || node->diroff > fsize) {
		offset = 0;
		numdirpasses = 1;
	} else {
		offset = node->diroff;
		numdirpasses = 2;
		nchstats.ncs_2passes++;
	}

lookloop:
	ds = udf_opendir(node, offset, fsize, udfmp);

	while ((fid = udf_getfid(ds)) != NULL) {
		/* XXX Should we return an error on a bad fid? */
		if (udf_checktag(&fid->tag, TAGID_FID)) {
			printf("udf_lookup: Invalid tag\n");
			error = EIO;
			break;
		}

		/* Is this a deleted file?
 */
		if (fid->file_char & UDF_FILE_CHAR_DEL)
			continue;

		if ((fid->l_fi == 0) && (fid->file_char & UDF_FILE_CHAR_PAR)) {
			/* Zero-length name + parent flag means "..". */
			if (flags & ISDOTDOT) {
				id = udf_getid(&fid->icb);
				break;
			}
		} else {
			if (!(udf_cmpname(&fid->data[fid->l_iu],
			    nameptr, fid->l_fi, namelen, udfmp))) {
				id = udf_getid(&fid->icb);
				break;
			}
		}
	}

	if (!error)
		error = ds->error;

	/* XXX Bail out here? */
	if (error) {
		udf_closedir(ds);
		return (error);
	}

	/* Did we have a match? */
	if (id) {
		/*
		 * Remember where this entry was if it's the final
		 * component.
		 */
		if ((flags & ISLASTCN) && nameiop == LOOKUP)
			node->diroff = ds->offset + ds->off;
		if (numdirpasses == 2)
			nchstats.ncs_pass2++;
		udf_closedir(ds);

		if (flags & ISDOTDOT) {
			error = vn_vget_ino(dvp, id, lkflags, &tdp);
		} else if (node->hash_id == id) {
			VREF(dvp);	/* we want ourself, ie "." */
			/*
			 * When we lookup "." we still can be asked to lock it
			 * differently.
			 */
			ltype = lkflags & LK_TYPE_MASK;
			if (ltype != VOP_ISLOCKED(dvp)) {
				if (ltype == LK_EXCLUSIVE)
					vn_lock(dvp, LK_UPGRADE | LK_RETRY);
				else /* if (ltype == LK_SHARED) */
					vn_lock(dvp, LK_DOWNGRADE | LK_RETRY);
			}
			tdp = dvp;
		} else
			error = udf_vget(udfmp->im_mountp, id, lkflags, &tdp);
		if (!error) {
			*vpp = tdp;
			/* Put this entry in the cache */
			if (flags & MAKEENTRY)
				cache_enter(dvp, *vpp, a->a_cnp);
		}
	} else {
		/* Name wasn't found on this pass.  Do another pass? */
		if (numdirpasses == 2) {
			numdirpasses--;
			offset = 0;
			udf_closedir(ds);
			goto lookloop;
		}
		udf_closedir(ds);

		/* Enter name into cache as non-existant */
		if (flags & MAKEENTRY)
			cache_enter(dvp, *vpp, a->a_cnp);

		if ((flags & ISLASTCN) &&
		    (nameiop == CREATE || nameiop == RENAME)) {
			error = EROFS;
		} else {
			error = ENOENT;
		}
	}

	return (error);
}

/* VOP_RECLAIM: tear down a udf_node when its vnode is recycled. */
static int
udf_reclaim(struct vop_reclaim_args *a)
{
	struct vnode *vp;
	struct udf_node *unode;

	vp = a->a_vp;
	unode = VTON(vp);

	/*
	 * Destroy the vm object and flush associated pages.
*/ vnode_destroy_vobject(vp); if (unode != NULL) { vfs_hash_remove(vp); if (unode->fentry != NULL) free(unode->fentry, M_UDFFENTRY); uma_zfree(udf_zone_node, unode); vp->v_data = NULL; } return (0); } static int udf_vptofh(struct vop_vptofh_args *a) { struct udf_node *node; struct ifid *ifhp; node = VTON(a->a_vp); ifhp = (struct ifid *)a->a_fhp; ifhp->ifid_len = sizeof(struct ifid); ifhp->ifid_ino = node->hash_id; return (0); } /* * Read the block and then set the data pointer to correspond with the * offset passed in. Only read in at most 'size' bytes, and then set 'size' * to the number of bytes pointed to. If 'size' is zero, try to read in a * whole extent. * * Note that *bp may be assigned error or not. * */ static int udf_readatoffset(struct udf_node *node, int *size, off_t offset, struct buf **bp, uint8_t **data) { struct udf_mnt *udfmp = node->udfmp; struct vnode *vp = node->i_vnode; struct file_entry *fentry; struct buf *bp1; uint32_t max_size; daddr_t sector; off_t off; int adj_size; int error; /* * This call is made *not* only to detect UDF_INVALID_BMAP case, * max_size is used as an ad-hoc read-ahead hint for "normal" case. */ error = udf_bmap_internal(node, offset, §or, &max_size); if (error == UDF_INVALID_BMAP) { /* * This error means that the file *data* is stored in the * allocation descriptor field of the file entry. */ fentry = node->fentry; *data = &fentry->data[le32toh(fentry->l_ea)]; *size = le32toh(fentry->l_ad); if (offset >= *size) *size = 0; else { *data += offset; *size -= offset; } return (0); } else if (error != 0) { return (error); } /* Adjust the size so that it is within range */ if (*size == 0 || *size > max_size) *size = max_size; /* * Because we will read starting at block boundary, we need to adjust * how much we need to read so that all promised data is in. * Also, we can't promise to read more than MAXBSIZE bytes starting * from block boundary, so adjust what we promise too. 
*/ off = blkoff(udfmp, offset); *size = min(*size, MAXBSIZE - off); adj_size = (*size + off + udfmp->bmask) & ~udfmp->bmask; *bp = NULL; if ((error = bread(vp, lblkno(udfmp, offset), adj_size, NOCRED, bp))) { printf("warning: udf_readlblks returned error %d\n", error); /* note: *bp may be non-NULL */ return (error); } bp1 = *bp; *data = (uint8_t *)&bp1->b_data[offset & udfmp->bmask]; return (0); } /* * Translate a file offset into a logical block and then into a physical * block. * max_size - maximum number of bytes that can be read starting from given * offset, rather than beginning of calculated sector number */ static int udf_bmap_internal(struct udf_node *node, off_t offset, daddr_t *sector, uint32_t *max_size) { struct udf_mnt *udfmp; struct file_entry *fentry; void *icb; struct icb_tag *tag; uint32_t icblen = 0; daddr_t lsector; int ad_offset, ad_num = 0; int i, p_offset; udfmp = node->udfmp; fentry = node->fentry; tag = &fentry->icbtag; switch (le16toh(tag->strat_type)) { case 4: break; case 4096: printf("Cannot deal with strategy4096 yet!\n"); return (ENODEV); default: printf("Unknown strategy type %d\n", tag->strat_type); return (ENODEV); } switch (le16toh(tag->flags) & 0x7) { case 0: /* * The allocation descriptor field is filled with short_ad's. * If the offset is beyond the current extent, look for the * next extent. */ do { offset -= icblen; ad_offset = sizeof(struct short_ad) * ad_num; if (ad_offset > le32toh(fentry->l_ad)) { printf("File offset out of bounds\n"); return (EINVAL); } icb = GETICB(short_ad, fentry, le32toh(fentry->l_ea) + ad_offset); icblen = GETICBLEN(short_ad, icb); ad_num++; } while(offset >= icblen); lsector = (offset >> udfmp->bshift) + le32toh(((struct short_ad *)(icb))->pos); *max_size = icblen - offset; break; case 1: /* * The allocation descriptor field is filled with long_ad's * If the offset is beyond the current extent, look for the * next extent. 
*/ do { offset -= icblen; ad_offset = sizeof(struct long_ad) * ad_num; if (ad_offset > le32toh(fentry->l_ad)) { printf("File offset out of bounds\n"); return (EINVAL); } icb = GETICB(long_ad, fentry, le32toh(fentry->l_ea) + ad_offset); icblen = GETICBLEN(long_ad, icb); ad_num++; } while(offset >= icblen); lsector = (offset >> udfmp->bshift) + le32toh(((struct long_ad *)(icb))->loc.lb_num); *max_size = icblen - offset; break; case 3: /* * This type means that the file *data* is stored in the * allocation descriptor field of the file entry. */ *max_size = 0; *sector = node->hash_id + udfmp->part_start; return (UDF_INVALID_BMAP); case 2: /* DirectCD does not use extended_ad's */ default: printf("Unsupported allocation descriptor %d\n", tag->flags & 0x7); return (ENODEV); } *sector = lsector + udfmp->part_start; /* * Check the sparing table. Each entry represents the beginning of * a packet. */ if (udfmp->s_table != NULL) { for (i = 0; i< udfmp->s_table_entries; i++) { p_offset = lsector - le32toh(udfmp->s_table->entries[i].org); if ((p_offset < udfmp->p_sectors) && (p_offset >= 0)) { *sector = le32toh(udfmp->s_table->entries[i].map) + p_offset; break; } } } return (0); }