Index: lib/libkvm/kvm_proc.c =================================================================== --- lib/libkvm/kvm_proc.c +++ lib/libkvm/kvm_proc.c @@ -221,6 +221,7 @@ kp->ki_tracep = proc.p_tracevp; kp->ki_textvp = proc.p_textvp; kp->ki_fd = proc.p_fd; + kp->ki_pd = proc.p_pd; kp->ki_vmspace = proc.p_vmspace; if (proc.p_sigacts != NULL) { if (KREAD(kd, (u_long)proc.p_sigacts, &sigacts)) { Index: lib/libprocstat/libprocstat.c =================================================================== --- lib/libprocstat/libprocstat.c +++ lib/libprocstat/libprocstat.c @@ -460,6 +460,7 @@ { struct file file; struct filedesc filed; + struct pwddesc pathsd; struct fdescenttbl *fdt; struct pwd pwd; unsigned long pwd_addr; @@ -484,15 +485,20 @@ kd = procstat->kd; if (kd == NULL) return (NULL); - if (kp->ki_fd == NULL) + if (kp->ki_fd == NULL || kp->ki_pd == NULL) return (NULL); if (!kvm_read_all(kd, (unsigned long)kp->ki_fd, &filed, sizeof(filed))) { warnx("can't read filedesc at %p", (void *)kp->ki_fd); return (NULL); } + if (!kvm_read_all(kd, (unsigned long)kp->ki_pd, &pathsd, + sizeof(pathsd))) { + warnx("can't read pwddesc at %p", (void *)kp->ki_pd); + return (NULL); + } haspwd = false; - pwd_addr = (unsigned long)(FILEDESC_KVM_LOAD_PWD(&filed)); + pwd_addr = (unsigned long)(PWDDESC_KVM_LOAD_PWD(&pathsd)); if (pwd_addr != 0) { if (!kvm_read_all(kd, pwd_addr, &pwd, sizeof(pwd))) { warnx("can't read fd_pwd at %p", (void *)pwd_addr); @@ -2086,18 +2092,18 @@ static int procstat_getumask_kvm(kvm_t *kd, struct kinfo_proc *kp, unsigned short *maskp) { - struct filedesc fd; + struct pwddesc pd; assert(kd != NULL); assert(kp != NULL); - if (kp->ki_fd == NULL) + if (kp->ki_pd == NULL) return (-1); - if (!kvm_read_all(kd, (unsigned long)kp->ki_fd, &fd, sizeof(fd))) { - warnx("can't read filedesc at %p for pid %d", kp->ki_fd, + if (!kvm_read_all(kd, (unsigned long)kp->ki_pd, &pd, sizeof(pd))) { + warnx("can't read pwddesc at %p for pid %d", kp->ki_pd, kp->ki_pid); return (-1); } - 
*maskp = fd.fd_cmask; + *maskp = pd.pd_cmask; return (0); } Index: sys/compat/cloudabi/cloudabi_file.c =================================================================== --- sys/compat/cloudabi/cloudabi_file.c +++ sys/compat/cloudabi/cloudabi_file.c @@ -265,7 +265,7 @@ } NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, path, uap->dirfd.fd, &rights, td); - error = vn_open(&nd, &fflags, 0777 & ~td->td_proc->p_fd->fd_cmask, fp); + error = vn_open(&nd, &fflags, 0777 & ~td->td_proc->p_pd->pd_cmask, fp); cloudabi_freestr(path); if (error != 0) { /* Custom operations provided. */ Index: sys/fs/fuse/fuse_internal.c =================================================================== --- sys/fs/fuse/fuse_internal.c +++ sys/fs/fuse/fuse_internal.c @@ -497,7 +497,7 @@ fmni.rdev = vap->va_rdev; if (fuse_libabi_geq(data, 7, 12)) { insize = sizeof(fmni); - fmni.umask = curthread->td_proc->p_fd->fd_cmask; + fmni.umask = curthread->td_proc->p_pd->pd_cmask; } else { insize = FUSE_COMPAT_MKNOD_IN_SIZE; } Index: sys/fs/fuse/fuse_vnops.c =================================================================== --- sys/fs/fuse/fuse_vnops.c +++ sys/fs/fuse/fuse_vnops.c @@ -668,7 +668,7 @@ fci->flags = O_CREAT | flags; if (fuse_libabi_geq(data, 7, 12)) { insize = sizeof(*fci); - fci->umask = td->td_proc->p_fd->fd_cmask; + fci->umask = td->td_proc->p_pd->pd_cmask; } else { insize = sizeof(struct fuse_open_in); } @@ -1269,7 +1269,7 @@ return ENXIO; } fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode); - fmdi.umask = curthread->td_proc->p_fd->fd_cmask; + fmdi.umask = curthread->td_proc->p_pd->pd_cmask; return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi, sizeof(fmdi), VDIR)); Index: sys/fs/unionfs/union_subr.c =================================================================== --- sys/fs/unionfs/union_subr.c +++ sys/fs/unionfs/union_subr.c @@ -486,7 +486,7 @@ } break; default: /* UNIONFS_TRADITIONAL */ - uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask; + uva->va_mode = 0777 & 
~td->td_proc->p_pd->pd_cmask; uva->va_uid = ump->um_uid; uva->va_gid = ump->um_gid; break; Index: sys/kern/imgact_elf.c =================================================================== --- sys/kern/imgact_elf.c +++ sys/kern/imgact_elf.c @@ -2507,12 +2507,12 @@ int structsize; p = (struct proc *)arg; - size = sizeof(structsize) + sizeof(p->p_fd->fd_cmask); + size = sizeof(structsize) + sizeof(p->p_pd->pd_cmask); if (sb != NULL) { KASSERT(*sizep == size, ("invalid size")); - structsize = sizeof(p->p_fd->fd_cmask); + structsize = sizeof(p->p_pd->pd_cmask); sbuf_bcat(sb, &structsize, sizeof(structsize)); - sbuf_bcat(sb, &p->p_fd->fd_cmask, sizeof(p->p_fd->fd_cmask)); + sbuf_bcat(sb, &p->p_pd->pd_cmask, sizeof(p->p_pd->pd_cmask)); } *sizep = size; } Index: sys/kern/init_main.c =================================================================== --- sys/kern/init_main.c +++ sys/kern/init_main.c @@ -556,6 +556,7 @@ siginit(&proc0); /* Create the file descriptor table. */ + p->p_pd = pdinit(NULL, false); p->p_fd = fdinit(NULL, false, NULL); p->p_fdtol = NULL; Index: sys/kern/kern_descrip.c =================================================================== --- sys/kern/kern_descrip.c +++ sys/kern/kern_descrip.c @@ -93,6 +93,7 @@ static MALLOC_DEFINE(M_FILEDESC, "filedesc", "Open file descriptor table"); static MALLOC_DEFINE(M_PWD, "pwd", "Descriptor table vnodes"); +static MALLOC_DEFINE(M_PWDDESC, "pwddesc", "Pwd descriptors"); static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "filedesc_to_leader", "file desc to leader structures"); static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); @@ -2019,7 +2020,6 @@ /* * Build a new filedesc structure from another. - * Copy the current, root, and jail root vnode references. * * If fdp is not NULL, return with it shared locked. 
*/ @@ -2028,7 +2028,6 @@ { struct filedesc0 *newfdp0; struct filedesc *newfdp; - struct pwd *newpwd; if (prepfiles) MPASS(lastfile != NULL); @@ -2042,20 +2041,14 @@ FILEDESC_LOCK_INIT(newfdp); refcount_init(&newfdp->fd_refcnt, 1); refcount_init(&newfdp->fd_holdcnt, 1); - newfdp->fd_cmask = CMASK; newfdp->fd_map = newfdp0->fd_dmap; newfdp->fd_files = (struct fdescenttbl *)&newfdp0->fd_dfiles; newfdp->fd_files->fdt_nfiles = NDFILE; - if (fdp == NULL) { - newpwd = pwd_alloc(); - smr_serialized_store(&newfdp->fd_pwd, newpwd, true); + if (fdp == NULL) return (newfdp); - } FILEDESC_SLOCK(fdp); - newpwd = pwd_hold_filedesc(fdp); - smr_serialized_store(&newfdp->fd_pwd, newpwd, true); if (!prepfiles) { FILEDESC_SUNLOCK(fdp); return (newfdp); @@ -2073,6 +2066,38 @@ return (newfdp); } +/* + * Build a pwddesc structure from another. + * Copy the current, root, and jail root vnode references. + * + * If pdp is not NULL and keeplock is true, return with pdp locked. + */ +struct pwddesc * +pdinit(struct pwddesc *pdp, bool keeplock) +{ + struct pwddesc *newpdp; + struct pwd *newpwd; + + newpdp = malloc(sizeof(*newpdp), M_PWDDESC, M_WAITOK | M_ZERO); + + PWDDESC_LOCK_INIT(newpdp); + refcount_init(&newpdp->pd_refcount, 1); + newpdp->pd_cmask = CMASK; + + if (pdp == NULL) { + newpwd = pwd_alloc(); + smr_serialized_store(&newpdp->pd_pwd, newpwd, true); + return (newpdp); + } + + PWDDESC_XLOCK(pdp); + newpwd = pwd_hold_pwddesc(pdp); + smr_serialized_store(&newpdp->pd_pwd, newpwd, true); + if (!keeplock) + PWDDESC_XUNLOCK(pdp); + return (newpdp); +} + static struct filedesc * fdhold(struct proc *p) { @@ -2085,6 +2110,18 @@ return (fdp); } +static struct pwddesc * +pdhold(struct proc *p) +{ + struct pwddesc *pdp; + + PROC_LOCK_ASSERT(p, MA_OWNED); + pdp = p->p_pd; + if (pdp != NULL) + refcount_acquire(&pdp->pd_refcount); + return (pdp); +} + static void fddrop(struct filedesc *fdp) { @@ -2098,6 +2135,28 @@ uma_zfree(filedesc0_zone, fdp); } +static void +pddrop(struct pwddesc *pdp) +{ + struct pwd *pwd; 
+ + if (refcount_release_if_not_last(&pdp->pd_refcount)) + return; + + PWDDESC_XLOCK(pdp); + if (refcount_release(&pdp->pd_refcount) == 0) { + PWDDESC_XUNLOCK(pdp); + return; + } + pwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); + pwd_set(pdp, NULL); + PWDDESC_XUNLOCK(pdp); + pwd_drop(pwd); + + PWDDESC_LOCK_DESTROY(pdp); + free(pdp, M_PWDDESC); +} + /* * Share a filedesc structure. */ @@ -2109,6 +2168,16 @@ return (fdp); } +/* + * Share a pwddesc structure. + */ +struct pwddesc * +pdshare(struct pwddesc *pdp) +{ + refcount_acquire(&pdp->pd_refcount); + return (pdp); +} + /* * Unshare a filedesc structure, if necessary by making a copy */ @@ -2126,6 +2195,25 @@ p->p_fd = tmp; } +/* + * Unshare a pwddesc structure. + */ +void +pdunshare(struct thread *td) +{ + struct pwddesc *pdp; + struct proc *p; + + p = td->td_proc; + /* Not shared. */ + if (p->p_pd->pd_refcount == 1) + return; + + pdp = pdcopy(p->p_pd); + pdescfree(td); + p->p_pd = pdp; +} + void fdinstall_remapped(struct thread *td, struct filedesc *fdp) { @@ -2166,11 +2254,26 @@ } if (newfdp->fd_freefile == -1) newfdp->fd_freefile = i; - newfdp->fd_cmask = fdp->fd_cmask; FILEDESC_SUNLOCK(fdp); return (newfdp); } +/* + * Copy a pwddesc structure. + */ +struct pwddesc * +pdcopy(struct pwddesc *pdp) +{ + struct pwddesc *newpdp; + + MPASS(pdp != NULL); + + newpdp = pdinit(pdp, true); + newpdp->pd_cmask = pdp->pd_cmask; + PWDDESC_XUNLOCK(pdp); + return (newpdp); +} + /* * Copies a filedesc structure, while remapping all file descriptors * stored inside using a translation table. 
@@ -2222,7 +2325,6 @@ filecaps_copy(&ofde->fde_caps, &nfde->fde_caps, true); fdused_init(newfdp, i); } - newfdp->fd_cmask = fdp->fd_cmask; FILEDESC_SUNLOCK(fdp); *ret = newfdp; return (0); @@ -2356,7 +2458,6 @@ { struct proc *p; struct filedesc *fdp; - struct pwd *pwd; p = td->td_proc; fdp = p->p_fd; @@ -2377,21 +2478,29 @@ if (refcount_release(&fdp->fd_refcnt) == 0) return; - FILEDESC_XLOCK(fdp); - pwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); - pwd_set(fdp, NULL); - FILEDESC_XUNLOCK(fdp); - - pwd_drop(pwd); - fdescfree_fds(td, fdp, 1); } +void +pdescfree(struct thread *td) +{ + struct proc *p; + struct pwddesc *pdp; + + p = td->td_proc; + pdp = p->p_pd; + MPASS(pdp != NULL); + + PROC_LOCK(p); + p->p_pd = NULL; + PROC_UNLOCK(p); + + pddrop(pdp); +} + void fdescfree_remapped(struct filedesc *fdp) { - - pwd_drop(smr_serialized_load(&fdp->fd_pwd, true)); fdescfree_fds(curthread, fdp, 0); } @@ -3442,12 +3551,12 @@ } struct pwd * -pwd_hold_filedesc(struct filedesc *fdp) +pwd_hold_pwddesc(struct pwddesc *pdp) { struct pwd *pwd; - FILEDESC_LOCK_ASSERT(fdp); - pwd = FILEDESC_LOCKED_LOAD_PWD(fdp); + PWDDESC_ASSERT_XLOCKED(pdp); + pwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); if (pwd != NULL) refcount_acquire(&pwd->pwd_refcount); return (pwd); @@ -3467,22 +3576,22 @@ struct pwd * pwd_hold(struct thread *td) { - struct filedesc *fdp; + struct pwddesc *pdp; struct pwd *pwd; - fdp = td->td_proc->p_fd; + pdp = td->td_proc->p_pd; vfs_smr_enter(); - pwd = vfs_smr_entered_load(&fdp->fd_pwd); + pwd = vfs_smr_entered_load(&pdp->pd_pwd); if (pwd_hold_smr(pwd)) { vfs_smr_exit(); return (pwd); } vfs_smr_exit(); - FILEDESC_SLOCK(fdp); - pwd = pwd_hold_filedesc(fdp); + PWDDESC_XLOCK(pdp); + pwd = pwd_hold_pwddesc(pdp); MPASS(pwd != NULL); - FILEDESC_SUNLOCK(fdp); + PWDDESC_XUNLOCK(pdp); return (pwd); } @@ -3491,7 +3600,7 @@ { struct pwd *pwd; - pwd = vfs_smr_entered_load(&curproc->p_fd->fd_pwd); + pwd = vfs_smr_entered_load(&curproc->p_pd->pd_pwd); MPASS(pwd != NULL); return (pwd); } @@ -3531,23 +3640,29 
@@ int pwd_chroot(struct thread *td, struct vnode *vp) { + struct pwddesc *pdp; struct filedesc *fdp; struct pwd *newpwd, *oldpwd; int error; fdp = td->td_proc->p_fd; + pdp = td->td_proc->p_pd; newpwd = pwd_alloc(); - FILEDESC_XLOCK(fdp); - oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); + FILEDESC_SLOCK(fdp); + PWDDESC_XLOCK(pdp); + oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); if (chroot_allow_open_directories == 0 || (chroot_allow_open_directories == 1 && oldpwd->pwd_rdir != rootvnode)) { error = chroot_refuse_vdir_fds(fdp); + FILEDESC_SUNLOCK(fdp); if (error != 0) { - FILEDESC_XUNLOCK(fdp); + PWDDESC_XUNLOCK(pdp); pwd_drop(newpwd); return (error); } + } else { + FILEDESC_SUNLOCK(fdp); } vrefact(vp); @@ -3557,8 +3672,8 @@ newpwd->pwd_jdir = vp; } pwd_fill(oldpwd, newpwd); - pwd_set(fdp, newpwd); - FILEDESC_XUNLOCK(fdp); + pwd_set(pdp, newpwd); + PWDDESC_XUNLOCK(pdp); pwd_drop(oldpwd); return (0); } @@ -3566,40 +3681,40 @@ void pwd_chdir(struct thread *td, struct vnode *vp) { - struct filedesc *fdp; + struct pwddesc *pdp; struct pwd *newpwd, *oldpwd; VNPASS(vp->v_usecount > 0, vp); newpwd = pwd_alloc(); - fdp = td->td_proc->p_fd; - FILEDESC_XLOCK(fdp); - oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); + pdp = td->td_proc->p_pd; + PWDDESC_XLOCK(pdp); + oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); newpwd->pwd_cdir = vp; pwd_fill(oldpwd, newpwd); - pwd_set(fdp, newpwd); - FILEDESC_XUNLOCK(fdp); + pwd_set(pdp, newpwd); + PWDDESC_XUNLOCK(pdp); pwd_drop(oldpwd); } void pwd_ensure_dirs(void) { - struct filedesc *fdp; + struct pwddesc *pdp; struct pwd *oldpwd, *newpwd; - fdp = curproc->p_fd; - FILEDESC_XLOCK(fdp); - oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); + pdp = curproc->p_pd; + PWDDESC_XLOCK(pdp); + oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); if (oldpwd->pwd_cdir != NULL && oldpwd->pwd_rdir != NULL) { - FILEDESC_XUNLOCK(fdp); + PWDDESC_XUNLOCK(pdp); return; } - FILEDESC_XUNLOCK(fdp); + PWDDESC_XUNLOCK(pdp); newpwd = pwd_alloc(); - FILEDESC_XLOCK(fdp); - oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); + 
PWDDESC_XLOCK(pdp); + oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); pwd_fill(oldpwd, newpwd); if (newpwd->pwd_cdir == NULL) { vrefact(rootvnode); @@ -3609,29 +3724,29 @@ vrefact(rootvnode); newpwd->pwd_rdir = rootvnode; } - pwd_set(fdp, newpwd); - FILEDESC_XUNLOCK(fdp); + pwd_set(pdp, newpwd); + PWDDESC_XUNLOCK(pdp); pwd_drop(oldpwd); } void pwd_set_rootvnode(void) { - struct filedesc *fdp; + struct pwddesc *pdp; struct pwd *oldpwd, *newpwd; - fdp = curproc->p_fd; + pdp = curproc->p_pd; newpwd = pwd_alloc(); - FILEDESC_XLOCK(fdp); - oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); + PWDDESC_XLOCK(pdp); + oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); vrefact(rootvnode); newpwd->pwd_cdir = rootvnode; vrefact(rootvnode); newpwd->pwd_rdir = rootvnode; pwd_fill(oldpwd, newpwd); - pwd_set(fdp, newpwd); - FILEDESC_XUNLOCK(fdp); + pwd_set(pdp, newpwd); + PWDDESC_XUNLOCK(pdp); pwd_drop(oldpwd); } @@ -3642,7 +3757,7 @@ void mountcheckdirs(struct vnode *olddp, struct vnode *newdp) { - struct filedesc *fdp; + struct pwddesc *pdp; struct pwd *newpwd, *oldpwd; struct prison *pr; struct proc *p; @@ -3655,18 +3770,18 @@ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); - fdp = fdhold(p); + pdp = pdhold(p); PROC_UNLOCK(p); - if (fdp == NULL) + if (pdp == NULL) continue; - FILEDESC_XLOCK(fdp); - oldpwd = FILEDESC_XLOCKED_LOAD_PWD(fdp); + PWDDESC_XLOCK(pdp); + oldpwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); if (oldpwd == NULL || (oldpwd->pwd_cdir != olddp && oldpwd->pwd_rdir != olddp && oldpwd->pwd_jdir != olddp)) { - FILEDESC_XUNLOCK(fdp); - fddrop(fdp); + PWDDESC_XUNLOCK(pdp); + pddrop(pdp); continue; } if (oldpwd->pwd_cdir == olddp) { @@ -3682,10 +3797,10 @@ newpwd->pwd_jdir = newdp; } pwd_fill(oldpwd, newpwd); - pwd_set(fdp, newpwd); - FILEDESC_XUNLOCK(fdp); + pwd_set(pdp, newpwd); + PWDDESC_XUNLOCK(pdp); pwd_drop(oldpwd); - fddrop(fdp); + pddrop(pdp); newpwd = pwd_alloc(); } sx_sunlock(&allproc_lock); @@ -3958,6 +4073,7 @@ struct export_fd_buf { struct filedesc *fdp; + struct pwddesc *pdp; 
struct sbuf *sb; ssize_t remainder; struct kinfo_file kif; @@ -4005,12 +4121,12 @@ if (efbuf->remainder == 0) return (0); - if (efbuf->fdp != NULL) - FILEDESC_SUNLOCK(efbuf->fdp); + if (efbuf->pdp != NULL) + PWDDESC_XUNLOCK(efbuf->pdp); export_vnode_to_kinfo(vp, fd, fflags, &efbuf->kif, efbuf->flags); error = export_kinfo_to_sb(efbuf); - if (efbuf->fdp != NULL) - FILEDESC_SLOCK(efbuf->fdp); + if (efbuf->pdp != NULL) + PWDDESC_XLOCK(efbuf->pdp); return (error); } @@ -4025,6 +4141,7 @@ { struct file *fp; struct filedesc *fdp; + struct pwddesc *pdp; struct export_fd_buf *efbuf; struct vnode *cttyvp, *textvp, *tracevp; struct pwd *pwd; @@ -4049,9 +4166,11 @@ vrefact(cttyvp); } fdp = fdhold(p); + pdp = pdhold(p); PROC_UNLOCK(p); efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK); efbuf->fdp = NULL; + efbuf->pdp = NULL; efbuf->sb = sb; efbuf->remainder = maxlen; efbuf->flags = flags; @@ -4064,11 +4183,12 @@ export_vnode_to_sb(cttyvp, KF_FD_TYPE_CTTY, FREAD | FWRITE, efbuf); error = 0; - if (fdp == NULL) + if (pdp == NULL || fdp == NULL) goto fail; efbuf->fdp = fdp; - FILEDESC_SLOCK(fdp); - pwd = pwd_hold_filedesc(fdp); + efbuf->pdp = pdp; + PWDDESC_XLOCK(pdp); + pwd = pwd_hold_pwddesc(pdp); if (pwd != NULL) { /* working directory */ if (pwd->pwd_cdir != NULL) { @@ -4086,6 +4206,10 @@ export_vnode_to_sb(pwd->pwd_jdir, KF_FD_TYPE_JAIL, FREAD, efbuf); } } + PWDDESC_XUNLOCK(pdp); + if (pwd != NULL) + pwd_drop(pwd); + FILEDESC_SLOCK(fdp); lastfile = fdlastfile(fdp); for (i = 0; fdp->fd_refcnt > 0 && i <= lastfile; i++) { if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) @@ -4106,10 +4230,11 @@ break; } FILEDESC_SUNLOCK(fdp); - if (pwd != NULL) - pwd_drop(pwd); - fddrop(fdp); fail: + if (fdp != NULL) + fddrop(fdp); + if (pdp != NULL) + pddrop(pdp); free(efbuf, M_TEMP); return (error); } @@ -4180,16 +4305,16 @@ static int export_vnode_for_osysctl(struct vnode *vp, int type, struct kinfo_file *kif, - struct kinfo_ofile *okif, struct filedesc *fdp, struct sysctl_req *req) + struct 
kinfo_ofile *okif, struct pwddesc *pdp, struct sysctl_req *req) { int error; vrefact(vp); - FILEDESC_SUNLOCK(fdp); + PWDDESC_XUNLOCK(pdp); export_vnode_to_kinfo(vp, type, 0, kif, KERN_FILEDESC_PACK_KINFO); kinfo_to_okinfo(kif, okif); error = SYSCTL_OUT(req, okif, sizeof(*okif)); - FILEDESC_SLOCK(fdp); + PWDDESC_XLOCK(pdp); return (error); } @@ -4202,6 +4327,7 @@ struct kinfo_ofile *okif; struct kinfo_file *kif; struct filedesc *fdp; + struct pwddesc *pdp; struct pwd *pwd; int error, i, lastfile, *name; struct file *fp; @@ -4212,24 +4338,33 @@ if (error != 0) return (error); fdp = fdhold(p); + if (fdp != NULL) + pdp = pdhold(p); PROC_UNLOCK(p); - if (fdp == NULL) + if (fdp == NULL || pdp == NULL) { + if (fdp != NULL) + fddrop(fdp); return (ENOENT); + } kif = malloc(sizeof(*kif), M_TEMP, M_WAITOK); okif = malloc(sizeof(*okif), M_TEMP, M_WAITOK); - FILEDESC_SLOCK(fdp); - pwd = pwd_hold_filedesc(fdp); + PWDDESC_XLOCK(pdp); + pwd = pwd_hold_pwddesc(pdp); if (pwd != NULL) { if (pwd->pwd_cdir != NULL) export_vnode_for_osysctl(pwd->pwd_cdir, KF_FD_TYPE_CWD, kif, - okif, fdp, req); + okif, pdp, req); if (pwd->pwd_rdir != NULL) export_vnode_for_osysctl(pwd->pwd_rdir, KF_FD_TYPE_ROOT, kif, - okif, fdp, req); + okif, pdp, req); if (pwd->pwd_jdir != NULL) export_vnode_for_osysctl(pwd->pwd_jdir, KF_FD_TYPE_JAIL, kif, - okif, fdp, req); + okif, pdp, req); } + PWDDESC_XUNLOCK(pdp); + if (pwd != NULL) + pwd_drop(pwd); + FILEDESC_SLOCK(fdp); lastfile = fdlastfile(fdp); for (i = 0; fdp->fd_refcnt > 0 && i <= lastfile; i++) { if ((fp = fdp->fd_ofiles[i].fde_file) == NULL) @@ -4244,9 +4379,8 @@ break; } FILEDESC_SUNLOCK(fdp); - if (pwd != NULL) - pwd_drop(pwd); fddrop(fdp); + pddrop(pdp); free(kif, M_TEMP); free(okif, M_TEMP); return (0); @@ -4298,7 +4432,7 @@ int kern_proc_cwd_out(struct proc *p, struct sbuf *sb, ssize_t maxlen) { - struct filedesc *fdp; + struct pwddesc *pdp; struct pwd *pwd; struct export_fd_buf *efbuf; struct vnode *cdir; @@ -4306,18 +4440,18 @@ PROC_LOCK_ASSERT(p, 
MA_OWNED); - fdp = fdhold(p); + pdp = pdhold(p); PROC_UNLOCK(p); - if (fdp == NULL) + if (pdp == NULL) return (EINVAL); efbuf = malloc(sizeof(*efbuf), M_TEMP, M_WAITOK); - efbuf->fdp = fdp; + efbuf->pdp = pdp; efbuf->sb = sb; efbuf->remainder = maxlen; - FILEDESC_SLOCK(fdp); - pwd = FILEDESC_LOCKED_LOAD_PWD(fdp); + PWDDESC_XLOCK(pdp); + pwd = PWDDESC_XLOCKED_LOAD_PWD(pdp); cdir = pwd->pwd_cdir; if (cdir == NULL) { error = EINVAL; @@ -4325,8 +4459,8 @@ vrefact(cdir); error = export_vnode_to_sb(cdir, KF_FD_TYPE_CWD, FREAD, efbuf); } - FILEDESC_SUNLOCK(fdp); - fddrop(fdp); + PWDDESC_XUNLOCK(pdp); + pddrop(pdp); free(efbuf, M_TEMP); return (error); } Index: sys/kern/kern_exec.c =================================================================== --- sys/kern/kern_exec.c +++ sys/kern/kern_exec.c @@ -700,6 +700,7 @@ * cannot be shared after an exec. */ fdunshare(td); + pdunshare(td); /* close files on exec */ fdcloseexec(td); } Index: sys/kern/kern_exit.c =================================================================== --- sys/kern/kern_exit.c +++ sys/kern/kern_exit.c @@ -366,6 +366,7 @@ * Close open files and release open-file table. * This may block! */ + pdescfree(td); fdescfree(td); /* Index: sys/kern/kern_fork.c =================================================================== --- sys/kern/kern_fork.c +++ sys/kern/kern_fork.c @@ -332,16 +332,22 @@ */ if (flags & RFCFDG) { struct filedesc *fdtmp; + struct pwddesc *pdtmp; + pdtmp = pdinit(td->td_proc->p_pd, false); fdtmp = fdinit(td->td_proc->p_fd, false, NULL); + pdescfree(td); fdescfree(td); p1->p_fd = fdtmp; + p1->p_pd = pdtmp; } /* * Unshare file descriptors (from parent). 
*/ - if (flags & RFFDG) + if (flags & RFFDG) { fdunshare(td); + pdunshare(td); + } fail: if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && @@ -360,6 +366,7 @@ struct proc *p1, *pptr; struct filedesc *fd; struct filedesc_to_leader *fdtol; + struct pwddesc *pd; struct sigacts *newsigacts; p1 = td->td_proc; @@ -403,12 +410,15 @@ * Copy filedesc. */ if (fr->fr_flags & RFCFDG) { + pd = pdinit(p1->p_pd, false); fd = fdinit(p1->p_fd, false, NULL); fdtol = NULL; } else if (fr->fr_flags & RFFDG) { + pd = pdcopy(p1->p_pd); fd = fdcopy(p1->p_fd); fdtol = NULL; } else { + pd = pdshare(p1->p_pd); fd = fdshare(p1->p_fd); if (p1->p_fdtol == NULL) p1->p_fdtol = filedesc_to_leader_alloc(NULL, NULL, @@ -498,6 +508,7 @@ p2->p_textvp = p1->p_textvp; p2->p_fd = fd; p2->p_fdtol = fdtol; + p2->p_pd = pd; if (p1->p_flag2 & P2_INHERIT_PROTECTED) { p2->p_flag |= P_PROTECTED; Index: sys/kern/kern_proc.c =================================================================== --- sys/kern/kern_proc.c +++ sys/kern/kern_proc.c @@ -1143,6 +1143,7 @@ kp->ki_traceflag = p->p_traceflag; #endif kp->ki_fd = p->p_fd; + kp->ki_pd = p->p_pd; kp->ki_vmspace = p->p_vmspace; kp->ki_flag = p->p_flag; kp->ki_flag2 = p->p_flag2; @@ -2967,7 +2968,7 @@ u_int namelen = arg2; struct proc *p; int error; - u_short fd_cmask; + u_short cmask; pid_t pid; if (namelen != 1) @@ -2976,7 +2977,7 @@ pid = (pid_t)name[0]; p = curproc; if (pid == p->p_pid || pid == 0) { - fd_cmask = p->p_fd->fd_cmask; + cmask = p->p_pd->pd_cmask; goto out; } @@ -2984,10 +2985,10 @@ if (error != 0) return (error); - fd_cmask = p->p_fd->fd_cmask; + cmask = p->p_pd->pd_cmask; PRELE(p); out: - error = SYSCTL_OUT(req, &fd_cmask, sizeof(fd_cmask)); + error = SYSCTL_OUT(req, &cmask, sizeof(cmask)); return (error); } Index: sys/kern/kern_thread.c =================================================================== --- sys/kern/kern_thread.c +++ sys/kern/kern_thread.c @@ -86,15 +86,15 @@ "struct thread KBI td_frame"); 
_Static_assert(offsetof(struct thread, td_emuldata) == 0x6b0, "struct thread KBI td_emuldata"); -_Static_assert(offsetof(struct proc, p_flag) == 0xb0, +_Static_assert(offsetof(struct proc, p_flag) == 0xb8, "struct proc KBI p_flag"); -_Static_assert(offsetof(struct proc, p_pid) == 0xbc, +_Static_assert(offsetof(struct proc, p_pid) == 0xc4, "struct proc KBI p_pid"); -_Static_assert(offsetof(struct proc, p_filemon) == 0x3b8, +_Static_assert(offsetof(struct proc, p_filemon) == 0x3c0, "struct proc KBI p_filemon"); -_Static_assert(offsetof(struct proc, p_comm) == 0x3d0, +_Static_assert(offsetof(struct proc, p_comm) == 0x3d8, "struct proc KBI p_comm"); -_Static_assert(offsetof(struct proc, p_emuldata) == 0x4b0, +_Static_assert(offsetof(struct proc, p_emuldata) == 0x4b8, "struct proc KBI p_emuldata"); #endif #ifdef __i386__ @@ -106,15 +106,15 @@ "struct thread KBI td_frame"); _Static_assert(offsetof(struct thread, td_emuldata) == 0x344, "struct thread KBI td_emuldata"); -_Static_assert(offsetof(struct proc, p_flag) == 0x68, +_Static_assert(offsetof(struct proc, p_flag) == 0x6c, "struct proc KBI p_flag"); -_Static_assert(offsetof(struct proc, p_pid) == 0x74, +_Static_assert(offsetof(struct proc, p_pid) == 0x78, "struct proc KBI p_pid"); -_Static_assert(offsetof(struct proc, p_filemon) == 0x268, +_Static_assert(offsetof(struct proc, p_filemon) == 0x26c, "struct proc KBI p_filemon"); -_Static_assert(offsetof(struct proc, p_comm) == 0x27c, +_Static_assert(offsetof(struct proc, p_comm) == 0x280, "struct proc KBI p_comm"); -_Static_assert(offsetof(struct proc, p_emuldata) == 0x308, +_Static_assert(offsetof(struct proc, p_emuldata) == 0x30c, "struct proc KBI p_emuldata"); #endif Index: sys/kern/uipc_mqueue.c =================================================================== --- sys/kern/uipc_mqueue.c +++ sys/kern/uipc_mqueue.c @@ -2011,7 +2011,7 @@ { char path[MQFS_NAMELEN + 1]; struct mqfs_node *pn; - struct filedesc *fdp; + struct pwddesc *pdp; struct file *fp; struct mqueue 
*mq; int fd, error, len, cmode; @@ -2019,8 +2019,8 @@ AUDIT_ARG_FFLAGS(flags); AUDIT_ARG_MODE(mode); - fdp = td->td_proc->p_fd; - cmode = (((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT); + pdp = td->td_proc->p_pd; + cmode = (((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT); mq = NULL; if ((flags & O_CREAT) != 0 && attr != NULL) { if (attr->mq_maxmsg <= 0 || attr->mq_maxmsg > maxmsg) Index: sys/kern/uipc_sem.c =================================================================== --- sys/kern/uipc_sem.c +++ sys/kern/uipc_sem.c @@ -465,7 +465,7 @@ ksem_create(struct thread *td, const char *name, semid_t *semidp, mode_t mode, unsigned int value, int flags, int compat32) { - struct filedesc *fdp; + struct pwddesc *pdp; struct ksem *ks; struct file *fp; char *path; @@ -481,8 +481,8 @@ if (value > SEM_VALUE_MAX) return (EINVAL); - fdp = td->td_proc->p_fd; - mode = (mode & ~fdp->fd_cmask) & ACCESSPERMS; + pdp = td->td_proc->p_pd; + mode = (mode & ~pdp->pd_cmask) & ACCESSPERMS; error = falloc(td, &fp, &fd, O_CLOEXEC); if (error) { if (name == NULL) Index: sys/kern/uipc_shm.c =================================================================== --- sys/kern/uipc_shm.c +++ sys/kern/uipc_shm.c @@ -1031,7 +1031,7 @@ kern_shm_open2(struct thread *td, const char *userpath, int flags, mode_t mode, int shmflags, struct filecaps *fcaps, const char *name __unused) { - struct filedesc *fdp; + struct pwddesc *pdp; struct shmfd *shmfd; struct file *fp; char *path; @@ -1081,8 +1081,8 @@ if ((initial_seals & ~F_SEAL_SEAL) != 0) return (EINVAL); - fdp = td->td_proc->p_fd; - cmode = (mode & ~fdp->fd_cmask) & ACCESSPERMS; + pdp = td->td_proc->p_pd; + cmode = (mode & ~pdp->pd_cmask) & ACCESSPERMS; /* * shm_open(2) created shm should always have O_CLOEXEC set, as mandated Index: sys/kern/uipc_usrreq.c =================================================================== --- sys/kern/uipc_usrreq.c +++ sys/kern/uipc_usrreq.c @@ -660,7 +660,7 @@ } VATTR_NULL(&vattr); vattr.va_type = VSOCK; - 
vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); + vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_pd->pd_cmask); #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); Index: sys/kern/vfs_syscalls.c =================================================================== --- sys/kern/vfs_syscalls.c +++ sys/kern/vfs_syscalls.c @@ -1098,7 +1098,8 @@ int flags, int mode) { struct proc *p = td->td_proc; - struct filedesc *fdp = p->p_fd; + struct filedesc *fdp; + struct pwddesc *pdp; struct file *fp; struct vnode *vp; struct nameidata nd; @@ -1106,6 +1107,8 @@ int cmode, error, indx; indx = -1; + fdp = p->p_fd; + pdp = p->p_pd; AUDIT_ARG_FFLAGS(flags); AUDIT_ARG_MODE(mode); @@ -1137,7 +1140,7 @@ */ /* Set the flags early so the finit in devfs can pick them up. */ fp->f_flag = flags & FMASK; - cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; + cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT; NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, &rights, td); td->td_dupfd = -1; /* XXX check for fdopen */ @@ -1339,7 +1342,7 @@ } else { VATTR_NULL(&vattr); vattr.va_mode = (mode & ALLPERMS) & - ~td->td_proc->p_fd->fd_cmask; + ~td->td_proc->p_pd->pd_cmask; vattr.va_rdev = dev; whiteout = 0; @@ -1452,7 +1455,7 @@ } VATTR_NULL(&vattr); vattr.va_type = VFIFO; - vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; + vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); @@ -1722,7 +1725,7 @@ goto restart; } VATTR_NULL(&vattr); - vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; + vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; #ifdef MAC vattr.va_type = VLNK; error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, @@ -3785,7 +3788,7 @@ } VATTR_NULL(&vattr); vattr.va_type = VDIR; - vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; + vattr.va_mode 
= (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; #ifdef MAC error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, &vattr); @@ -4185,13 +4188,13 @@ int sys_umask(struct thread *td, struct umask_args *uap) { - struct filedesc *fdp; + struct pwddesc *pdp; - fdp = td->td_proc->p_fd; - FILEDESC_XLOCK(fdp); - td->td_retval[0] = fdp->fd_cmask; - fdp->fd_cmask = uap->newmask & ALLPERMS; - FILEDESC_XUNLOCK(fdp); + pdp = td->td_proc->p_pd; + PWDDESC_XLOCK(pdp); + td->td_retval[0] = pdp->pd_cmask; + pdp->pd_cmask = uap->newmask & ALLPERMS; + PWDDESC_XUNLOCK(pdp); return (0); } Index: sys/sys/filedesc.h =================================================================== --- sys/sys/filedesc.h +++ sys/sys/filedesc.h @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -92,12 +93,17 @@ }; typedef SMR_POINTER(struct pwd *) smrpwd_t; +struct pwddesc { + struct mtx pd_lock; /* protects members of this struct */ + smrpwd_t pd_pwd; /* directories */ + volatile u_int pd_refcount; + u_short pd_cmask; /* mask for file creation */ +}; + struct filedesc { struct fdescenttbl *fd_files; /* open files table */ - smrpwd_t fd_pwd; /* directories */ NDSLOTTYPE *fd_map; /* bitmap of free fds */ int fd_freefile; /* approx. next free file */ - u_short fd_cmask; /* mask for file creation */ int fd_refcnt; /* thread reference count */ int fd_holdcnt; /* hold count on structure + mutex */ struct sx fd_sx; /* protects members of this struct */ @@ -134,6 +140,28 @@ #ifdef _KERNEL +/* Lock a paths descriptor table. 
*/ +#define PWDDESC_LOCK(pdp) (&(pdp)->pd_lock) +#define PWDDESC_LOCK_INIT(pdp) \ + mtx_init(PWDDESC_LOCK(pdp), "pwddesc", NULL, MTX_DEF) +#define PWDDESC_LOCK_DESTROY(pdp) mtx_destroy(PWDDESC_LOCK(pdp)) +#define PWDDESC_XLOCK(pdp) mtx_lock(PWDDESC_LOCK(pdp)) +#define PWDDESC_XUNLOCK(pdp) mtx_unlock(PWDDESC_LOCK(pdp)) +#define PWDDESC_LOCK_ASSERT(pdp, what) \ + mtx_assert(PWDDESC_LOCK(pdp), (what)) +#define PWDDESC_ASSERT_XLOCKED(pdp) \ + PWDDESC_LOCK_ASSERT((pdp), MA_OWNED) +#define PWDDESC_ASSERT_UNLOCKED(pdp) \ + PWDDESC_LOCK_ASSERT((pdp), MA_NOTOWNED) + +#define PWDDESC_XLOCKED_LOAD_PWD(pdp) ({ \ + struct pwddesc *_pdp = (pdp); \ + struct pwd *_pwd; \ + _pwd = smr_serialized_load(&(_pdp)->pd_pwd, \ + (PWDDESC_ASSERT_XLOCKED(_pdp), true)); \ + _pwd; \ +}) + /* Lock a file descriptor table. */ #define FILEDESC_LOCK_INIT(fdp) sx_init(&(fdp)->fd_sx, "filedesc structure") #define FILEDESC_LOCK_DESTROY(fdp) sx_destroy(&(fdp)->fd_sx) @@ -149,31 +177,15 @@ SX_NOTRECURSED) #define FILEDESC_UNLOCK_ASSERT(fdp) sx_assert(&(fdp)->fd_sx, SX_UNLOCKED) -#define FILEDESC_LOCKED_LOAD_PWD(fdp) ({ \ - struct filedesc *_fdp = (fdp); \ - struct pwd *_pwd; \ - _pwd = smr_serialized_load(&(_fdp)->fd_pwd, \ - (FILEDESC_LOCK_ASSERT(_fdp), true)); \ - _pwd; \ -}) - -#define FILEDESC_XLOCKED_LOAD_PWD(fdp) ({ \ - struct filedesc *_fdp = (fdp); \ - struct pwd *_pwd; \ - _pwd = smr_serialized_load(&(_fdp)->fd_pwd, \ - (FILEDESC_XLOCK_ASSERT(_fdp), true)); \ - _pwd; \ -}) - #else /* * Accessor for libkvm et al. */ -#define FILEDESC_KVM_LOAD_PWD(fdp) ({ \ - struct filedesc *_fdp = (fdp); \ +#define PWDDESC_KVM_LOAD_PWD(pdp) ({ \ + struct pwddesc *_pdp = (pdp); \ struct pwd *_pwd; \ - _pwd = smr_kvm_load(&(_fdp)->fd_pwd); \ + _pwd = smr_kvm_load(&(_pdp)->pd_pwd); \ _pwd; \ }) @@ -296,21 +308,26 @@ #endif /* cdir/rdir/jdir manipulation functions. 
*/ +struct pwddesc *pdcopy(struct pwddesc *pdp); +void pdescfree(struct thread *td); +struct pwddesc *pdinit(struct pwddesc *pdp, bool keeplock); +struct pwddesc *pdshare(struct pwddesc *pdp); +void pdunshare(struct thread *td); + void pwd_chdir(struct thread *td, struct vnode *vp); int pwd_chroot(struct thread *td, struct vnode *vp); void pwd_ensure_dirs(void); void pwd_set_rootvnode(void); -struct pwd *pwd_hold_filedesc(struct filedesc *fdp); +struct pwd *pwd_hold_pwddesc(struct pwddesc *pdp); bool pwd_hold_smr(struct pwd *pwd); struct pwd *pwd_hold(struct thread *td); void pwd_drop(struct pwd *pwd); static inline void -pwd_set(struct filedesc *fdp, struct pwd *newpwd) +pwd_set(struct pwddesc *pdp, struct pwd *newpwd) { - - smr_serialized_store(&fdp->fd_pwd, newpwd, - (FILEDESC_XLOCK_ASSERT(fdp), true)); + smr_serialized_store(&pdp->pd_pwd, newpwd, + (PWDDESC_ASSERT_XLOCKED(pdp), true)); } struct pwd *pwd_get_smr(void); Index: sys/sys/proc.h =================================================================== --- sys/sys/proc.h +++ sys/sys/proc.h @@ -593,6 +593,7 @@ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Open files. */ struct filedesc_to_leader *p_fdtol; /* (b) Tracking node */ + struct pwddesc *p_pd; /* (b) Cwd, chroot, jail, umask */ struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */ struct plimit *p_limit; /* (c) Resource limits. */ struct callout p_limco; /* (c) Limit callout handle */ Index: sys/sys/user.h =================================================================== --- sys/sys/user.h +++ sys/sys/user.h @@ -88,7 +88,7 @@ */ #define KI_NSPARE_INT 2 #define KI_NSPARE_LONG 12 -#define KI_NSPARE_PTR 6 +#define KI_NSPARE_PTR 5 #ifndef _KERNEL #ifndef KINFO_PROC_SIZE @@ -213,6 +213,7 @@ * front of ki_spareptrs, and longs from the end of ki_sparelongs. * That way the spare room from both arrays will remain contiguous. 
*/ + struct pwddesc *ki_pd; /* pointer to process paths info */ void *ki_spareptrs[KI_NSPARE_PTR]; /* spare room for growth */ long ki_sparelongs[KI_NSPARE_LONG]; /* spare room for growth */ long ki_sflag; /* PS_* flags */