diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -282,6 +282,8 @@ vnode_t, struct nfsexstuff *); int nfsrvd_allocate(struct nfsrv_descript *, int, vnode_t, struct nfsexstuff *); +int nfsrvd_deallocate(struct nfsrv_descript *, int, + vnode_t, struct nfsexstuff *); int nfsrvd_copy_file_range(struct nfsrv_descript *, int, vnode_t, vnode_t, struct nfsexstuff *, struct nfsexstuff *); int nfsrvd_seek(struct nfsrv_descript *, int, @@ -752,6 +754,8 @@ int nfsvno_seek(struct nfsrv_descript *, struct vnode *, u_long, off_t *, int, bool *, struct ucred *, NFSPROC_T *); int nfsvno_allocate(struct vnode *, off_t, off_t, struct ucred *, NFSPROC_T *); +int nfsvno_deallocate(struct vnode *, off_t, off_t, struct ucred *, + NFSPROC_T *); int nfsvno_getxattr(struct vnode *, char *, uint32_t, struct ucred *, uint64_t, int, struct thread *, struct mbuf **, struct mbuf **, int *); int nfsvno_setxattr(struct vnode *, char *, int, struct mbuf *, char *, diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c --- a/sys/fs/nfsserver/nfs_nfsdport.c +++ b/sys/fs/nfsserver/nfs_nfsdport.c @@ -132,6 +132,8 @@ char *, int *); static int nfsrv_allocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); +static int nfsrv_deallocatedsrpc(fhandle_t *, off_t, off_t, struct ucred *, + NFSPROC_T *, struct vnode *, struct nfsmount **, int, int *); static int nfsrv_setacldsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, struct vnode *, struct nfsmount **, int, struct acl *, int *); static int nfsrv_setattrdsrpc(fhandle_t *, struct ucred *, NFSPROC_T *, @@ -4898,6 +4900,9 @@ } else if (ioproc == NFSPROC_ALLOCATE) error = nfsrv_allocatedsrpc(fh, off, *offp, cred, p, vp, &nmp[0], mirrorcnt, &failpos); + else if (ioproc == NFSPROC_DEALLOCATE) + error = nfsrv_deallocatedsrpc(fh, off, *offp, cred, p, + vp, &nmp[0], mirrorcnt, &failpos); else { error = 
nfsrv_getattrdsrpc(&fh[mirrorcnt - 1], cred, p, vp, nmp[mirrorcnt - 1], nap); @@ -5679,6 +5684,166 @@ return (error); } +/* + * Do a deallocate RPC on a DS data file, using this structure for the + * arguments, so that this function can be executed by a separate kernel + * process. + */ +struct nfsrvdeallocatedsdorpc { + int done; /* set to 1 by start_deallocatedsdorpc() once the RPC has returned */ + int inprog; /* cleared before queueing; the waiter sleeps while this is non-zero and done is 0 (presumably set by nfs_pnfsio() - confirm) */ + struct task tsk; /* its address is the tsleep() channel used by the waiter */ + fhandle_t fh; /* private copy of the file handle for this DS */ + off_t off; /* start of the range to deallocate */ + off_t len; /* length of the range to deallocate */ + struct nfsmount *nmp; /* mount the RPC is sent on */ + struct ucred *cred; /* credentials handed to newnfs_request() */ + NFSPROC_T *p; /* caller context handed to newnfs_request() */ + int err; /* return value of nfsrv_deallocatedsdorpc(), stored by start_deallocatedsdorpc() */ +}; + +static int /* Send one Deallocate request followed by a Getattr to the mount nmp for file handle fhp and range off/len. Returns 0, the newnfs_request() error, the reply status nd_repstat, or the nfsv4_loadattr() result. nap is handed to nfsv4_loadattr(); callers in this file pass NULL when they do not need the attributes. */ +nfsrv_deallocatedsdorpc(struct nfsmount *nmp, fhandle_t *fhp, off_t off, + off_t len, struct nfsvattr *nap, struct ucred *cred, NFSPROC_T *p) +{ + uint32_t *tl; + struct nfsrv_descript *nd; + nfsattrbit_t attrbits; + nfsv4stateid_t st; + int error; + + nd = malloc(sizeof(*nd), M_TEMP, M_WAITOK | M_ZERO); /* zero-filled request descriptor, freed on every return path below */ + nfscl_reqstart(nd, NFSPROC_DEALLOCATE, nmp, (u_int8_t *)fhp, + sizeof(fhandle_t), NULL, NULL, 0, 0); + + /* + * Use a stateid where other is an alternating 01010 pattern and + * seqid is 0xffffffff. This value is not defined as special by + * the RFC and is used by the FreeBSD NFS server to indicate an + * MDS->DS proxy operation. 
+ */ + st.other[0] = 0x55555555; + st.other[1] = 0x55555555; + st.other[2] = 0x55555555; + st.seqid = 0xffffffff; + nfsm_stateidtom(nd, &st, NFSSTATEID_PUTSTATEID); + NFSM_BUILD(tl, uint32_t *, 2 * NFSX_HYPER + NFSX_UNSIGNED); /* room for the offset and length hypers plus one operation word */ + txdr_hyper(off, tl); tl += 2; + txdr_hyper(len, tl); tl += 2; + NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: len=%jd\n", (intmax_t)len); + + *tl = txdr_unsigned(NFSV4OP_GETATTR); /* append a Getattr operation to the same request */ + NFSGETATTR_ATTRBIT(&attrbits); + nfsrv_putattrbit(nd, &attrbits); + error = newnfs_request(nd, nmp, NULL, &nmp->nm_sockreq, NULL, p, + cred, NFS_PROG, NFS_VER4, NULL, 1, NULL, NULL); + if (error != 0) { /* request failed: only nd is released on this path, nd_mrep is not touched */ + free(nd, M_TEMP); + return (error); + } + NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft allocaterpc=%d\n", + nd->nd_repstat); + if (nd->nd_repstat == 0) { + NFSM_DISSECT(tl, uint32_t *, 2 * NFSX_UNSIGNED); /* step over two 32-bit words ahead of the attributes - presumably the Getattr op number and status; confirm */ + error = nfsv4_loadattr(nd, NULL, nap, NULL, NULL, 0, NULL, NULL, + NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL); + } else + error = nd->nd_repstat; /* the reply status becomes the return value */ + NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc: aft loadattr=%d\n", error); +nfsmout: /* no explicit goto targets this label here; presumably NFSM_DISSECT() jumps to it on a short reply - confirm */ + m_freem(nd->nd_mrep); + free(nd, M_TEMP); + NFSD_DEBUG(4, "nfsrv_deallocatedsdorpc error=%d\n", error); + return (error); +} + +/* + * Start up the thread that will execute nfsrv_deallocatedsdorpc(). 
+ */ +static void /* arg points to a struct nfsrvdeallocatedsdorpc; pending is not used */ +start_deallocatedsdorpc(void *arg, int pending) +{ + struct nfsrvdeallocatedsdorpc *drpc; + + drpc = (struct nfsrvdeallocatedsdorpc *)arg; + drpc->err = nfsrv_deallocatedsdorpc(drpc->nmp, &drpc->fh, drpc->off, + drpc->len, NULL, drpc->cred, drpc->p); /* NULL nap: attributes are only collected from the last mirror */ + drpc->done = 1; /* stored after err; the waiter reads err once it sees done set */ + NFSD_DEBUG(4, "start_deallocatedsdorpc: err=%d\n", drpc->err); +} + +static int /* Deallocate off/len on every one of the mirrorcnt DS copies. fhp and nmpp are arrays walked in step, one entry per mirror. *failposp receives the index of the first mirror whose error satisfies nfsds_failerr(), provided it was -1 on entry; any other first error is the return value. When no error is returned the attributes from the last mirror are passed to nfsrv_setextattr(). */ +nfsrv_deallocatedsrpc(fhandle_t *fhp, off_t off, off_t len, struct ucred *cred, + NFSPROC_T *p, struct vnode *vp, struct nfsmount **nmpp, int mirrorcnt, + int *failposp) +{ + struct nfsrvdeallocatedsdorpc *drpc, *tdrpc = NULL; + struct nfsvattr na; + int error, i, ret, timo; + + NFSD_DEBUG(4, "in nfsrv_deallocatedsrpc\n"); + drpc = NULL; + if (mirrorcnt > 1) + tdrpc = drpc = malloc(sizeof(*drpc) * (mirrorcnt - 1), M_TEMP, + M_WAITOK); /* one argument slot per mirror except the last; not zeroed, so done, inprog and err are set explicitly in the loop below */ + + /* + * Do the deallocate RPC for every DS, using a separate kernel process + * for every DS except the last one. + */ + error = 0; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + tdrpc->done = 0; + NFSBCOPY(fhp, &tdrpc->fh, sizeof(*fhp)); + tdrpc->off = off; + tdrpc->len = len; + tdrpc->nmp = *nmpp; + tdrpc->cred = cred; + tdrpc->p = p; + tdrpc->inprog = 0; + tdrpc->err = 0; + ret = EIO; /* forces the inline fallback below when nfs_pnfsiothreads is 0 */ + if (nfs_pnfsiothreads != 0) { + ret = nfs_pnfsio(start_deallocatedsdorpc, tdrpc); + NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: nfs_pnfsio=%d\n", + ret); + } + if (ret != 0) { /* could not hand the work off: do this mirror's RPC synchronously */ + ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, + NULL, cred, p); + if (nfsds_failerr(ret) && *failposp == -1) + *failposp = i; + else if (error == 0 && ret != 0) + error = ret; + } + nmpp++; + fhp++; + } + ret = nfsrv_deallocatedsdorpc(*nmpp, fhp, off, len, &na, cred, p); /* the last mirror is always done inline and is the only one asked for attributes */ + if (nfsds_failerr(ret) && *failposp == -1 && mirrorcnt > 1) /* with a single DS the error is returned instead of being recorded as a failed mirror */ + *failposp = mirrorcnt - 1; + else if (error == 0 && ret != 0) + error = ret; + if (error == 0) /* NOTE(review): when the branch above recorded the last mirror in *failposp, error can still be 0 here although na may not have been filled in - confirm */ + error = nfsrv_setextattr(vp, &na, p); + NFSD_DEBUG(4, "nfsrv_deallocatedsrpc: aft setextat=%d\n", error); + tdrpc = drpc; + timo = hz / 50; /* Wait for 20msec. 
*/ + if (timo < 1) /* hz below 50 makes the division yield 0; use at least one tick */ + timo = 1; + for (i = 0; i < mirrorcnt - 1; i++, tdrpc++) { + /* Wait for RPCs on separate threads to complete. */ + while (tdrpc->inprog != 0 && tdrpc->done == 0) + tsleep(&tdrpc->tsk, PVFS, "srvalds", timo); /* polled with a timeout; start_deallocatedsdorpc() issues no wakeup itself */ + if (nfsds_failerr(tdrpc->err) && *failposp == -1) + *failposp = i; + else if (error == 0 && tdrpc->err != 0) + error = tdrpc->err; + } + free(drpc, M_TEMP); /* drpc is still NULL when mirrorcnt <= 1 */ + return (error); +} + static int nfsrv_setattrdsdorpc(fhandle_t *fhp, struct ucred *cred, NFSPROC_T *p, struct vnode *vp, struct nfsmount *nmp, struct nfsvattr *nap, @@ -6425,6 +6590,42 @@ return (error); } +/* + * Deallocate vnode op call. Asserts that vp is exclusively locked, tries the DS proxy first and falls back to VOP_DEALLOCATE() on vp only when nfsrv_proxyds() returns ENOENT. Returns 0 or an error; NFSERR_IO when the local loop stops with len still greater than 0. + */ +int +nfsvno_deallocate(struct vnode *vp, off_t off, off_t len, struct ucred *cred, + NFSPROC_T *p) +{ + int error; + off_t olen; + + ASSERT_VOP_ELOCKED(vp, "nfsvno_deallocate vp"); + /* + * Attempt to deallocate on a DS file. A return of ENOENT implies + * there is no DS file to deallocate on. + */ + error = nfsrv_proxyds(vp, off, 0, cred, p, NFSPROC_DEALLOCATE, NULL, + NULL, NULL, NULL, NULL, &len, 0, NULL); + if (error != ENOENT) /* any other result, including success, is final and returned without NFSEXITCODE() */ + return (error); + + /* + * Do the actual VOP_DEALLOCATE(), looping so long as + * progress is being made, to achieve completion. + */ + do { + olen = len; /* remembered so that a call which does not shrink len ends the loop */ + error = VOP_DEALLOCATE(vp, &off, &len, 0, IO_SYNC, cred); /* off and len are passed by reference; presumably updated to the range still to be done - confirm */ + if (error == 0 && len > 0 && olen > len) + maybe_yield(); + } while (error == 0 && len > 0 && olen > len); + if (error == 0 && len > 0) /* no error, yet part of the range is left: report it as an I/O error */ + error = NFSERR_IO; + NFSEXITCODE(error); + return (error); +} + /* * Get Extended Atribute vnode op into an mbuf list. 
*/ diff --git a/sys/fs/nfsserver/nfs_nfsdserv.c b/sys/fs/nfsserver/nfs_nfsdserv.c --- a/sys/fs/nfsserver/nfs_nfsdserv.c +++ b/sys/fs/nfsserver/nfs_nfsdserv.c @@ -5389,6 +5389,111 @@ return (error); } +/* + * nfs deallocate service. Parses the stateid, offset and length from the request, validates them, checks write access and lock state, then calls nfsvno_deallocate(). vp is released with vput() on every path. The operation status is left in nd->nd_repstat and 0 is returned; the nfsmout path returns error instead. + */ +int +nfsrvd_deallocate(struct nfsrv_descript *nd, __unused int isdgram, + vnode_t vp, struct nfsexstuff *exp) +{ + uint32_t *tl; + struct nfsvattr forat; + int error = 0, forat_ret = 1, gotproxystateid; + off_t off, len; + struct nfsstate st, *stp = &st; + struct nfslock lo, *lop = &lo; + nfsv4stateid_t stateid; + nfsquad_t clientid; + nfsattrbit_t attrbits; + + gotproxystateid = 0; + NFSM_DISSECT(tl, uint32_t *, NFSX_STATEID + 2 * NFSX_HYPER); /* the stateid followed by the offset and length hypers */ + stp->ls_flags = (NFSLCK_CHECK | NFSLCK_WRITEACCESS); + lop->lo_flags = NFSLCK_WRITE; + stp->ls_ownerlen = 0; + stp->ls_op = NULL; + stp->ls_uid = nd->nd_cred->cr_uid; + stp->ls_stateid.seqid = fxdr_unsigned(u_int32_t, *tl++); + clientid.lval[0] = stp->ls_stateid.other[0] = *tl++; /* the first two words of other double as the client id */ + clientid.lval[1] = stp->ls_stateid.other[1] = *tl++; + if ((nd->nd_flag & ND_IMPLIEDCLID) != 0) { + if ((nd->nd_flag & ND_NFSV41) != 0) + clientid.qval = nd->nd_clientid.qval; /* for NFSv4.1 the client id already held in nd replaces the one from the stateid */ + else if (nd->nd_clientid.qval != clientid.qval) + printf("EEK2 multiple clids\n"); + } else { + if ((nd->nd_flag & ND_NFSV41) != 0) + printf("EEK! no clientid from session\n"); + nd->nd_flag |= ND_IMPLIEDCLID; + nd->nd_clientid.qval = clientid.qval; + } + stp->ls_stateid.other[2] = *tl++; + /* + * Don't allow this to be done for a DS. + */ + if ((nd->nd_flag & ND_DSSERVER) != 0) + nd->nd_repstat = NFSERR_NOTSUPP; + /* However, allow the proxy stateid. NOTE(review): the test below only sets gotproxystateid, which skips nfsrv_lockctrl(); it does not clear an NFSERR_NOTSUPP set just above - confirm that is intended. 
*/ + if (stp->ls_stateid.seqid == 0xffffffff && + stp->ls_stateid.other[0] == 0x55555555 && + stp->ls_stateid.other[1] == 0x55555555 && + stp->ls_stateid.other[2] == 0x55555555) + gotproxystateid = 1; + off = fxdr_hyper(tl); tl += 2; + lop->lo_first = off; + len = fxdr_hyper(tl); + if (len < 0) /* a length that is negative once stored in off_t is clamped to OFF_MAX */ + len = OFF_MAX; + NFSD_DEBUG(4, "dealloc: off=%jd len=%jd\n", (intmax_t)off, + (intmax_t)len); + lop->lo_end = lop->lo_first + len; + /* + * Sanity check the offset and length. + * off and len are off_t (signed int64_t) whereas + * lo_first and lo_end are uint64_t and, as such, + * if off >= 0 && len > 0, lo_end cannot overflow + * unless off_t is changed to something other than + * int64_t. Check lo_end < lo_first in case that + * is someday the case. + * The error to return is not specified by RFC 7862 so I + * made this compatible with the Linux knfsd. + */ + if (nd->nd_repstat == 0) { + if (off < 0 || lop->lo_end > NFSRV_MAXFILESIZE) + nd->nd_repstat = NFSERR_FBIG; + else if (len == 0 || lop->lo_end < lop->lo_first) + nd->nd_repstat = NFSERR_INVAL; + } + + if (nd->nd_repstat == 0 && vnode_vtype(vp) != VREG) + nd->nd_repstat = NFSERR_WRONGTYPE; + NFSZERO_ATTRBIT(&attrbits); + NFSSETBIT_ATTRBIT(&attrbits, NFSATTRBIT_OWNER); + forat_ret = nfsvno_getattr(vp, &forat, nd, curthread, 1, &attrbits); /* called even when nd_repstat is already non-zero; only the owner attribute bit is requested */ + if (nd->nd_repstat == 0) + nd->nd_repstat = forat_ret; + if (nd->nd_repstat == 0 && (forat.na_uid != nd->nd_cred->cr_uid || + NFSVNO_EXSTRICTACCESS(exp))) /* the file owner skips the write access check unless NFSVNO_EXSTRICTACCESS(exp) is true */ + nd->nd_repstat = nfsvno_accchk(vp, VWRITE, nd->nd_cred, exp, + curthread, NFSACCCHK_ALLOWOWNER, NFSACCCHK_VPISLOCKED, + NULL); + if (nd->nd_repstat == 0 && gotproxystateid == 0) /* requests carrying the proxy stateid bypass the lock and stateid check */ + nd->nd_repstat = nfsrv_lockctrl(vp, &stp, &lop, NULL, clientid, + &stateid, exp, nd, curthread); + + if (nd->nd_repstat == 0) + nd->nd_repstat = nfsvno_deallocate(vp, off, len, nd->nd_cred, + curthread); + vput(vp); + NFSD_DEBUG(4, "eo deallocate=%d\n", nd->nd_repstat); + NFSEXITCODE2(0, nd); + return (0); +nfsmout: /* no explicit goto targets this label here; presumably NFSM_DISSECT() jumps to it with error set - confirm */ + vput(vp); + NFSEXITCODE2(error, 
nd); + return (error); +} + /* * nfs copy service */ diff --git a/sys/fs/nfsserver/nfs_nfsdsocket.c b/sys/fs/nfsserver/nfs_nfsdsocket.c --- a/sys/fs/nfsserver/nfs_nfsdsocket.c +++ b/sys/fs/nfsserver/nfs_nfsdsocket.c @@ -198,7 +198,7 @@ nfsrvd_allocate, (int (*)(struct nfsrv_descript *, int, vnode_t , struct nfsexstuff *))0, nfsrvd_notsupp, - nfsrvd_notsupp, + nfsrvd_deallocate, nfsrvd_ioadvise, nfsrvd_layouterror, nfsrvd_layoutstats,