diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index 41f456d67e5b..f2d66cf74b2b 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -1,4176 +1,4176 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2002 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_ffclock.h" #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ktrace.h" #define __ELF_WORD_SIZE 32 #ifdef COMPAT_FREEBSD11 #define _WANT_FREEBSD11_KEVENT #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/selinfo.h */ #include /* Must come after sys/selinfo.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #ifdef INET #include #endif #include #include #include #include #include #include #include #include #ifdef __amd64__ #include #endif #include #include #include #include #include #include #include int compat_freebsd_32bit = 1; static void register_compat32_feature(void *arg) { if (!compat_freebsd_32bit) return; FEATURE_ADD("compat_freebsd32", "Compatible with 32-bit FreeBSD"); FEATURE_ADD("compat_freebsd_32bit", "Compatible with 32-bit FreeBSD (legacy feature name)"); } SYSINIT(freebsd32, SI_SUB_EXEC, SI_ORDER_ANY, register_compat32_feature, NULL); struct ptrace_io_desc32 { int piod_op; uint32_t piod_offs; uint32_t piod_addr; uint32_t piod_len; }; struct ptrace_vm_entry32 { int pve_entry; int pve_timestamp; uint32_t pve_start; uint32_t pve_end; uint32_t pve_offset; u_int pve_prot; u_int pve_pathlen; int32_t pve_fileid; u_int pve_fsid; uint32_t pve_path; }; #ifdef __amd64__ CTASSERT(sizeof(struct timeval32) == 8); CTASSERT(sizeof(struct timespec32) == 8); CTASSERT(sizeof(struct itimerval32) == 16); CTASSERT(sizeof(struct bintime32) == 12); #else CTASSERT(sizeof(struct timeval32) == 16); CTASSERT(sizeof(struct timespec32) == 16); CTASSERT(sizeof(struct itimerval32) == 32); CTASSERT(sizeof(struct bintime32) == 16); #endif CTASSERT(sizeof(struct ostatfs32) == 256); #ifdef __amd64__ CTASSERT(sizeof(struct rusage32) == 72); #else CTASSERT(sizeof(struct rusage32) == 88); #endif CTASSERT(sizeof(struct sigaltstack32) == 12); #ifdef __amd64__ CTASSERT(sizeof(struct kevent32) == 56); #else CTASSERT(sizeof(struct kevent32) == 64); #endif CTASSERT(sizeof(struct iovec32) == 8); CTASSERT(sizeof(struct msghdr32) == 28); #ifdef __amd64__ CTASSERT(sizeof(struct stat32) == 208); CTASSERT(sizeof(struct freebsd11_stat32) == 96); #else CTASSERT(sizeof(struct stat32) == 224); CTASSERT(sizeof(struct freebsd11_stat32) == 120); #endif CTASSERT(sizeof(struct sigaction32) == 24); static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count); static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count); static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp); void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32) { TV_CP(*s, *s32, ru_utime); TV_CP(*s, *s32, ru_stime); CP(*s, *s32, ru_maxrss); CP(*s, *s32, ru_ixrss); CP(*s, *s32, ru_idrss); CP(*s, *s32, ru_isrss); CP(*s, *s32, ru_minflt); CP(*s, *s32, ru_majflt); CP(*s, *s32, ru_nswap); CP(*s, *s32, ru_inblock); CP(*s, *s32, ru_oublock); CP(*s, *s32, ru_msgsnd); CP(*s, *s32, ru_msgrcv); CP(*s, *s32, ru_nsignals); CP(*s, *s32, ru_nvcsw); CP(*s, *s32, ru_nivcsw); } int freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap) { int error, status; struct rusage32 ru32; struct rusage ru, *rup; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (error) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) { freebsd32_rusage_out(&ru, &ru32); error = copyout(&ru32, uap->rusage, sizeof(ru32)); } return (error); } int freebsd32_wait6(struct thread *td, struct freebsd32_wait6_args *uap) { struct __wrusage32 wru32; struct __wrusage wru, *wrup; struct siginfo32 si32; struct __siginfo si, *sip; int error, status; if (uap->wrusage != NULL) wrup = &wru; else wrup = NULL; if (uap->info != NULL) { sip = &si; bzero(sip, sizeof(*sip)); } else sip = NULL; error = kern_wait6(td, uap->idtype, PAIR32TO64(id_t, uap->id), &status, uap->options, wrup, sip); if (error != 0) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->wrusage != NULL && error == 0) { freebsd32_rusage_out(&wru.wru_self, &wru32.wru_self); freebsd32_rusage_out(&wru.wru_children, &wru32.wru_children); error = copyout(&wru32, uap->wrusage, sizeof(wru32)); } if (uap->info != NULL && error == 0) { siginfo_to_siginfo32 (&si, &si32); error = copyout(&si32, uap->info, sizeof(si32)); } return (error); } #ifdef COMPAT_FREEBSD4 static void copy_statfs(struct statfs *in, struct ostatfs32 *out) { statfs_scale_blocks(in, INT32_MAX); bzero(out, sizeof(*out)); CP(*in, *out, f_bsize); out->f_iosize = MIN(in->f_iosize, INT32_MAX); CP(*in, *out, f_blocks); CP(*in, *out, f_bfree); CP(*in, *out, f_bavail); out->f_files = MIN(in->f_files, INT32_MAX); out->f_ffree = MIN(in->f_ffree, INT32_MAX); CP(*in, *out, f_fsid); CP(*in, *out, f_owner); CP(*in, *out, f_type); CP(*in, *out, f_flags); out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX); out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX); strlcpy(out->f_fstypename, in->f_fstypename, MFSNAMELEN); strlcpy(out->f_mntonname, in->f_mntonname, min(MNAMELEN, FREEBSD4_OMNAMELEN)); out->f_syncreads = MIN(in->f_syncreads, INT32_MAX); out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX); strlcpy(out->f_mntfromname, in->f_mntfromname, min(MNAMELEN, FREEBSD4_OMNAMELEN)); } #endif int freebsd32_getfsstat(struct thread *td, struct freebsd32_getfsstat_args *uap) { size_t count; int error; if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) return (EINVAL); error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, UIO_USERSPACE, uap->mode); if (error == 0) td->td_retval[0] = count; return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_getfsstat(struct thread *td, struct freebsd4_freebsd32_getfsstat_args *uap) { struct statfs *buf, *sp; struct ostatfs32 stat32; size_t count, size, copycount; int error; count = uap->bufsize / sizeof(struct ostatfs32); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, uap->mode); if (size > 0) { sp = buf; copycount = count; while (copycount > 0 && error == 0) { copy_statfs(sp, &stat32); error = copyout(&stat32, uap->buf, sizeof(stat32)); sp++; uap->buf++; copycount--; } free(buf, M_STATFS); } if (error == 0) td->td_retval[0] = count; return (error); } #endif #ifdef COMPAT_FREEBSD11 int freebsd11_freebsd32_getfsstat(struct thread *td, struct freebsd11_freebsd32_getfsstat_args *uap) { return(kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); } #endif int freebsd32_sigaltstack(struct thread *td, struct freebsd32_sigaltstack_args *uap) { struct sigaltstack32 s32; struct sigaltstack ss, oss, *ssp; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &s32, sizeof(s32)); if (error) return (error); PTRIN_CP(s32, ss, ss_sp); CP(s32, ss, ss_size); CP(s32, ss, ss_flags); ssp = &ss; } else ssp = NULL; error = kern_sigaltstack(td, ssp, &oss); if (error == 0 && uap->oss != NULL) { PTROUT_CP(oss, s32, ss_sp); CP(oss, s32, ss_size); CP(oss, s32, ss_flags); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } /* * Custom version of exec_copyin_args() so that we can translate * the pointers. */ int freebsd32_exec_copyin_args(struct image_args *args, const char *fname, enum uio_seg segflg, uint32_t *argv, uint32_t *envv) { char *argp, *envp; uint32_t *p32, arg; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ error = exec_args_add_fname(args, fname, segflg); if (error != 0) goto err_exit; /* * extract arguments first */ p32 = argv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; argp = PTRIN(arg); error = exec_args_add_arg(args, argp, UIO_USERSPACE); if (error != 0) goto err_exit; } /* * extract environment strings */ if (envv) { p32 = envv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; envp = PTRIN(arg); error = exec_args_add_env(args, envp, UIO_USERSPACE); if (error != 0) goto err_exit; } } return (0); err_exit: exec_free_args(args); return (error); } int freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &eargs, NULL, oldvmspace); post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } int freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap) { struct image_args eargs; struct vmspace *oldvmspace; int error; error = pre_execve(td, &oldvmspace); if (error != 0) return (error); error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { eargs.fd = uap->fd; error = kern_execve(td, &eargs, NULL, oldvmspace); } post_execve(td, error, oldvmspace); AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); return (error); } int freebsd32_mknodat(struct thread *td, struct freebsd32_mknodat_args *uap) { return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, PAIR32TO64(dev_t, uap->dev))); } int freebsd32_mprotect(struct thread *td, struct freebsd32_mprotect_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ) != 0) prot |= PROT_EXEC; #endif return (kern_mprotect(td, (uintptr_t)PTRIN(uap->addr), uap->len, prot, 0)); } int freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif return (kern_mmap(td, &(struct mmap_req){ .mr_hint = (uintptr_t)uap->addr, .mr_len = uap->len, .mr_prot = prot, .mr_flags = uap->flags, .mr_fd = uap->fd, .mr_pos = PAIR32TO64(off_t, uap->pos), })); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_mmap(struct thread *td, struct freebsd6_freebsd32_mmap_args *uap) { int prot; prot = uap->prot; #if defined(__amd64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif return (kern_mmap(td, &(struct mmap_req){ .mr_hint = (uintptr_t)uap->addr, .mr_len = uap->len, .mr_prot = prot, .mr_flags = uap->flags, .mr_fd = uap->fd, .mr_pos = PAIR32TO64(off_t, uap->pos), })); } #endif #ifdef COMPAT_43 int ofreebsd32_mmap(struct thread *td, struct ofreebsd32_mmap_args *uap) { return (kern_ommap(td, (uintptr_t)uap->addr, uap->len, uap->prot, uap->flags, uap->fd, uap->pos)); } #endif int freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap) { struct itimerval itv, oitv, *itvp; struct itimerval32 i32; int error; if (uap->itv != NULL) { error = copyin(uap->itv, &i32, sizeof(i32)); if (error) return (error); TV_CP(i32, itv, it_interval); TV_CP(i32, itv, it_value); itvp = &itv; } else itvp = NULL; error = kern_setitimer(td, uap->which, itvp, &oitv); if (error || uap->oitv == NULL) return (error); TV_CP(oitv, i32, it_interval); TV_CP(oitv, i32, it_value); return (copyout(&i32, uap->oitv, sizeof(i32))); } int freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap) { struct itimerval itv; struct itimerval32 i32; int error; error = kern_getitimer(td, uap->which, &itv); if (error || uap->itv == NULL) return (error); TV_CP(itv, i32, it_interval); TV_CP(itv, i32, it_value); return (copyout(&i32, uap->itv, sizeof(i32))); } int freebsd32_select(struct thread *td, struct freebsd32_select_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; int error; if (uap->tv != NULL) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; /* * XXX Do pointers need PTRIN()? */ return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, sizeof(int32_t) * 8)); } int freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap) { struct timespec32 ts32; struct timespec ts; struct timeval tv, *tvp; sigset_t set, *uset; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); TIMESPEC_TO_TIMEVAL(&tv, &ts); tvp = &tv; } else tvp = NULL; if (uap->sm != NULL) { error = copyin(uap->sm, &set, sizeof(set)); if (error != 0) return (error); uset = &set; } else uset = NULL; /* * XXX Do pointers need PTRIN()? */ error = kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, uset, sizeof(int32_t) * 8); return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. */ static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; uint64_t e; int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); #if BYTE_ORDER == LITTLE_ENDIAN ks32[i].data1 = kevp[i].data; ks32[i].data2 = kevp[i].data >> 32; #else ks32[i].data1 = kevp[i].data >> 32; ks32[i].data2 = kevp[i].data; #endif PTROUT_CP(kevp[i], ks32[i], udata); for (j = 0; j < nitems(kevp->ext); j++) { e = kevp[i].ext[j]; #if BYTE_ORDER == LITTLE_ENDIAN ks32[i].ext64[2 * j] = e; ks32[i].ext64[2 * j + 1] = e >> 32; #else ks32[i].ext64[2 * j] = e >> 32; ks32[i].ext64[2 * j + 1] = e; #endif } } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; uint64_t e; int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); kevp[i].data = PAIR32TO64(uint64_t, ks32[i].data); PTRIN_CP(ks32[i], kevp[i], udata); for (j = 0; j < nitems(kevp->ext); j++) { #if BYTE_ORDER == LITTLE_ENDIAN e = ks32[i].ext64[2 * j + 1]; e <<= 32; e += ks32[i].ext64[2 * j]; #else e = ks32[i].ext64[2 * j]; e <<= 32; e += ks32[i].ext64[2 * j + 1]; #endif kevp[i].ext[j] = e; } } done: return (error); } int freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { .arg = uap, .k_copyout = freebsd32_kevent_copyout, .k_copyin = freebsd32_kevent_copyin, }; #ifdef KTRACE struct kevent32 *eventlist = uap->eventlist; #endif int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("kevent32", UIO_USERSPACE, uap->changelist, uap->nchanges, sizeof(struct kevent32)); #endif error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("kevent32", UIO_USERSPACE, eventlist, td->td_retval[0], sizeof(struct kevent32)); #endif return (error); } #ifdef COMPAT_FREEBSD11 static int freebsd32_kevent11_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd11_freebsd32_kevent_args *uap; struct freebsd11_kevent32 ks32[KQ_NEVENTS]; int i, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd11_freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); CP(kevp[i], ks32[i], data); PTROUT_CP(kevp[i], ks32[i], udata); } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int freebsd32_kevent11_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd11_freebsd32_kevent_args *uap; struct freebsd11_kevent32 ks32[KQ_NEVENTS]; int i, j, error; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd11_freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); CP(ks32[i], kevp[i], data); PTRIN_CP(ks32[i], kevp[i], udata); for (j = 0; j < nitems(kevp->ext); j++) kevp[i].ext[j] = 0; } done: return (error); } int freebsd11_freebsd32_kevent(struct thread *td, struct freebsd11_freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { .arg = uap, .k_copyout = freebsd32_kevent11_copyout, .k_copyin = freebsd32_kevent11_copyin, }; #ifdef KTRACE struct freebsd11_kevent32 *eventlist = uap->eventlist; #endif int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; #ifdef KTRACE if (KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("freebsd11_kevent32", UIO_USERSPACE, uap->changelist, uap->nchanges, sizeof(struct freebsd11_kevent32)); #endif error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); #ifdef KTRACE if (error == 0 && KTRPOINT(td, KTR_STRUCT_ARRAY)) ktrstructarray("freebsd11_kevent32", UIO_USERSPACE, eventlist, td->td_retval[0], sizeof(struct freebsd11_kevent32)); #endif return (error); } #endif int freebsd32_gettimeofday(struct thread *td, struct freebsd32_gettimeofday_args *uap) { struct timeval atv; struct timeval32 atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); CP(atv, atv32, tv_sec); CP(atv, atv32, tv_usec); error = copyout(&atv32, uap->tp, sizeof (atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = 0; rtz.tz_dsttime = 0; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } int freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap) { struct rusage32 s32; struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error == 0) { freebsd32_rusage_out(&s, &s32); error = copyout(&s32, uap->rusage, sizeof(s32)); } return (error); } static void ptrace_lwpinfo_to32(const struct ptrace_lwpinfo *pl, struct ptrace_lwpinfo32 *pl32) { bzero(pl32, sizeof(*pl32)); pl32->pl_lwpid = pl->pl_lwpid; pl32->pl_event = pl->pl_event; pl32->pl_flags = pl->pl_flags; pl32->pl_sigmask = pl->pl_sigmask; pl32->pl_siglist = pl->pl_siglist; siginfo_to_siginfo32(&pl->pl_siginfo, &pl32->pl_siginfo); strcpy(pl32->pl_tdname, pl->pl_tdname); pl32->pl_child_pid = pl->pl_child_pid; pl32->pl_syscall_code = pl->pl_syscall_code; pl32->pl_syscall_narg = pl->pl_syscall_narg; } static void ptrace_sc_ret_to32(const struct ptrace_sc_ret *psr, struct ptrace_sc_ret32 *psr32) { bzero(psr32, sizeof(*psr32)); psr32->sr_retval[0] = psr->sr_retval[0]; psr32->sr_retval[1] = psr->sr_retval[1]; psr32->sr_error = psr->sr_error; } int freebsd32_ptrace(struct thread *td, struct freebsd32_ptrace_args *uap) { union { struct ptrace_io_desc piod; struct ptrace_lwpinfo pl; struct ptrace_vm_entry pve; struct ptrace_coredump pc; struct ptrace_sc_remote sr; struct dbreg32 dbreg; struct fpreg32 fpreg; struct reg32 reg; struct iovec vec; register_t args[nitems(td->td_sa.args)]; struct ptrace_sc_ret psr; int ptevents; } r; union { struct ptrace_io_desc32 piod; struct ptrace_lwpinfo32 pl; struct ptrace_vm_entry32 pve; struct ptrace_coredump32 pc; struct ptrace_sc_remote32 sr; uint32_t args[nitems(td->td_sa.args)]; struct ptrace_sc_ret32 psr; struct iovec32 vec; } r32; syscallarg_t pscr_args[nitems(td->td_sa.args)]; u_int pscr_args32[nitems(td->td_sa.args)]; void *addr; int data, error, i; if (!allow_ptrace) return (ENOSYS); error = 0; AUDIT_ARG_PID(uap->pid); AUDIT_ARG_CMD(uap->req); AUDIT_ARG_VALUE(uap->data); addr = &r; data = uap->data; switch (uap->req) { case PT_GET_EVENT_MASK: case PT_GET_SC_ARGS: case PT_GET_SC_RET: break; case PT_LWPINFO: if (uap->data > sizeof(r32.pl)) return (EINVAL); /* * Pass size of native structure in 'data'. Truncate * if necessary to avoid siginfo. */ data = sizeof(r.pl); if (uap->data < offsetof(struct ptrace_lwpinfo32, pl_siginfo) + sizeof(struct siginfo32)) data = offsetof(struct ptrace_lwpinfo, pl_siginfo); break; case PT_GETREGS: bzero(&r.reg, sizeof(r.reg)); break; case PT_GETFPREGS: bzero(&r.fpreg, sizeof(r.fpreg)); break; case PT_GETDBREGS: bzero(&r.dbreg, sizeof(r.dbreg)); break; case PT_SETREGS: error = copyin(uap->addr, &r.reg, sizeof(r.reg)); break; case PT_SETFPREGS: error = copyin(uap->addr, &r.fpreg, sizeof(r.fpreg)); break; case PT_SETDBREGS: error = copyin(uap->addr, &r.dbreg, sizeof(r.dbreg)); break; case PT_GETREGSET: case PT_SETREGSET: error = copyin(uap->addr, &r32.vec, sizeof(r32.vec)); if (error != 0) break; r.vec.iov_len = r32.vec.iov_len; r.vec.iov_base = PTRIN(r32.vec.iov_base); break; case PT_SET_EVENT_MASK: if (uap->data != sizeof(r.ptevents)) error = EINVAL; else error = copyin(uap->addr, &r.ptevents, uap->data); break; case PT_IO: error = copyin(uap->addr, &r32.piod, sizeof(r32.piod)); if (error) break; CP(r32.piod, r.piod, piod_op); PTRIN_CP(r32.piod, r.piod, piod_offs); PTRIN_CP(r32.piod, r.piod, piod_addr); CP(r32.piod, r.piod, piod_len); break; case PT_VM_ENTRY: error = copyin(uap->addr, &r32.pve, sizeof(r32.pve)); if (error) break; CP(r32.pve, r.pve, pve_entry); CP(r32.pve, r.pve, pve_timestamp); CP(r32.pve, r.pve, pve_start); CP(r32.pve, r.pve, pve_end); CP(r32.pve, r.pve, pve_offset); CP(r32.pve, r.pve, pve_prot); CP(r32.pve, r.pve, pve_pathlen); CP(r32.pve, r.pve, pve_fileid); CP(r32.pve, r.pve, pve_fsid); PTRIN_CP(r32.pve, r.pve, pve_path); break; case PT_COREDUMP: if (uap->data != sizeof(r32.pc)) error = EINVAL; else error = copyin(uap->addr, &r32.pc, uap->data); CP(r32.pc, r.pc, pc_fd); CP(r32.pc, r.pc, pc_flags); r.pc.pc_limit = PAIR32TO64(off_t, r32.pc.pc_limit); data = sizeof(r.pc); break; case PT_SC_REMOTE: if (uap->data != sizeof(r32.sr)) { error = EINVAL; break; } error = copyin(uap->addr, &r32.sr, uap->data); if (error != 0) break; CP(r32.sr, r.sr, pscr_syscall); CP(r32.sr, r.sr, pscr_nargs); if (r.sr.pscr_nargs > nitems(td->td_sa.args)) { error = EINVAL; break; } error = copyin(PTRIN(r32.sr.pscr_args), pscr_args32, sizeof(u_int) * r32.sr.pscr_nargs); if (error != 0) break; for (i = 0; i < r32.sr.pscr_nargs; i++) pscr_args[i] = pscr_args32[i]; r.sr.pscr_args = pscr_args; break; default: addr = uap->addr; break; } if (error) return (error); error = kern_ptrace(td, uap->req, uap->pid, addr, data); if (error) return (error); switch (uap->req) { case PT_VM_ENTRY: CP(r.pve, r32.pve, pve_entry); CP(r.pve, r32.pve, pve_timestamp); CP(r.pve, r32.pve, pve_start); CP(r.pve, r32.pve, pve_end); CP(r.pve, r32.pve, pve_offset); CP(r.pve, r32.pve, pve_prot); CP(r.pve, r32.pve, pve_pathlen); CP(r.pve, r32.pve, pve_fileid); CP(r.pve, r32.pve, pve_fsid); error = copyout(&r32.pve, uap->addr, sizeof(r32.pve)); break; case PT_IO: CP(r.piod, r32.piod, piod_len); error = copyout(&r32.piod, uap->addr, sizeof(r32.piod)); break; case PT_GETREGS: error = copyout(&r.reg, uap->addr, sizeof(r.reg)); break; case PT_GETFPREGS: error = copyout(&r.fpreg, uap->addr, sizeof(r.fpreg)); break; case PT_GETDBREGS: error = copyout(&r.dbreg, uap->addr, sizeof(r.dbreg)); break; case PT_GETREGSET: r32.vec.iov_len = r.vec.iov_len; error = copyout(&r32.vec, uap->addr, sizeof(r32.vec)); break; case PT_GET_EVENT_MASK: /* NB: The size in uap->data is validated in kern_ptrace(). */ error = copyout(&r.ptevents, uap->addr, uap->data); break; case PT_LWPINFO: ptrace_lwpinfo_to32(&r.pl, &r32.pl); error = copyout(&r32.pl, uap->addr, uap->data); break; case PT_GET_SC_ARGS: for (i = 0; i < nitems(r.args); i++) r32.args[i] = (uint32_t)r.args[i]; error = copyout(r32.args, uap->addr, MIN(uap->data, sizeof(r32.args))); break; case PT_GET_SC_RET: ptrace_sc_ret_to32(&r.psr, &r32.psr); error = copyout(&r32.psr, uap->addr, MIN(uap->data, sizeof(r32.psr))); break; case PT_SC_REMOTE: ptrace_sc_ret_to32(&r.sr.pscr_ret, &r32.sr.pscr_ret); error = copyout(&r32.sr.pscr_ret, uap->addr + offsetof(struct ptrace_sc_remote32, pscr_ret), sizeof(r32.psr)); break; } return (error); } int freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop) { struct iovec32 iov32; struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_preadv(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_pwritev(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp, int error) { struct iovec32 iov32; struct iovec *iov; u_int iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return (0); } static int freebsd32_copyinmsghdr(const struct msghdr32 *msg32, struct msghdr *msg) { struct msghdr32 m32; int error; error = copyin(msg32, &m32, sizeof(m32)); if (error) return (error); msg->msg_name = PTRIN(m32.msg_name); msg->msg_namelen = m32.msg_namelen; msg->msg_iov = PTRIN(m32.msg_iov); msg->msg_iovlen = m32.msg_iovlen; msg->msg_control = PTRIN(m32.msg_control); msg->msg_controllen = m32.msg_controllen; msg->msg_flags = m32.msg_flags; return (0); } static int freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32) { struct msghdr32 m32; int error; m32.msg_name = PTROUT(msg->msg_name); m32.msg_namelen = msg->msg_namelen; m32.msg_iov = PTROUT(msg->msg_iov); m32.msg_iovlen = msg->msg_iovlen; m32.msg_control = PTROUT(msg->msg_control); m32.msg_controllen = msg->msg_controllen; m32.msg_flags = msg->msg_flags; error = copyout(&m32, msg32, sizeof(m32)); return (error); } #define FREEBSD32_ALIGNBYTES (sizeof(int) - 1) #define FREEBSD32_ALIGN(p) \ (((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES) #define FREEBSD32_CMSG_SPACE(l) \ (FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l)) #define FREEBSD32_CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ FREEBSD32_ALIGN(sizeof(struct cmsghdr))) static size_t freebsd32_cmsg_convert(const struct cmsghdr *cm, void *data, socklen_t datalen) { size_t copylen; union { struct timespec32 ts; struct timeval32 tv; struct bintime32 bt; } tmp32; union { struct timespec ts; struct timeval tv; struct bintime bt; } *in; in = data; copylen = 0; switch (cm->cmsg_level) { case SOL_SOCKET: switch (cm->cmsg_type) { case SCM_TIMESTAMP: TV_CP(*in, tmp32, tv); copylen = sizeof(tmp32.tv); break; case SCM_BINTIME: BT_CP(*in, tmp32, bt); copylen = sizeof(tmp32.bt); break; case SCM_REALTIME: case SCM_MONOTONIC: TS_CP(*in, tmp32, ts); copylen = sizeof(tmp32.ts); break; default: break; } default: break; } if (copylen == 0) return (datalen); KASSERT((datalen >= copylen), ("corrupted cmsghdr")); bcopy(&tmp32, data, copylen); return (copylen); } static int freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control) { struct cmsghdr *cm; void *data; socklen_t clen, datalen, datalen_out, oldclen; int error; caddr_t ctlbuf; int len, copylen; struct mbuf *m; error = 0; len = msg->msg_controllen; msg->msg_controllen = 0; ctlbuf = msg->msg_control; for (m = control; m != NULL && len > 0; m = m->m_next) { cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; datalen_out = freebsd32_cmsg_convert(cm, data, datalen); /* * Copy out the message header. Preserve the native * message size in case we need to inspect the message * contents later. */ copylen = sizeof(struct cmsghdr); if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); goto exit; } oldclen = cm->cmsg_len; cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + datalen_out; error = copyout(cm, ctlbuf, copylen); cm->cmsg_len = oldclen; if (error != 0) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); copylen = datalen_out; if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); break; } /* Copy out the message data. */ error = copyout(data, ctlbuf, copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } msg->msg_controllen += FREEBSD32_CMSG_SPACE(datalen_out); } } if (len == 0 && m != NULL) { msg->msg_flags |= MSG_CTRUNC; m_dispose_extcontrolm(m); } exit: return (error); } int freebsd32_recvmsg(struct thread *td, struct freebsd32_recvmsg_args *uap) { struct msghdr msg; struct iovec *uiov, *iov; struct mbuf *control = NULL; struct mbuf **controlp; int error; error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov((void *)msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_flags = uap->flags; uiov = msg.msg_iov; msg.msg_iov = iov; controlp = (msg.msg_control != NULL) ? &control : NULL; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp); if (error == 0) { msg.msg_iov = uiov; if (control != NULL) error = freebsd32_copy_msg_out(&msg, control); else msg.msg_controllen = 0; if (error == 0) error = freebsd32_copyoutmsghdr(&msg, uap->msg); } free(iov, M_IOV); if (control != NULL) { if (error != 0) m_dispose_extcontrolm(control); m_freem(control); } return (error); } #ifdef COMPAT_43 int ofreebsd32_recvmsg(struct thread *td, struct ofreebsd32_recvmsg_args *uap) { return (ENOSYS); } #endif /* * Copy-in the array of control messages constructed using alignment * and padding suitable for a 32-bit environment and construct an * mbuf using alignment and padding suitable for a 64-bit kernel. * The alignment and padding are defined indirectly by CMSG_DATA(), * CMSG_SPACE() and CMSG_LEN(). */ static int freebsd32_copyin_control(struct mbuf **mp, caddr_t buf, u_int buflen) { struct cmsghdr *cm; struct mbuf *m; void *in, *in1, *md; u_int msglen, outlen; int error; /* Enforce the size limit of the native implementation. */ if (buflen > MCLBYTES) return (EINVAL); in = malloc(buflen, M_TEMP, M_WAITOK); error = copyin(buf, in, buflen); if (error != 0) goto out; /* * Make a pass over the input buffer to determine the amount of space * required for 64 bit-aligned copies of the control messages. */ in1 = in; outlen = 0; while (buflen > 0) { if (buflen < sizeof(*cm)) { error = EINVAL; break; } cm = (struct cmsghdr *)in1; if (cm->cmsg_len < FREEBSD32_ALIGN(sizeof(*cm)) || cm->cmsg_len > buflen) { error = EINVAL; break; } msglen = FREEBSD32_ALIGN(cm->cmsg_len); if (msglen < cm->cmsg_len) { error = EINVAL; break; } /* The native ABI permits the final padding to be omitted. */ if (msglen > buflen) msglen = buflen; buflen -= msglen; in1 = (char *)in1 + msglen; outlen += CMSG_ALIGN(sizeof(*cm)) + CMSG_ALIGN(msglen - FREEBSD32_ALIGN(sizeof(*cm))); } if (error != 0) goto out; /* * Allocate up to MJUMPAGESIZE space for the re-aligned and * re-padded control messages. This allows a full MCLBYTES of * 32-bit sized and aligned messages to fit and avoids an ABI * mismatch with the native implementation. */ m = m_get2(outlen, M_WAITOK, MT_CONTROL, 0); if (m == NULL) { error = EINVAL; goto out; } m->m_len = outlen; md = mtod(m, void *); /* * Make a second pass over input messages, copying them into the output * buffer. */ in1 = in; while (outlen > 0) { /* Copy the message header and align the length field. */ cm = md; memcpy(cm, in1, sizeof(*cm)); msglen = cm->cmsg_len - FREEBSD32_ALIGN(sizeof(*cm)); cm->cmsg_len = CMSG_ALIGN(sizeof(*cm)) + msglen; /* Copy the message body. */ in1 = (char *)in1 + FREEBSD32_ALIGN(sizeof(*cm)); md = (char *)md + CMSG_ALIGN(sizeof(*cm)); memcpy(md, in1, msglen); in1 = (char *)in1 + FREEBSD32_ALIGN(msglen); md = (char *)md + CMSG_ALIGN(msglen); KASSERT(outlen >= CMSG_ALIGN(sizeof(*cm)) + CMSG_ALIGN(msglen), ("outlen %u underflow, msglen %u", outlen, msglen)); outlen -= CMSG_ALIGN(sizeof(*cm)) + CMSG_ALIGN(msglen); } *mp = m; out: free(in, M_TEMP); return (error); } int freebsd32_sendmsg(struct thread *td, struct freebsd32_sendmsg_args *uap) { struct msghdr msg; struct iovec *iov; struct mbuf *control = NULL; struct sockaddr *to = NULL; int error; error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov((void *)msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_iov = iov; if (msg.msg_name != NULL) { error = getsockaddr(&to, msg.msg_name, msg.msg_namelen); if (error) { to = NULL; goto out; } msg.msg_name = to; } if (msg.msg_control) { if (msg.msg_controllen < sizeof(struct cmsghdr)) { error = EINVAL; goto out; } error = freebsd32_copyin_control(&control, msg.msg_control, msg.msg_controllen); if (error) goto out; msg.msg_control = NULL; msg.msg_controllen = 0; } error = kern_sendit(td, uap->s, &msg, uap->flags, control, UIO_USERSPACE); out: free(iov, M_IOV); if (to) free(to, M_SONAME); return (error); } #ifdef COMPAT_43 int ofreebsd32_sendmsg(struct thread *td, struct ofreebsd32_sendmsg_args *uap) { return (ENOSYS); } #endif int freebsd32_settimeofday(struct thread *td, struct freebsd32_settimeofday_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; struct timezone tz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &tz, sizeof(tz)); if (error) return (error); tzp = &tz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_lutimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_futimes(td, uap->fd, sp, UIO_SYSSPACE)); } int freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->times != NULL) { error = copyin(uap->times, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimens(struct thread *td, struct freebsd32_futimens_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_futimens(td, uap->fd, tsp, UIO_SYSSPACE)); } int freebsd32_utimensat(struct thread *td, struct freebsd32_utimensat_args *uap) { struct timespec32 ts32[2]; struct timespec ts[2], *tsp; int error; if (uap->times != NULL) { error = copyin(uap->times, ts32, sizeof(ts32)); if (error) return (error); CP(ts32[0], ts[0], tv_sec); CP(ts32[0], ts[0], tv_nsec); CP(ts32[1], ts[1], tv_sec); CP(ts32[1], ts[1], tv_nsec); tsp = ts; } else tsp = NULL; return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, tsp, UIO_SYSSPACE, uap->flag)); } int freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap) { struct timeval32 tv32; struct timeval delta, olddelta, *deltap; int error; if (uap->delta) { error = copyin(uap->delta, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, delta, tv_sec); CP(tv32, delta, tv_usec); deltap = δ } else deltap = NULL; error = kern_adjtime(td, deltap, &olddelta); if (uap->olddelta && error == 0) { CP(olddelta, tv32, tv_sec); CP(olddelta, tv32, tv_usec); error = copyout(&tv32, uap->olddelta, sizeof(tv32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap) { struct ostatfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_statfs(td, uap->path, UIO_USERSPACE, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap) { struct ostatfs32 s32; struct statfs *sp; int error; sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fstatfs(td, uap->fd, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap) { struct ostatfs32 s32; struct statfs *sp; fhandle_t fh; int error; if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) return (error); sp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); error = kern_fhstatfs(td, fh, sp); if (error == 0) { copy_statfs(sp, &s32); error = copyout(&s32, uap->buf, sizeof(s32)); } free(sp, M_STATFS); return (error); } #endif int freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } #ifdef COMPAT_43 int ofreebsd32_lseek(struct thread *td, struct ofreebsd32_lseek_args *uap) { return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); } #endif int freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = td->td_uretoff.tdu_off; td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } #ifdef COMPAT_43 int ofreebsd32_truncate(struct thread *td, struct ofreebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); } #endif int freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #ifdef COMPAT_43 int ofreebsd32_ftruncate(struct thread *td, struct ofreebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, uap->length)); } int ofreebsd32_getdirentries(struct thread *td, struct ofreebsd32_getdirentries_args *uap) { struct ogetdirentries_args ap; int error; long loff; int32_t loff_cut; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; error = kern_ogetdirentries(td, &ap, &loff); if (error == 0) { loff_cut = loff; error = copyout(&loff_cut, uap->basep, sizeof(int32_t)); } return (error); } #endif #if defined(COMPAT_FREEBSD11) int freebsd11_freebsd32_getdirentries(struct thread *td, struct freebsd11_freebsd32_getdirentries_args *uap) { long base; int32_t base32; int error; error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL); if (error) return (error); if (uap->basep != NULL) { base32 = base; error = copyout(&base32, uap->basep, sizeof(int32_t)); } return (error); } #endif /* COMPAT_FREEBSD11 */ #ifdef COMPAT_FREEBSD6 /* versions with the 'int pad' argument */ int freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap) { return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap) { return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, PAIR32TO64(off_t, uap->offset))); } int freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap) { int error; off_t pos; error = kern_lseek(td, uap->fd, PAIR32TO64(off_t, uap->offset), uap->whence); /* Expand the quad return into two parts for eax and edx */ pos = *(off_t *)(td->td_retval); td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap) { return (kern_truncate(td, uap->path, UIO_USERSPACE, PAIR32TO64(off_t, uap->length))); } int freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap) { return (kern_ftruncate(td, uap->fd, PAIR32TO64(off_t, uap->length))); } #endif /* COMPAT_FREEBSD6 */ struct sf_hdtr32 { uint32_t headers; int hdr_cnt; uint32_t trailers; int trl_cnt; }; static int freebsd32_do_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap, int compat) { struct sf_hdtr32 hdtr32; struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct file *fp; cap_rights_t rights; struct iovec32 *iov32; off_t offset, sbytes; int error; offset = PAIR32TO64(off_t, uap->offset); if (offset < 0) return (EINVAL); hdr_uio = trl_uio = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32)); if (error) goto out; PTRIN_CP(hdtr32, hdtr, headers); CP(hdtr32, hdtr, hdr_cnt); PTRIN_CP(hdtr32, hdtr, trailers); CP(hdtr32, hdtr, trl_cnt); if (hdtr.headers != NULL) { iov32 = PTRIN(hdtr32.headers); error = freebsd32_copyinuio(iov32, hdtr32.hdr_cnt, &hdr_uio); if (error) goto out; #ifdef COMPAT_FREEBSD4 /* * In FreeBSD < 5.0 the nbytes to send also included * the header. If compat is specified subtract the * header size from nbytes. */ if (compat) { if (uap->nbytes > hdr_uio->uio_resid) uap->nbytes -= hdr_uio->uio_resid; else uap->nbytes = 0; } #endif } if (hdtr.trailers != NULL) { iov32 = PTRIN(hdtr32.trailers); error = freebsd32_copyinuio(iov32, hdtr32.trl_cnt, &trl_uio); if (error) goto out; } } AUDIT_ARG_FD(uap->fd); if ((error = fget_read(td, uap->fd, cap_rights_init_one(&rights, CAP_PREAD), &fp)) != 0) goto out; error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset, uap->nbytes, &sbytes, uap->flags, td); fdrop(fp, td); if (uap->sbytes != NULL) - copyout(&sbytes, uap->sbytes, sizeof(off_t)); + (void)copyout(&sbytes, uap->sbytes, sizeof(off_t)); out: if (hdr_uio) free(hdr_uio, M_IOV); if (trl_uio) free(trl_uio, M_IOV); return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sendfile(struct thread *td, struct freebsd4_freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, (struct freebsd32_sendfile_args *)uap, 1)); } #endif int freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, uap, 0)); } static void copy_stat(struct stat *in, struct stat32 *out) { #ifndef __amd64__ /* * 32-bit architectures other than i386 have 64-bit time_t. This * results in struct timespec32 with 12 bytes for tv_sec and tv_nsec, * and 4 bytes of padding. Zero the padding holes in struct stat32. */ bzero(&out->st_atim, sizeof(out->st_atim)); bzero(&out->st_mtim, sizeof(out->st_mtim)); bzero(&out->st_ctim, sizeof(out->st_ctim)); bzero(&out->st_birthtim, sizeof(out->st_birthtim)); #endif CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); out->st_padding0 = 0; out->st_padding1 = 0; #ifdef __STAT32_TIME_T_EXT out->st_atim_ext = 0; out->st_mtim_ext = 0; out->st_ctim_ext = 0; out->st_btim_ext = 0; #endif bzero(out->st_spare, sizeof(out->st_spare)); } #ifdef COMPAT_43 static void copy_ostat(struct stat *in, struct ostat32 *out) { bzero(out, sizeof(*out)); CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); out->st_size = MIN(in->st_size, INT32_MAX); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_blksize); CP(*in, *out, st_blocks); CP(*in, *out, st_flags); CP(*in, *out, st_gen); } #endif #ifdef COMPAT_43 int ofreebsd32_stat(struct thread *td, struct ofreebsd32_stat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->sb, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_fstat(struct thread *td, struct ofreebsd32_fstat_args *uap) { struct stat ub; struct ostat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_ostat(&ub, &ub32); error = copyout(&ub32, uap->sb, sizeof(ub32)); return (error); } #endif int freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->buf, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fhstat(struct thread *td, struct freebsd32_fhstat_args *uap) { struct stat sb; struct stat32 sb32; struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error != 0) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } #if defined(COMPAT_FREEBSD11) extern int ino64_trunc_error; static int freebsd11_cvtstat32(struct stat *in, struct freebsd11_stat32 *out) { #ifndef __amd64__ /* * 32-bit architectures other than i386 have 64-bit time_t. This * results in struct timespec32 with 12 bytes for tv_sec and tv_nsec, * and 4 bytes of padding. Zero the padding holes in freebsd11_stat32. */ bzero(&out->st_atim, sizeof(out->st_atim)); bzero(&out->st_mtim, sizeof(out->st_mtim)); bzero(&out->st_ctim, sizeof(out->st_ctim)); bzero(&out->st_birthtim, sizeof(out->st_birthtim)); #endif CP(*in, *out, st_ino); if (in->st_ino != out->st_ino) { switch (ino64_trunc_error) { default: case 0: break; case 1: return (EOVERFLOW); case 2: out->st_ino = UINT32_MAX; break; } } CP(*in, *out, st_nlink); if (in->st_nlink != out->st_nlink) { switch (ino64_trunc_error) { default: case 0: break; case 1: return (EOVERFLOW); case 2: out->st_nlink = UINT16_MAX; break; } } out->st_dev = in->st_dev; if (out->st_dev != in->st_dev) { switch (ino64_trunc_error) { default: break; case 1: return (EOVERFLOW); } } CP(*in, *out, st_mode); CP(*in, *out, st_uid); CP(*in, *out, st_gid); out->st_rdev = in->st_rdev; if (out->st_rdev != in->st_rdev) { switch (ino64_trunc_error) { default: break; case 1: return (EOVERFLOW); } } TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); out->st_lspare = 0; bzero((char *)&out->st_birthtim + sizeof(out->st_birthtim), sizeof(*out) - offsetof(struct freebsd11_stat32, st_birthtim) - sizeof(out->st_birthtim)); return (0); } int freebsd11_freebsd32_stat(struct thread *td, struct freebsd11_freebsd32_stat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fstat(struct thread *td, struct freebsd11_freebsd32_fstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_fstat(td, uap->fd, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fstatat(struct thread *td, struct freebsd11_freebsd32_fstatat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->buf, sizeof (sb32)); return (error); } int freebsd11_freebsd32_lstat(struct thread *td, struct freebsd11_freebsd32_lstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } int freebsd11_freebsd32_fhstat(struct thread *td, struct freebsd11_freebsd32_fhstat_args *uap) { struct stat sb; struct freebsd11_stat32 sb32; struct fhandle fh; int error; error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); if (error != 0) return (error); error = kern_fhstat(td, fh, &sb); if (error != 0) return (error); error = freebsd11_cvtstat32(&sb, &sb32); if (error == 0) error = copyout(&sb32, uap->sb, sizeof (sb32)); return (error); } static int freebsd11_cvtnstat32(struct stat *sb, struct nstat32 *nsb32) { struct nstat nsb; int error; error = freebsd11_cvtnstat(sb, &nsb); if (error != 0) return (error); bzero(nsb32, sizeof(*nsb32)); CP(nsb, *nsb32, st_dev); CP(nsb, *nsb32, st_ino); CP(nsb, *nsb32, st_mode); CP(nsb, *nsb32, st_nlink); CP(nsb, *nsb32, st_uid); CP(nsb, *nsb32, st_gid); CP(nsb, *nsb32, st_rdev); CP(nsb, *nsb32, st_atim.tv_sec); CP(nsb, *nsb32, st_atim.tv_nsec); CP(nsb, *nsb32, st_mtim.tv_sec); CP(nsb, *nsb32, st_mtim.tv_nsec); CP(nsb, *nsb32, st_ctim.tv_sec); CP(nsb, *nsb32, st_ctim.tv_nsec); CP(nsb, *nsb32, st_size); CP(nsb, *nsb32, st_blocks); CP(nsb, *nsb32, st_blksize); CP(nsb, *nsb32, st_flags); CP(nsb, *nsb32, st_gen); CP(nsb, *nsb32, st_birthtim.tv_sec); CP(nsb, *nsb32, st_birthtim.tv_nsec); return (0); } int freebsd11_freebsd32_nstat(struct thread *td, struct freebsd11_freebsd32_nstat_args *uap) { struct stat sb; struct nstat32 nsb; int error; error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); if (error != 0) return (error); error = freebsd11_cvtnstat32(&sb, &nsb); if (error != 0) error = copyout(&nsb, uap->ub, sizeof (nsb)); return (error); } int freebsd11_freebsd32_nlstat(struct thread *td, struct freebsd11_freebsd32_nlstat_args *uap) { struct stat sb; struct nstat32 nsb; int error; error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); if (error != 0) return (error); error = freebsd11_cvtnstat32(&sb, &nsb); if (error == 0) error = copyout(&nsb, uap->ub, sizeof (nsb)); return (error); } int freebsd11_freebsd32_nfstat(struct thread *td, struct freebsd11_freebsd32_nfstat_args *uap) { struct nstat32 nub; struct stat ub; int error; error = kern_fstat(td, uap->fd, &ub); if (error != 0) return (error); error = freebsd11_cvtnstat32(&ub, &nub); if (error == 0) error = copyout(&nub, uap->sb, sizeof(nub)); return (error); } #endif int freebsd32___sysctl(struct thread *td, struct freebsd32___sysctl_args *uap) { int error, name[CTL_MAXNAME]; size_t j, oldlen; uint32_t tmp; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, name, uap->namelen * sizeof(int)); if (error) return (error); if (uap->oldlenp) { error = fueword32(uap->oldlenp, &tmp); oldlen = tmp; } else { oldlen = 0; } if (error != 0) return (EFAULT); error = userland_sysctl(td, name, uap->namelen, uap->old, &oldlen, 1, uap->new, uap->newlen, &j, SCTL_MASK32); if (error) return (error); if (uap->oldlenp != NULL && suword32(uap->oldlenp, j) != 0) error = EFAULT; return (error); } int freebsd32___sysctlbyname(struct thread *td, struct freebsd32___sysctlbyname_args *uap) { size_t oldlen, rv; int error; uint32_t tmp; if (uap->oldlenp != NULL) { error = fueword32(uap->oldlenp, &tmp); oldlen = tmp; } else { error = oldlen = 0; } if (error != 0) return (EFAULT); error = kern___sysctlbyname(td, uap->name, uap->namelen, uap->old, &oldlen, uap->new, uap->newlen, &rv, SCTL_MASK32, 1); if (error != 0) return (error); if (uap->oldlenp != NULL && suword32(uap->oldlenp, rv) != 0) error = EFAULT; return (error); } int freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { /* FreeBSD single IPv4 jails. */ struct jail32_v0 j32_v0; bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0)); if (error) return (error); CP(j32_v0, j, version); PTRIN_CP(j32_v0, j, path); PTRIN_CP(j32_v0, j, hostname); j.ip4s = htonl(j32_v0.ip_number); /* jail_v0 is host order */ break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ { /* FreeBSD multi-IPv4/IPv6,noIP jails. */ struct jail32 j32; error = copyin(uap->jail, &j32, sizeof(struct jail32)); if (error) return (error); CP(j32, j, version); PTRIN_CP(j32, j, path); PTRIN_CP(j32, j, hostname); PTRIN_CP(j32, j, jailname); CP(j32, j, ip4s); CP(j32, j, ip6s); PTRIN_CP(j32, j, ip4); PTRIN_CP(j32, j, ip6); break; } default: /* Sci-Fi jails are not supported, sorry. */ return (EINVAL); } return (kern_jail(td, &j)); } int freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } int freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap) { struct iovec32 iov32; struct uio *auio; int error, i; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) for (i = 0; i < uap->iovcnt; i++) { PTROUT_CP(auio->uio_iov[i], iov32, iov_base); CP(auio->uio_iov[i], iov32, iov_len); error = copyout(&iov32, uap->iovp + i, sizeof(iov32)); if (error != 0) break; } free(auio, M_IOV); return (error); } int freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, 0); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sigaction(struct thread *td, struct freebsd4_freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #endif #ifdef COMPAT_43 struct osigaction32 { uint32_t sa_u; osigset_t sa_mask; int sa_flags; }; #define ONSIG 32 int ofreebsd32_sigaction(struct thread *td, struct ofreebsd32_sigaction_args *uap) { struct osigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsa) { error = copyin(uap->nsa, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); OSIG2SIG(s32.sa_mask, sa.sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osa != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); SIG2OSIG(osa.sa_mask, s32.sa_mask); error = copyout(&s32, uap->osa, sizeof(s32)); } return (error); } struct sigvec32 { uint32_t sv_handler; int sv_mask; int sv_flags; }; int ofreebsd32_sigvec(struct thread *td, struct ofreebsd32_sigvec_args *uap) { struct sigvec32 vec; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsv) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); sa.sa_handler = PTRIN(vec.sv_handler); OSIG2SIG(vec.sv_mask, sa.sa_mask); sa.sa_flags = vec.sv_flags; sa.sa_flags ^= SA_RESTART; sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osv != NULL) { vec.sv_handler = PTROUT(osa.sa_handler); SIG2OSIG(osa.sa_mask, vec.sv_mask); vec.sv_flags = osa.sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } struct sigstack32 { uint32_t ss_sp; int ss_onstack; }; int ofreebsd32_sigstack(struct thread *td, struct ofreebsd32_sigstack_args *uap) { struct sigstack32 s32; struct sigstack nss, oss; int error = 0, unss; if (uap->nss != NULL) { error = copyin(uap->nss, &s32, sizeof(s32)); if (error) return (error); nss.ss_sp = PTRIN(s32.ss_sp); CP(s32, nss, ss_onstack); unss = 1; } else { unss = 0; } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (unss) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK); td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) { s32.ss_sp = PTROUT(oss.ss_sp); CP(oss, s32, ss_onstack); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } #endif int freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap) { return (freebsd32_user_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, uap->rqtp, uap->rmtp)); } int freebsd32_clock_nanosleep(struct thread *td, struct freebsd32_clock_nanosleep_args *uap) { int error; error = freebsd32_user_clock_nanosleep(td, uap->clock_id, uap->flags, uap->rqtp, uap->rmtp); return (kern_posix_error(td, error)); } static int freebsd32_user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, const struct timespec32 *ua_rqtp, struct timespec32 *ua_rmtp) { struct timespec32 rmt32, rqt32; struct timespec rmt, rqt; int error, error2; error = copyin(ua_rqtp, &rqt32, sizeof(rqt32)); if (error) return (error); CP(rqt32, rqt, tv_sec); CP(rqt32, rqt, tv_nsec); error = kern_clock_nanosleep(td, clock_id, flags, &rqt, &rmt); if (error == EINTR && ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0) { CP(rmt, rmt32, tv_sec); CP(rmt, rmt32, tv_nsec); error2 = copyout(&rmt32, ua_rmtp, sizeof(rmt32)); if (error2 != 0) error = error2; } return (error); } int freebsd32_clock_gettime(struct thread *td, struct freebsd32_clock_gettime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) { CP(ats, ats32, tv_sec); CP(ats, ats32, tv_nsec); error = copyout(&ats32, uap->tp, sizeof(ats32)); } return (error); } int freebsd32_clock_settime(struct thread *td, struct freebsd32_clock_settime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = copyin(uap->tp, &ats32, sizeof(ats32)); if (error) return (error); CP(ats32, ats, tv_sec); CP(ats32, ats, tv_nsec); return (kern_clock_settime(td, uap->clock_id, &ats)); } int freebsd32_clock_getres(struct thread *td, struct freebsd32_clock_getres_args *uap) { struct timespec ts; struct timespec32 ts32; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->tp, sizeof(ts32)); } return (error); } int freebsd32_ktimer_create(struct thread *td, struct freebsd32_ktimer_create_args *uap) { struct sigevent32 ev32; struct sigevent ev, *evp; int error, id; if (uap->evp == NULL) { evp = NULL; } else { evp = &ev; error = copyin(uap->evp, &ev32, sizeof(ev32)); if (error != 0) return (error); error = convert_sigevent32(&ev32, &ev); if (error != 0) return (error); } error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) kern_ktimer_delete(td, id); } return (error); } int freebsd32_ktimer_settime(struct thread *td, struct freebsd32_ktimer_settime_args *uap) { struct itimerspec32 val32, oval32; struct itimerspec val, oval, *ovalp; int error; error = copyin(uap->value, &val32, sizeof(val32)); if (error != 0) return (error); ITS_CP(val32, val); ovalp = uap->ovalue != NULL ? &oval : NULL; error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp); if (error == 0 && uap->ovalue != NULL) { ITS_CP(oval, oval32); error = copyout(&oval32, uap->ovalue, sizeof(oval32)); } return (error); } int freebsd32_ktimer_gettime(struct thread *td, struct freebsd32_ktimer_gettime_args *uap) { struct itimerspec32 val32; struct itimerspec val; int error; error = kern_ktimer_gettime(td, uap->timerid, &val); if (error == 0) { ITS_CP(val, val32); error = copyout(&val32, uap->value, sizeof(val32)); } return (error); } int freebsd32_timerfd_gettime(struct thread *td, struct freebsd32_timerfd_gettime_args *uap) { struct itimerspec curr_value; struct itimerspec32 curr_value32; int error; error = kern_timerfd_gettime(td, uap->fd, &curr_value); if (error == 0) { CP(curr_value, curr_value32, it_value.tv_sec); CP(curr_value, curr_value32, it_value.tv_nsec); CP(curr_value, curr_value32, it_interval.tv_sec); CP(curr_value, curr_value32, it_interval.tv_nsec); error = copyout(&curr_value32, uap->curr_value, sizeof(curr_value32)); } return (error); } int freebsd32_timerfd_settime(struct thread *td, struct freebsd32_timerfd_settime_args *uap) { struct itimerspec new_value, old_value; struct itimerspec32 new_value32, old_value32; int error; error = copyin(uap->new_value, &new_value32, sizeof(new_value32)); if (error != 0) return (error); CP(new_value32, new_value, it_value.tv_sec); CP(new_value32, new_value, it_value.tv_nsec); CP(new_value32, new_value, it_interval.tv_sec); CP(new_value32, new_value, it_interval.tv_nsec); if (uap->old_value == NULL) { error = kern_timerfd_settime(td, uap->fd, uap->flags, &new_value, NULL); } else { error = kern_timerfd_settime(td, uap->fd, uap->flags, &new_value, &old_value); if (error == 0) { CP(old_value, old_value32, it_value.tv_sec); CP(old_value, old_value32, it_value.tv_nsec); CP(old_value, old_value32, it_interval.tv_sec); CP(old_value, old_value32, it_interval.tv_nsec); error = copyout(&old_value32, uap->old_value, sizeof(old_value32)); } } return (error); } int freebsd32_clock_getcpuclockid2(struct thread *td, struct freebsd32_clock_getcpuclockid2_args *uap) { clockid_t clk_id; int error; error = kern_clock_getcpuclockid2(td, PAIR32TO64(id_t, uap->id), uap->which, &clk_id); if (error == 0) error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); return (error); } int freebsd32_thr_new(struct thread *td, struct freebsd32_thr_new_args *uap) { struct thr_param32 param32; struct thr_param param; int error; if (uap->param_size < 0 || uap->param_size > sizeof(struct thr_param32)) return (EINVAL); bzero(¶m, sizeof(struct thr_param)); bzero(¶m32, sizeof(struct thr_param32)); error = copyin(uap->param, ¶m32, uap->param_size); if (error != 0) return (error); param.start_func = PTRIN(param32.start_func); param.arg = PTRIN(param32.arg); param.stack_base = PTRIN(param32.stack_base); param.stack_size = param32.stack_size; param.tls_base = PTRIN(param32.tls_base); param.tls_size = param32.tls_size; param.child_tid = PTRIN(param32.child_tid); param.parent_tid = PTRIN(param32.parent_tid); param.flags = param32.flags; param.rtp = PTRIN(param32.rtp); param.spare[0] = PTRIN(param32.spare[0]); param.spare[1] = PTRIN(param32.spare[1]); param.spare[2] = PTRIN(param32.spare[2]); return (kern_thr_new(td, ¶m)); } int freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; int error; error = 0; tsp = NULL; if (uap->timeout != NULL) { error = copyin((const void *)uap->timeout, (void *)&ts32, sizeof(struct timespec32)); if (error != 0) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; tsp = &ts; } return (kern_thr_suspend(td, tsp)); } void siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst) { bzero(dst, sizeof(*dst)); dst->si_signo = src->si_signo; dst->si_errno = src->si_errno; dst->si_code = src->si_code; dst->si_pid = src->si_pid; dst->si_uid = src->si_uid; dst->si_status = src->si_status; dst->si_addr = (uintptr_t)src->si_addr; dst->si_value.sival_int = src->si_value.sival_int; dst->si_timerid = src->si_timerid; dst->si_overrun = src->si_overrun; } #ifndef _FREEBSD32_SYSPROTO_H_ struct freebsd32_sigqueue_args { pid_t pid; int signum; /* union sigval32 */ int value; }; #endif int freebsd32_sigqueue(struct thread *td, struct freebsd32_sigqueue_args *uap) { union sigval sv; /* * On 32-bit ABIs, sival_int and sival_ptr are the same. * On 64-bit little-endian ABIs, the low bits are the same. * In 64-bit big-endian ABIs, sival_int overlaps with * sival_ptr's HIGH bits. We choose to support sival_int * rather than sival_ptr in this case as it seems to be * more common. */ bzero(&sv, sizeof(sv)); sv.sival_int = (uint32_t)(uint64_t)uap->value; return (kern_sigqueue(td, uap->pid, uap->signum, &sv)); } int freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap) { struct timespec32 ts32; struct timespec ts; struct timespec *timeout; sigset_t set; ksiginfo_t ksi; struct siginfo32 si32; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; timeout = &ts; } else timeout = NULL; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, timeout); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } /* * MPSAFE */ int freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap) { ksiginfo_t ksi; struct siginfo32 si32; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int freebsd32_cpuset_setid(struct thread *td, struct freebsd32_cpuset_setid_args *uap) { return (kern_cpuset_setid(td, uap->which, PAIR32TO64(id_t, uap->id), uap->setid)); } int freebsd32_cpuset_getid(struct thread *td, struct freebsd32_cpuset_getid_args *uap) { return (kern_cpuset_getid(td, uap->level, uap->which, PAIR32TO64(id_t, uap->id), uap->setid)); } static int copyin32_set(const void *u, void *k, size_t size) { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ int rv; struct bitset *kb = k; int *p; rv = copyin(u, k, size); if (rv != 0) return (rv); p = (int *)kb->__bits; /* Loop through swapping words. * `size' is in bytes, we need bits. */ for (int i = 0; i < __bitset_words(size * 8); i++) { int tmp = p[0]; p[0] = p[1]; p[1] = tmp; p += 2; } return (0); #else return (copyin(u, k, size)); #endif } static int copyout32_set(const void *k, void *u, size_t size) { #if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ const struct bitset *kb = k; struct bitset *ub = u; const int *kp = (const int *)kb->__bits; int *up = (int *)ub->__bits; int rv; for (int i = 0; i < __bitset_words(CPU_SETSIZE); i++) { /* `size' is in bytes, we need bits. */ for (int i = 0; i < __bitset_words(size * 8); i++) { rv = suword32(up, kp[1]); if (rv == 0) rv = suword32(up + 1, kp[0]); if (rv != 0) return (EFAULT); } } return (0); #else return (copyout(k, u, size)); #endif } static const struct cpuset_copy_cb cpuset_copy32_cb = { .cpuset_copyin = copyin32_set, .cpuset_copyout = copyout32_set }; int freebsd32_cpuset_getaffinity(struct thread *td, struct freebsd32_cpuset_getaffinity_args *uap) { return (user_cpuset_getaffinity(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask, &cpuset_copy32_cb)); } int freebsd32_cpuset_setaffinity(struct thread *td, struct freebsd32_cpuset_setaffinity_args *uap) { return (user_cpuset_setaffinity(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->cpusetsize, uap->mask, &cpuset_copy32_cb)); } int freebsd32_cpuset_getdomain(struct thread *td, struct freebsd32_cpuset_getdomain_args *uap) { return (kern_cpuset_getdomain(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->domainsetsize, uap->mask, uap->policy, &cpuset_copy32_cb)); } int freebsd32_cpuset_setdomain(struct thread *td, struct freebsd32_cpuset_setdomain_args *uap) { return (kern_cpuset_setdomain(td, uap->level, uap->which, PAIR32TO64(id_t,uap->id), uap->domainsetsize, uap->mask, uap->policy, &cpuset_copy32_cb)); } int freebsd32_nmount(struct thread *td, struct freebsd32_nmount_args /* { struct iovec *iovp; unsigned int iovcnt; int flags; } */ *uap) { struct uio *auio; uint64_t flags; int error; /* * Mount flags are now 64-bits. On 32-bit archtectures only * 32-bits are passed in, but from here on everything handles * 64-bit flags correctly. */ flags = uap->flags; AUDIT_ARG_FFLAGS(flags); /* * Filter out MNT_ROOTFS. We do not want clients of nmount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. * MNT_ROOTFS should only be set by the kernel when mounting its * root file system. */ flags &= ~MNT_ROOTFS; /* * check that we have an even number of iovec's * and that we have at least two options. */ if ((uap->iovcnt & 1) || (uap->iovcnt < 4)) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = vfs_donmount(td, flags, auio); free(auio, M_IOV); return error; } #if 0 int freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap) { struct yyy32 *p32, s32; struct yyy *p = NULL, s; struct xxx_arg ap; int error; if (uap->zzz) { error = copyin(uap->zzz, &s32, sizeof(s32)); if (error) return (error); /* translate in */ p = &s; } error = kern_xxx(td, p); if (error) return (error); if (uap->zzz) { /* translate out */ error = copyout(&s32, p32, sizeof(s32)); } return (error); } #endif int syscall32_module_handler(struct module *mod, int what, void *arg) { return (kern_syscall_module_handler(freebsd32_sysent, mod, what, arg)); } int syscall32_helper_register(struct syscall_helper_data *sd, int flags) { return (kern_syscall_helper_register(freebsd32_sysent, sd, flags)); } int syscall32_helper_unregister(struct syscall_helper_data *sd) { return (kern_syscall_helper_unregister(freebsd32_sysent, sd)); } int freebsd32_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) { struct sysentvec *sysent; int argc, envc, i; uint32_t *vectp; char *stringp; uintptr_t destp, ustringp; struct freebsd32_ps_strings *arginfo; char canary[sizeof(long) * 8]; int32_t pagesizes32[MAXPAGESIZES]; size_t execpath_len; int error, szsigcode; sysent = imgp->sysent; arginfo = (struct freebsd32_ps_strings *)PROC_PS_STRINGS(imgp->proc); imgp->ps_strings = arginfo; destp = (uintptr_t)arginfo; /* * Install sigcode. */ if (!PROC_HAS_SHP(imgp->proc)) { szsigcode = *sysent->sv_szsigcode; destp -= szsigcode; destp = rounddown2(destp, sizeof(uint32_t)); error = copyout(sysent->sv_sigcode, (void *)destp, szsigcode); if (error != 0) return (error); } /* * Copy the image path for the rtld. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) { execpath_len = strlen(imgp->execpath) + 1; destp -= execpath_len; imgp->execpathp = (void *)destp; error = copyout(imgp->execpath, imgp->execpathp, execpath_len); if (error != 0) return (error); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); destp -= sizeof(canary); imgp->canary = (void *)destp; error = copyout(canary, imgp->canary, sizeof(canary)); if (error != 0) return (error); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ for (i = 0; i < MAXPAGESIZES; i++) pagesizes32[i] = (uint32_t)pagesizes[i]; destp -= sizeof(pagesizes32); destp = rounddown2(destp, sizeof(uint32_t)); imgp->pagesizes = (void *)destp; error = copyout(pagesizes32, imgp->pagesizes, sizeof(pagesizes32)); if (error != 0) return (error); imgp->pagesizeslen = sizeof(pagesizes32); /* * Allocate room for the argument and environment strings. */ destp -= ARG_MAX - imgp->args->stringspace; destp = rounddown2(destp, sizeof(uint32_t)); ustringp = destp; if (imgp->auxargs) { /* * Allocate room on the stack for the ELF auxargs * array. It has up to AT_COUNT entries. */ destp -= AT_COUNT * sizeof(Elf32_Auxinfo); destp = rounddown2(destp, sizeof(uint32_t)); } vectp = (uint32_t *)destp; /* * Allocate room for the argv[] and env vectors including the * terminating NULL pointers. */ vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; /* * vectp also becomes our initial stack base */ *stack_base = (uintptr_t)vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ error = copyout(stringp, (void *)ustringp, ARG_MAX - imgp->args->stringspace); if (error != 0) return (error); /* * Fill in "ps_strings" struct for ps, w, etc. */ imgp->argv = vectp; if (suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nargvstr, argc) != 0) return (EFAULT); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { if (suword32(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* a null vector table pointer separates the argp's from the envp's */ if (suword32(vectp++, 0) != 0) return (EFAULT); imgp->envv = vectp; if (suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp) != 0 || suword32(&arginfo->ps_nenvstr, envc) != 0) return (EFAULT); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { if (suword32(vectp++, ustringp) != 0) return (EFAULT); while (*stringp++ != 0) ustringp++; ustringp++; } /* end of vector table is a null pointer */ if (suword32(vectp, 0) != 0) return (EFAULT); if (imgp->auxargs) { vectp++; error = imgp->sysent->sv_copyout_auxargs(imgp, (uintptr_t)vectp); if (error != 0) return (error); } return (0); } int freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap) { struct kld_file_stat *stat; struct kld_file_stat32 *stat32; int error, version; if ((error = copyin(&uap->stat->version, &version, sizeof(version))) != 0) return (error); if (version != sizeof(struct kld_file_stat_1_32) && version != sizeof(struct kld_file_stat32)) return (EINVAL); stat = malloc(sizeof(*stat), M_TEMP, M_WAITOK | M_ZERO); stat32 = malloc(sizeof(*stat32), M_TEMP, M_WAITOK | M_ZERO); error = kern_kldstat(td, uap->fileid, stat); if (error == 0) { bcopy(&stat->name[0], &stat32->name[0], sizeof(stat->name)); CP(*stat, *stat32, refs); CP(*stat, *stat32, id); PTROUT_CP(*stat, *stat32, address); CP(*stat, *stat32, size); bcopy(&stat->pathname[0], &stat32->pathname[0], sizeof(stat->pathname)); stat32->version = version; error = copyout(stat32, uap->stat, version); } free(stat, M_TEMP); free(stat32, M_TEMP); return (error); } int freebsd32_posix_fallocate(struct thread *td, struct freebsd32_posix_fallocate_args *uap) { int error; error = kern_posix_fallocate(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len)); return (kern_posix_error(td, error)); } int freebsd32_posix_fadvise(struct thread *td, struct freebsd32_posix_fadvise_args *uap) { int error; error = kern_posix_fadvise(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len), uap->advice); return (kern_posix_error(td, error)); } int convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig) { CP(*sig32, *sig, sigev_notify); switch (sig->sigev_notify) { case SIGEV_NONE: break; case SIGEV_THREAD_ID: CP(*sig32, *sig, sigev_notify_thread_id); /* FALLTHROUGH */ case SIGEV_SIGNAL: CP(*sig32, *sig, sigev_signo); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; case SIGEV_KEVENT: CP(*sig32, *sig, sigev_notify_kqueue); CP(*sig32, *sig, sigev_notify_kevent_flags); PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); break; default: return (EINVAL); } return (0); } int freebsd32_procctl(struct thread *td, struct freebsd32_procctl_args *uap) { void *data; union { struct procctl_reaper_status rs; struct procctl_reaper_pids rp; struct procctl_reaper_kill rk; } x; union { struct procctl_reaper_pids32 rp; } x32; int error, error1, flags, signum; if (uap->com >= PROC_PROCCTL_MD_MIN) return (cpu_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), uap->com, PTRIN(uap->data))); switch (uap->com) { case PROC_ASLR_CTL: case PROC_PROTMAX_CTL: case PROC_SPROTECT: case PROC_STACKGAP_CTL: case PROC_TRACE_CTL: case PROC_TRAPCAP_CTL: case PROC_NO_NEW_PRIVS_CTL: case PROC_WXMAP_CTL: error = copyin(PTRIN(uap->data), &flags, sizeof(flags)); if (error != 0) return (error); data = &flags; break; case PROC_REAP_ACQUIRE: case PROC_REAP_RELEASE: if (uap->data != NULL) return (EINVAL); data = NULL; break; case PROC_REAP_STATUS: data = &x.rs; break; case PROC_REAP_GETPIDS: error = copyin(uap->data, &x32.rp, sizeof(x32.rp)); if (error != 0) return (error); CP(x32.rp, x.rp, rp_count); PTRIN_CP(x32.rp, x.rp, rp_pids); data = &x.rp; break; case PROC_REAP_KILL: error = copyin(uap->data, &x.rk, sizeof(x.rk)); if (error != 0) return (error); data = &x.rk; break; case PROC_ASLR_STATUS: case PROC_PROTMAX_STATUS: case PROC_STACKGAP_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: case PROC_NO_NEW_PRIVS_STATUS: case PROC_WXMAP_STATUS: data = &flags; break; case PROC_PDEATHSIG_CTL: error = copyin(uap->data, &signum, sizeof(signum)); if (error != 0) return (error); data = &signum; break; case PROC_PDEATHSIG_STATUS: data = &signum; break; default: return (EINVAL); } error = kern_procctl(td, uap->idtype, PAIR32TO64(id_t, uap->id), uap->com, data); switch (uap->com) { case PROC_REAP_STATUS: if (error == 0) error = copyout(&x.rs, uap->data, sizeof(x.rs)); break; case PROC_REAP_KILL: error1 = copyout(&x.rk, uap->data, sizeof(x.rk)); if (error == 0) error = error1; break; case PROC_ASLR_STATUS: case PROC_PROTMAX_STATUS: case PROC_STACKGAP_STATUS: case PROC_TRACE_STATUS: case PROC_TRAPCAP_STATUS: case PROC_NO_NEW_PRIVS_STATUS: case PROC_WXMAP_STATUS: if (error == 0) error = copyout(&flags, uap->data, sizeof(flags)); break; case PROC_PDEATHSIG_STATUS: if (error == 0) error = copyout(&signum, uap->data, sizeof(signum)); break; } return (error); } int freebsd32_fcntl(struct thread *td, struct freebsd32_fcntl_args *uap) { long tmp; switch (uap->cmd) { /* * Do unsigned conversion for arg when operation * interprets it as flags or pointer. */ case F_SETLK_REMOTE: case F_SETLKW: case F_SETLK: case F_GETLK: case F_SETFD: case F_SETFL: case F_OGETLK: case F_OSETLK: case F_OSETLKW: case F_KINFO: tmp = (unsigned int)(uap->arg); break; default: tmp = uap->arg; break; } return (kern_fcntl_freebsd(td, uap->fd, uap->cmd, tmp)); } int freebsd32_ppoll(struct thread *td, struct freebsd32_ppoll_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; sigset_t set, *ssp; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; if (uap->set != NULL) { error = copyin(uap->set, &set, sizeof(set)); if (error != 0) return (error); ssp = &set; } else ssp = NULL; return (kern_poll(td, uap->fds, uap->nfds, tsp, ssp)); } int freebsd32_sched_rr_get_interval(struct thread *td, struct freebsd32_sched_rr_get_interval_args *uap) { struct timespec ts; struct timespec32 ts32; int error; error = kern_sched_rr_get_interval(td, uap->pid, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->interval, sizeof(ts32)); } return (error); } static void timex_to_32(struct timex32 *dst, struct timex *src) { CP(*src, *dst, modes); CP(*src, *dst, offset); CP(*src, *dst, freq); CP(*src, *dst, maxerror); CP(*src, *dst, esterror); CP(*src, *dst, status); CP(*src, *dst, constant); CP(*src, *dst, precision); CP(*src, *dst, tolerance); CP(*src, *dst, ppsfreq); CP(*src, *dst, jitter); CP(*src, *dst, shift); CP(*src, *dst, stabil); CP(*src, *dst, jitcnt); CP(*src, *dst, calcnt); CP(*src, *dst, errcnt); CP(*src, *dst, stbcnt); } static void timex_from_32(struct timex *dst, struct timex32 *src) { CP(*src, *dst, modes); CP(*src, *dst, offset); CP(*src, *dst, freq); CP(*src, *dst, maxerror); CP(*src, *dst, esterror); CP(*src, *dst, status); CP(*src, *dst, constant); CP(*src, *dst, precision); CP(*src, *dst, tolerance); CP(*src, *dst, ppsfreq); CP(*src, *dst, jitter); CP(*src, *dst, shift); CP(*src, *dst, stabil); CP(*src, *dst, jitcnt); CP(*src, *dst, calcnt); CP(*src, *dst, errcnt); CP(*src, *dst, stbcnt); } int freebsd32_ntp_adjtime(struct thread *td, struct freebsd32_ntp_adjtime_args *uap) { struct timex tx; struct timex32 tx32; int error, retval; error = copyin(uap->tp, &tx32, sizeof(tx32)); if (error == 0) { timex_from_32(&tx, &tx32); error = kern_ntp_adjtime(td, &tx, &retval); if (error == 0) { timex_to_32(&tx32, &tx); error = copyout(&tx32, uap->tp, sizeof(tx32)); if (error == 0) td->td_retval[0] = retval; } } return (error); } #ifdef FFCLOCK extern struct mtx ffclock_mtx; extern struct ffclock_estimate ffclock_estimate; extern int8_t ffclock_updated; int freebsd32_ffclock_setestimate(struct thread *td, struct freebsd32_ffclock_setestimate_args *uap) { struct ffclock_estimate cest; struct ffclock_estimate32 cest32; int error; /* Reuse of PRIV_CLOCK_SETTIME. */ if ((error = priv_check(td, PRIV_CLOCK_SETTIME)) != 0) return (error); if ((error = copyin(uap->cest, &cest32, sizeof(struct ffclock_estimate32))) != 0) return (error); CP(cest.update_time, cest32.update_time, sec); memcpy(&cest.update_time.frac, &cest32.update_time.frac, sizeof(uint64_t)); CP(cest, cest32, update_ffcount); CP(cest, cest32, leapsec_next); CP(cest, cest32, period); CP(cest, cest32, errb_abs); CP(cest, cest32, errb_rate); CP(cest, cest32, status); CP(cest, cest32, leapsec_total); CP(cest, cest32, leapsec); mtx_lock(&ffclock_mtx); memcpy(&ffclock_estimate, &cest, sizeof(struct ffclock_estimate)); ffclock_updated++; mtx_unlock(&ffclock_mtx); return (error); } int freebsd32_ffclock_getestimate(struct thread *td, struct freebsd32_ffclock_getestimate_args *uap) { struct ffclock_estimate cest; struct ffclock_estimate32 cest32; int error; mtx_lock(&ffclock_mtx); memcpy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate)); mtx_unlock(&ffclock_mtx); CP(cest32.update_time, cest.update_time, sec); memcpy(&cest32.update_time.frac, &cest.update_time.frac, sizeof(uint64_t)); CP(cest32, cest, update_ffcount); CP(cest32, cest, leapsec_next); CP(cest32, cest, period); CP(cest32, cest, errb_abs); CP(cest32, cest, errb_rate); CP(cest32, cest, status); CP(cest32, cest, leapsec_total); CP(cest32, cest, leapsec); error = copyout(&cest32, uap->cest, sizeof(struct ffclock_estimate32)); return (error); } #else /* !FFCLOCK */ int freebsd32_ffclock_setestimate(struct thread *td, struct freebsd32_ffclock_setestimate_args *uap) { return (ENOSYS); } int freebsd32_ffclock_getestimate(struct thread *td, struct freebsd32_ffclock_getestimate_args *uap) { return (ENOSYS); } #endif /* FFCLOCK */ #ifdef COMPAT_43 int ofreebsd32_sethostid(struct thread *td, struct ofreebsd32_sethostid_args *uap) { int name[] = { CTL_KERN, KERN_HOSTID }; long hostid; hostid = uap->hostid; return (kernel_sysctl(td, name, nitems(name), NULL, NULL, &hostid, sizeof(hostid), NULL, 0)); } #endif diff --git a/sys/kern/kern_sendfile.c b/sys/kern/kern_sendfile.c index dfdf01f45e34..f6f6b57adfc7 100644 --- a/sys/kern/kern_sendfile.c +++ b/sys/kern/kern_sendfile.c @@ -1,1373 +1,1373 @@ /*- * Copyright (c) 2013-2015 Gleb Smirnoff * Copyright (c) 1998, David Greenman. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include #include "opt_kern_tls.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static MALLOC_DEFINE(M_SENDFILE, "sendfile", "sendfile dynamic memory"); #define EXT_FLAG_SYNC EXT_FLAG_VENDOR1 #define EXT_FLAG_NOCACHE EXT_FLAG_VENDOR2 #define EXT_FLAG_CACHE_LAST EXT_FLAG_VENDOR3 /* * Structure describing a single sendfile(2) I/O, which may consist of * several underlying pager I/Os. * * The syscall context allocates the structure and initializes 'nios' * to 1. As sendfile_swapin() runs through pages and starts asynchronous * paging operations, it increments 'nios'. * * Every I/O completion calls sendfile_iodone(), which decrements the 'nios', * and the syscall also calls sendfile_iodone() after allocating all mbufs, * linking them and sending to socket. Whoever reaches zero 'nios' is * responsible to * call pru_ready on the socket, to notify it of readyness * of the data. */ struct sf_io { volatile u_int nios; u_int error; int npages; struct socket *so; struct mbuf *m; vm_object_t obj; vm_pindex_t pindex0; #ifdef KERN_TLS struct ktls_session *tls; #endif vm_page_t pa[]; }; /* * Structure used to track requests with SF_SYNC flag. */ struct sendfile_sync { struct mtx mtx; struct cv cv; unsigned count; bool waiting; }; static void sendfile_sync_destroy(struct sendfile_sync *sfs) { KASSERT(sfs->count == 0, ("sendfile sync %p still busy", sfs)); cv_destroy(&sfs->cv); mtx_destroy(&sfs->mtx); free(sfs, M_SENDFILE); } static void sendfile_sync_signal(struct sendfile_sync *sfs) { mtx_lock(&sfs->mtx); KASSERT(sfs->count > 0, ("sendfile sync %p not busy", sfs)); if (--sfs->count == 0) { if (!sfs->waiting) { /* The sendfile() waiter was interrupted by a signal. */ sendfile_sync_destroy(sfs); return; } else { cv_signal(&sfs->cv); } } mtx_unlock(&sfs->mtx); } counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; static void sfstat_init(const void *unused) { COUNTER_ARRAY_ALLOC(sfstat, sizeof(struct sfstat) / sizeof(uint64_t), M_WAITOK); } SYSINIT(sfstat, SI_SUB_MBUF, SI_ORDER_FIRST, sfstat_init, NULL); static int sfstat_sysctl(SYSCTL_HANDLER_ARGS) { struct sfstat s; COUNTER_ARRAY_COPY(sfstat, &s, sizeof(s) / sizeof(uint64_t)); if (req->newptr) COUNTER_ARRAY_ZERO(sfstat, sizeof(s) / sizeof(uint64_t)); return (SYSCTL_OUT(req, &s, sizeof(s))); } SYSCTL_PROC(_kern_ipc, OID_AUTO, sfstat, CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sfstat_sysctl, "I", "sendfile statistics"); static void sendfile_free_mext(struct mbuf *m) { struct sf_buf *sf; vm_page_t pg; int flags; KASSERT(m->m_flags & M_EXT && m->m_ext.ext_type == EXT_SFBUF, ("%s: m %p !M_EXT or !EXT_SFBUF", __func__, m)); sf = m->m_ext.ext_arg1; pg = sf_buf_page(sf); flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0; sf_buf_free(sf); vm_page_release(pg, flags); if (m->m_ext.ext_flags & EXT_FLAG_SYNC) { struct sendfile_sync *sfs = m->m_ext.ext_arg2; sendfile_sync_signal(sfs); } } static void sendfile_free_mext_pg(struct mbuf *m) { vm_page_t pg; int flags, i; bool cache_last; M_ASSERTEXTPG(m); cache_last = m->m_ext.ext_flags & EXT_FLAG_CACHE_LAST; flags = (m->m_ext.ext_flags & EXT_FLAG_NOCACHE) != 0 ? VPR_TRYFREE : 0; for (i = 0; i < m->m_epg_npgs; i++) { if (cache_last && i == m->m_epg_npgs - 1) flags = 0; pg = PHYS_TO_VM_PAGE(m->m_epg_pa[i]); vm_page_release(pg, flags); } if (m->m_ext.ext_flags & EXT_FLAG_SYNC) { struct sendfile_sync *sfs = m->m_ext.ext_arg1; sendfile_sync_signal(sfs); } } /* * Helper function to calculate how much data to put into page i of n. * Only first and last pages are special. */ static inline off_t xfsize(int i, int n, off_t off, off_t len) { if (i == 0) return (omin(PAGE_SIZE - (off & PAGE_MASK), len)); if (i == n - 1 && ((off + len) & PAGE_MASK) > 0) return ((off + len) & PAGE_MASK); return (PAGE_SIZE); } /* * Helper function to get offset within object for i page. */ static inline vm_ooffset_t vmoff(int i, off_t off) { if (i == 0) return ((vm_ooffset_t)off); return (trunc_page(off + i * PAGE_SIZE)); } /* * Helper function used when allocation of a page or sf_buf failed. * Pretend as if we don't have enough space, subtract xfsize() of * all pages that failed. */ static inline void fixspace(int old, int new, off_t off, int *space) { KASSERT(old > new, ("%s: old %d new %d", __func__, old, new)); /* Subtract last one. */ *space -= xfsize(old - 1, old, off, *space); old--; if (new == old) /* There was only one page. */ return; /* Subtract first one. */ if (new == 0) { *space -= xfsize(0, old, off, *space); new++; } /* Rest of pages are full sized. */ *space -= (old - new) * PAGE_SIZE; KASSERT(*space >= 0, ("%s: space went backwards", __func__)); } /* * Wait for all in-flight ios to complete, we must not unwire pages * under them. */ static void sendfile_iowait(struct sf_io *sfio, const char *wmesg) { while (atomic_load_int(&sfio->nios) != 1) pause(wmesg, 1); } /* * I/O completion callback. */ static void sendfile_iodone(void *arg, vm_page_t *pa, int count, int error) { struct sf_io *sfio = arg; struct socket *so; int i; if (error != 0) sfio->error = error; /* * Restore the valid page pointers. They are already * unbusied, but still wired. * * XXXKIB since pages are only wired, and we do not * own the object lock, other users might have * invalidated them in meantime. Similarly, after we * unbusied the swapped-in pages, they can become * invalid under us. */ MPASS(count == 0 || pa[0] != bogus_page); for (i = 0; i < count; i++) { if (pa[i] == bogus_page) { sfio->pa[(pa[0]->pindex - sfio->pindex0) + i] = pa[i] = vm_page_relookup(sfio->obj, pa[0]->pindex + i); KASSERT(pa[i] != NULL, ("%s: page %p[%d] disappeared", __func__, pa, i)); } else { vm_page_xunbusy_unchecked(pa[i]); } } if (!refcount_release(&sfio->nios)) return; #ifdef INVARIANTS for (i = 1; i < sfio->npages; i++) { if (sfio->pa[i] == NULL) break; KASSERT(vm_page_wired(sfio->pa[i]), ("sfio %p page %d %p not wired", sfio, i, sfio->pa[i])); if (i == 0) continue; KASSERT(sfio->pa[0]->object == sfio->pa[i]->object, ("sfio %p page %d %p wrong owner %p %p", sfio, i, sfio->pa[i], sfio->pa[0]->object, sfio->pa[i]->object)); KASSERT(sfio->pa[0]->pindex + i == sfio->pa[i]->pindex, ("sfio %p page %d %p wrong index %jx %jx", sfio, i, sfio->pa[i], (uintmax_t)sfio->pa[0]->pindex, (uintmax_t)sfio->pa[i]->pindex)); } #endif vm_object_pip_wakeup(sfio->obj); if (sfio->m == NULL) { /* * Either I/O operation failed, or we failed to allocate * buffers, or we bailed out on first busy page, or we * succeeded filling the request without any I/Os. Anyway, * pru_send hadn't been executed - nothing had been sent * to the socket yet. */ MPASS((curthread->td_pflags & TDP_KTHREAD) == 0); free(sfio, M_SENDFILE); return; } #if defined(KERN_TLS) && defined(INVARIANTS) if ((sfio->m->m_flags & M_EXTPG) != 0) KASSERT(sfio->tls == sfio->m->m_epg_tls, ("TLS session mismatch")); else KASSERT(sfio->tls == NULL, ("non-ext_pgs mbuf with TLS session")); #endif so = sfio->so; CURVNET_SET(so->so_vnet); if (__predict_false(sfio->error)) { /* * I/O operation failed. The state of data in the socket * is now inconsistent, and all what we can do is to tear * it down. Protocol abort method would tear down protocol * state, free all ready mbufs and detach not ready ones. * We will free the mbufs corresponding to this I/O manually. * * The socket would be marked with EIO and made available * for read, so that application receives EIO on next * syscall and eventually closes the socket. */ so->so_proto->pr_abort(so); so->so_error = EIO; mb_free_notready(sfio->m, sfio->npages); #ifdef KERN_TLS } else if (sfio->tls != NULL && sfio->tls->mode == TCP_TLS_MODE_SW) { /* * I/O operation is complete, but we still need to * encrypt. We cannot do this in the interrupt thread * of the disk controller, so forward the mbufs to a * different thread. * * Donate the socket reference from sfio to rather * than explicitly invoking soref(). */ ktls_enqueue(sfio->m, so, sfio->npages); goto out_with_ref; #endif } else (void)so->so_proto->pr_ready(so, sfio->m, sfio->npages); sorele(so); #ifdef KERN_TLS out_with_ref: #endif CURVNET_RESTORE(); free(sfio, M_SENDFILE); } /* * Iterate through pages vector and request paging for non-valid pages. */ static int sendfile_swapin(vm_object_t obj, struct sf_io *sfio, int *nios, off_t off, off_t len, int rhpages, int flags) { vm_page_t *pa; int a, count, count1, grabbed, i, j, npages, rv; pa = sfio->pa; npages = sfio->npages; *nios = 0; flags = (flags & SF_NODISKIO) ? VM_ALLOC_NOWAIT : 0; sfio->pindex0 = OFF_TO_IDX(off); /* * First grab all the pages and wire them. Note that we grab * only required pages. Readahead pages are dealt with later. */ grabbed = vm_page_grab_pages_unlocked(obj, OFF_TO_IDX(off), VM_ALLOC_NORMAL | VM_ALLOC_WIRED | flags, pa, npages); if (grabbed < npages) { for (int i = grabbed; i < npages; i++) pa[i] = NULL; npages = grabbed; rhpages = 0; } for (i = 0; i < npages;) { /* Skip valid pages. */ if (vm_page_is_valid(pa[i], vmoff(i, off) & PAGE_MASK, xfsize(i, npages, off, len))) { vm_page_xunbusy(pa[i]); SFSTAT_INC(sf_pages_valid); i++; continue; } /* * Next page is invalid. Check if it belongs to pager. It * may not be there, which is a regular situation for shmem * pager. For vnode pager this happens only in case of * a sparse file. * * Important feature of vm_pager_has_page() is the hint * stored in 'a', about how many pages we can pagein after * this page in a single I/O. */ VM_OBJECT_RLOCK(obj); if (!vm_pager_has_page(obj, OFF_TO_IDX(vmoff(i, off)), NULL, &a)) { VM_OBJECT_RUNLOCK(obj); pmap_zero_page(pa[i]); vm_page_valid(pa[i]); MPASS(pa[i]->dirty == 0); vm_page_xunbusy(pa[i]); i++; continue; } VM_OBJECT_RUNLOCK(obj); /* * We want to pagein as many pages as possible, limited only * by the 'a' hint and actual request. */ count = min(a + 1, npages - i); /* * We should not pagein into a valid page because * there might be still unfinished write tracked by * e.g. a buffer, thus we substitute any valid pages * with the bogus one. * * We must not leave around xbusy pages which are not * part of the run passed to vm_pager_getpages(), * otherwise pager might deadlock waiting for the busy * status of the page, e.g. if it constitues the * buffer needed to validate other page. * * First trim the end of the run consisting of the * valid pages, then replace the rest of the valid * with bogus. */ count1 = count; for (j = i + count - 1; j > i; j--) { if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK, xfsize(j, npages, off, len))) { vm_page_xunbusy(pa[j]); SFSTAT_INC(sf_pages_valid); count--; } else { break; } } /* * The last page in the run pa[i + count - 1] is * guaranteed to be invalid by the trim above, so it * is not replaced with bogus, thus -1 in the loop end * condition. */ MPASS(pa[i + count - 1]->valid != VM_PAGE_BITS_ALL); for (j = i + 1; j < i + count - 1; j++) { if (vm_page_is_valid(pa[j], vmoff(j, off) & PAGE_MASK, xfsize(j, npages, off, len))) { vm_page_xunbusy(pa[j]); SFSTAT_INC(sf_pages_valid); SFSTAT_INC(sf_pages_bogus); pa[j] = bogus_page; } } refcount_acquire(&sfio->nios); rv = vm_pager_get_pages_async(obj, pa + i, count, NULL, i + count == npages ? &rhpages : NULL, &sendfile_iodone, sfio); if (__predict_false(rv != VM_PAGER_OK)) { sendfile_iowait(sfio, "sferrio"); /* * Do remaining pages recovery before returning EIO. * Pages from 0 to npages are wired. * Pages from (i + count1) to npages are busied. */ for (j = 0; j < npages; j++) { if (j >= i + count1) vm_page_xunbusy(pa[j]); KASSERT(pa[j] != NULL && pa[j] != bogus_page, ("%s: page %p[%d] I/O recovery failure", __func__, pa, j)); vm_page_unwire(pa[j], PQ_INACTIVE); pa[j] = NULL; } return (EIO); } SFSTAT_INC(sf_iocnt); SFSTAT_ADD(sf_pages_read, count); if (i + count == npages) SFSTAT_ADD(sf_rhpages_read, rhpages); i += count1; (*nios)++; } if (*nios == 0 && npages != 0) SFSTAT_INC(sf_noiocnt); return (0); } static int sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res, struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size, int *bsize) { vm_object_t obj; struct vnode *vp; struct shmfd *shmfd; int error; error = 0; vp = *vp_res = NULL; obj = NULL; shmfd = *shmfd_res = NULL; *bsize = 0; /* * The file descriptor must be a regular file and have a * backing VM object. */ if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; vn_lock(vp, LK_SHARED | LK_RETRY); if (vp->v_type != VREG) { error = EINVAL; goto out; } *bsize = vp->v_mount->mnt_stat.f_iosize; obj = vp->v_object; if (obj == NULL) { error = EINVAL; goto out; } /* * Use the pager size when available to simplify synchronization * with filesystems, which otherwise must atomically update both * the vnode pager size and file size. */ if (obj->type == OBJT_VNODE) { VM_OBJECT_RLOCK(obj); *obj_size = obj->un_pager.vnp.vnp_size; } else { error = vn_getsize_locked(vp, obj_size, td->td_ucred); if (error != 0) goto out; VM_OBJECT_RLOCK(obj); } } else if (fp->f_type == DTYPE_SHM) { shmfd = fp->f_data; obj = shmfd->shm_object; VM_OBJECT_RLOCK(obj); *obj_size = shmfd->shm_size; } else { error = EINVAL; goto out; } if ((obj->flags & OBJ_DEAD) != 0) { VM_OBJECT_RUNLOCK(obj); error = EBADF; goto out; } /* * Temporarily increase the backing VM object's reference * count so that a forced reclamation of its vnode does not * immediately destroy it. */ vm_object_reference_locked(obj); VM_OBJECT_RUNLOCK(obj); *obj_res = obj; *vp_res = vp; *shmfd_res = shmfd; out: if (vp != NULL) VOP_UNLOCK(vp); return (error); } static int sendfile_getsock(struct thread *td, int s, struct file **sock_fp, struct socket **so) { int error; *sock_fp = NULL; *so = NULL; /* * The socket must be a stream socket and connected. */ error = getsock(td, s, &cap_send_rights, sock_fp); if (error != 0) return (error); *so = (*sock_fp)->f_data; if ((*so)->so_type != SOCK_STREAM) return (EINVAL); /* * SCTP one-to-one style sockets currently don't work with * sendfile(). So indicate EINVAL for now. */ if ((*so)->so_proto->pr_protocol == IPPROTO_SCTP) return (EINVAL); return (0); } int vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, struct thread *td) { struct file *sock_fp; struct vnode *vp; struct vm_object *obj; vm_page_t pga; struct socket *so; #ifdef KERN_TLS struct ktls_session *tls; #endif struct mbuf *m, *mh, *mhtail; struct sf_buf *sf; struct shmfd *shmfd; struct sendfile_sync *sfs; struct vattr va; off_t off, sbytes, rem, obj_size, nobj_size; int bsize, error, ext_pgs_idx, hdrlen, max_pgs, softerr; #ifdef KERN_TLS int tls_enq_cnt; #endif bool use_ext_pgs; obj = NULL; so = NULL; m = mh = NULL; sfs = NULL; #ifdef KERN_TLS tls = NULL; #endif hdrlen = sbytes = 0; softerr = 0; use_ext_pgs = false; error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize); if (error != 0) return (error); error = sendfile_getsock(td, sockfd, &sock_fp, &so); if (error != 0) goto out; #ifdef MAC error = mac_socket_check_send(td->td_ucred, so); if (error != 0) goto out; #endif SFSTAT_INC(sf_syscalls); SFSTAT_ADD(sf_rhpages_requested, SF_READAHEAD(flags)); if (flags & SF_SYNC) { sfs = malloc(sizeof(*sfs), M_SENDFILE, M_WAITOK | M_ZERO); mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); cv_init(&sfs->cv, "sendfile"); sfs->waiting = true; } rem = nbytes ? omin(nbytes, obj_size - offset) : obj_size - offset; /* * Protect against multiple writers to the socket. * * XXXRW: Historically this has assumed non-interruptibility, so now * we implement that, but possibly shouldn't. */ error = SOCK_IO_SEND_LOCK(so, SBL_WAIT | SBL_NOINTR); if (error != 0) goto out; #ifdef KERN_TLS tls = ktls_hold(so->so_snd.sb_tls_info); #endif /* * Loop through the pages of the file, starting with the requested * offset. Get a file page (do I/O if necessary), map the file page * into an sf_buf, attach an mbuf header to the sf_buf, and queue * it on the socket. * This is done in two loops. The inner loop turns as many pages * as it can, up to available socket buffer space, without blocking * into mbufs to have it bulk delivered into the socket send buffer. * The outer loop checks the state and available space of the socket * and takes care of the overall progress. */ for (off = offset; rem > 0; ) { struct sf_io *sfio; vm_page_t *pa; struct mbuf *m0, *mtail; int nios, space, npages, rhpages; mtail = NULL; /* * Check the socket state for ongoing connection, * no errors and space in socket buffer. * If space is low allow for the remainder of the * file to be processed if it fits the socket buffer. * Otherwise block in waiting for sufficient space * to proceed, or if the socket is nonblocking, return * to userland with EAGAIN while reporting how far * we've come. * We wait until the socket buffer has significant free * space to do bulk sends. This makes good use of file * system read ahead and allows packet segmentation * offloading hardware to take over lots of work. If * we were not careful here we would send off only one * sfbuf at a time. */ SOCKBUF_LOCK(&so->so_snd); if (so->so_snd.sb_lowat < so->so_snd.sb_hiwat / 2) so->so_snd.sb_lowat = so->so_snd.sb_hiwat / 2; retry_space: if (so->so_snd.sb_state & SBS_CANTSENDMORE) { error = EPIPE; SOCKBUF_UNLOCK(&so->so_snd); goto done; } else if (so->so_error) { error = so->so_error; so->so_error = 0; SOCKBUF_UNLOCK(&so->so_snd); goto done; } if ((so->so_state & SS_ISCONNECTED) == 0) { SOCKBUF_UNLOCK(&so->so_snd); error = ENOTCONN; goto done; } space = sbspace(&so->so_snd); if (space < rem && (space <= 0 || space < so->so_snd.sb_lowat)) { if (so->so_state & SS_NBIO) { SOCKBUF_UNLOCK(&so->so_snd); error = EAGAIN; goto done; } /* * sbwait drops the lock while sleeping. * When we loop back to retry_space the * state may have changed and we retest * for it. */ error = sbwait(so, SO_SND); /* * An error from sbwait usually indicates that we've * been interrupted by a signal. If we've sent anything * then return bytes sent, otherwise return the error. */ if (error != 0) { SOCKBUF_UNLOCK(&so->so_snd); goto done; } goto retry_space; } SOCKBUF_UNLOCK(&so->so_snd); /* * At the beginning of the first loop check if any headers * are specified and copy them into mbufs. Reduce space in * the socket buffer by the size of the header mbuf chain. * Clear hdr_uio here and hdrlen at the end of the first loop. */ if (hdr_uio != NULL && hdr_uio->uio_resid > 0) { hdr_uio->uio_td = td; hdr_uio->uio_rw = UIO_WRITE; #ifdef KERN_TLS if (tls != NULL) mh = m_uiotombuf(hdr_uio, M_WAITOK, space, tls->params.max_frame_len, M_EXTPG); else #endif mh = m_uiotombuf(hdr_uio, M_WAITOK, space, 0, 0); hdrlen = m_length(mh, &mhtail); space -= hdrlen; /* * If header consumed all the socket buffer space, * don't waste CPU cycles and jump to the end. */ if (space == 0) { sfio = NULL; nios = 0; goto prepend_header; } hdr_uio = NULL; } if (vp != NULL) { error = vn_lock(vp, LK_SHARED); if (error != 0) goto done; /* * Check to see if the file size has changed. */ if (obj->type == OBJT_VNODE) { VM_OBJECT_RLOCK(obj); nobj_size = obj->un_pager.vnp.vnp_size; VM_OBJECT_RUNLOCK(obj); } else { error = VOP_GETATTR(vp, &va, td->td_ucred); if (error != 0) { VOP_UNLOCK(vp); goto done; } nobj_size = va.va_size; } if (off >= nobj_size) { VOP_UNLOCK(vp); goto done; } if (nobj_size != obj_size) { obj_size = nobj_size; rem = nbytes ? omin(nbytes + offset, obj_size) : obj_size; rem -= off; } } if (space > rem) space = rem; else if (space > PAGE_SIZE) { /* * Use page boundaries when possible for large * requests. */ if (off & PAGE_MASK) space -= (PAGE_SIZE - (off & PAGE_MASK)); space = trunc_page(space); if (off & PAGE_MASK) space += (PAGE_SIZE - (off & PAGE_MASK)); } npages = howmany(space + (off & PAGE_MASK), PAGE_SIZE); /* * Calculate maximum allowed number of pages for readahead * at this iteration. If SF_USER_READAHEAD was set, we don't * do any heuristics and use exactly the value supplied by * application. Otherwise, we allow readahead up to "rem". * If application wants more, let it be, but there is no * reason to go above maxphys. Also check against "obj_size", * since vm_pager_has_page() can hint beyond EOF. */ if (flags & SF_USER_READAHEAD) { rhpages = SF_READAHEAD(flags); } else { rhpages = howmany(rem + (off & PAGE_MASK), PAGE_SIZE) - npages; rhpages += SF_READAHEAD(flags); } rhpages = min(howmany(maxphys, PAGE_SIZE), rhpages); rhpages = min(howmany(obj_size - trunc_page(off), PAGE_SIZE) - npages, rhpages); sfio = malloc(sizeof(struct sf_io) + npages * sizeof(vm_page_t), M_SENDFILE, M_WAITOK); refcount_init(&sfio->nios, 1); sfio->obj = obj; sfio->error = 0; sfio->m = NULL; sfio->npages = npages; #ifdef KERN_TLS /* * This doesn't use ktls_hold() because sfio->m will * also have a reference on 'tls' that will be valid * for all of sfio's lifetime. */ sfio->tls = tls; #endif vm_object_pip_add(obj, 1); error = sendfile_swapin(obj, sfio, &nios, off, space, rhpages, flags); if (error != 0) { if (vp != NULL) VOP_UNLOCK(vp); sendfile_iodone(sfio, NULL, 0, error); goto done; } /* * Loop and construct maximum sized mbuf chain to be bulk * dumped into socket buffer. */ pa = sfio->pa; /* * Use unmapped mbufs if enabled for TCP. Unmapped * bufs are restricted to TCP as that is what has been * tested. In particular, unmapped mbufs have not * been tested with UNIX-domain sockets. * * TLS frames always require unmapped mbufs. */ if ((mb_use_ext_pgs && so->so_proto->pr_protocol == IPPROTO_TCP) #ifdef KERN_TLS || tls != NULL #endif ) { use_ext_pgs = true; #ifdef KERN_TLS if (tls != NULL) max_pgs = num_pages(tls->params.max_frame_len); else #endif max_pgs = MBUF_PEXT_MAX_PGS; /* Start at last index, to wrap on first use. */ ext_pgs_idx = max_pgs - 1; } for (int i = 0; i < npages; i++) { /* * If a page wasn't grabbed successfully, then * trim the array. Can happen only with SF_NODISKIO. */ if (pa[i] == NULL) { SFSTAT_INC(sf_busy); fixspace(npages, i, off, &space); sfio->npages = i; softerr = EBUSY; break; } pga = pa[i]; if (pga == bogus_page) pga = vm_page_relookup(obj, sfio->pindex0 + i); if (use_ext_pgs) { off_t xfs; ext_pgs_idx++; if (ext_pgs_idx == max_pgs) { m0 = mb_alloc_ext_pgs(M_WAITOK, sendfile_free_mext_pg); if (flags & SF_NOCACHE) { m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE; /* * See comment below regarding * ignoring SF_NOCACHE for the * last page. */ if ((npages - i <= max_pgs) && ((off + space) & PAGE_MASK) && (rem > space || rhpages > 0)) m0->m_ext.ext_flags |= EXT_FLAG_CACHE_LAST; } if (sfs != NULL) { m0->m_ext.ext_flags |= EXT_FLAG_SYNC; m0->m_ext.ext_arg1 = sfs; mtx_lock(&sfs->mtx); sfs->count++; mtx_unlock(&sfs->mtx); } ext_pgs_idx = 0; /* Append to mbuf chain. */ if (mtail != NULL) mtail->m_next = m0; else m = m0; mtail = m0; m0->m_epg_1st_off = vmoff(i, off) & PAGE_MASK; } if (nios) { mtail->m_flags |= M_NOTREADY; m0->m_epg_nrdy++; } m0->m_epg_pa[ext_pgs_idx] = VM_PAGE_TO_PHYS(pga); m0->m_epg_npgs++; xfs = xfsize(i, npages, off, space); m0->m_epg_last_len = xfs; MBUF_EXT_PGS_ASSERT_SANITY(m0); mtail->m_len += xfs; mtail->m_ext.ext_size += PAGE_SIZE; continue; } /* * Get a sendfile buf. When allocating the * first buffer for mbuf chain, we usually * wait as long as necessary, but this wait * can be interrupted. For consequent * buffers, do not sleep, since several * threads might exhaust the buffers and then * deadlock. */ sf = sf_buf_alloc(pga, m != NULL ? SFB_NOWAIT : SFB_CATCH); if (sf == NULL) { SFSTAT_INC(sf_allocfail); sendfile_iowait(sfio, "sfnosf"); for (int j = i; j < npages; j++) { vm_page_unwire(pa[j], PQ_INACTIVE); pa[j] = NULL; } if (m == NULL) softerr = ENOBUFS; fixspace(npages, i, off, &space); sfio->npages = i; break; } m0 = m_get(M_WAITOK, MT_DATA); m0->m_ext.ext_buf = (char *)sf_buf_kva(sf); m0->m_ext.ext_size = PAGE_SIZE; m0->m_ext.ext_arg1 = sf; m0->m_ext.ext_type = EXT_SFBUF; m0->m_ext.ext_flags = EXT_FLAG_EMBREF; m0->m_ext.ext_free = sendfile_free_mext; /* * SF_NOCACHE sets the page as being freed upon send. * However, we ignore it for the last page in 'space', * if the page is truncated, and we got more data to * send (rem > space), or if we have readahead * configured (rhpages > 0). */ if ((flags & SF_NOCACHE) && (i != npages - 1 || !((off + space) & PAGE_MASK) || !(rem > space || rhpages > 0))) m0->m_ext.ext_flags |= EXT_FLAG_NOCACHE; if (sfs != NULL) { m0->m_ext.ext_flags |= EXT_FLAG_SYNC; m0->m_ext.ext_arg2 = sfs; mtx_lock(&sfs->mtx); sfs->count++; mtx_unlock(&sfs->mtx); } m0->m_ext.ext_count = 1; m0->m_flags |= (M_EXT | M_RDONLY); if (nios) m0->m_flags |= M_NOTREADY; m0->m_data = (char *)sf_buf_kva(sf) + (vmoff(i, off) & PAGE_MASK); m0->m_len = xfsize(i, npages, off, space); /* Append to mbuf chain. */ if (mtail != NULL) mtail->m_next = m0; else m = m0; mtail = m0; } if (vp != NULL) VOP_UNLOCK(vp); /* Keep track of bytes processed. */ off += space; rem -= space; /* * Prepend header, if any. Save pointer to first mbuf * with a page. */ if (hdrlen) { prepend_header: m0 = mhtail->m_next = m; m = mh; mh = NULL; } else m0 = m; if (m == NULL) { KASSERT(softerr, ("%s: m NULL, no error", __func__)); error = softerr; sendfile_iodone(sfio, NULL, 0, 0); goto done; } /* Add the buffer chain to the socket buffer. */ KASSERT(m_length(m, NULL) == space + hdrlen, ("%s: mlen %u space %d hdrlen %d", __func__, m_length(m, NULL), space, hdrlen)); CURVNET_SET(so->so_vnet); #ifdef KERN_TLS if (tls != NULL) ktls_frame(m, tls, &tls_enq_cnt, TLS_RLTYPE_APP); #endif if (nios == 0) { /* * If sendfile_swapin() didn't initiate any I/Os, * which happens if all data is cached in VM, or if * the header consumed all socket buffer space and * sfio is NULL, then we can send data right now * without the PRUS_NOTREADY flag. */ if (sfio != NULL) sendfile_iodone(sfio, NULL, 0, 0); #ifdef KERN_TLS if (tls != NULL && tls->mode == TCP_TLS_MODE_SW) { error = so->so_proto->pr_send(so, PRUS_NOTREADY, m, NULL, NULL, td); if (error != 0) { m_freem(m); } else { soref(so); ktls_enqueue(m, so, tls_enq_cnt); } } else #endif error = so->so_proto->pr_send(so, 0, m, NULL, NULL, td); } else { sfio->so = so; sfio->m = m0; soref(so); error = so->so_proto->pr_send(so, PRUS_NOTREADY, m, NULL, NULL, td); sendfile_iodone(sfio, NULL, 0, error); } #ifdef TCP_REQUEST_TRK if (so->so_proto->pr_protocol == IPPROTO_TCP) { /* log the sendfile call to the TCP log, if enabled */ tcp_log_sendfile(so, offset, nbytes, flags); } #endif CURVNET_RESTORE(); m = NULL; if (error) goto done; sbytes += space + hdrlen; if (hdrlen) hdrlen = 0; if (softerr) { error = softerr; goto done; } } /* * Send trailers. Wimp out and use writev(2). */ if (trl_uio != NULL) { SOCK_IO_SEND_UNLOCK(so); error = kern_writev(td, sockfd, trl_uio); if (error == 0) sbytes += td->td_retval[0]; goto out; } done: SOCK_IO_SEND_UNLOCK(so); out: /* * If there was no error we have to clear td->td_retval[0] * because it may have been set by writev. */ if (error == 0) { td->td_retval[0] = 0; } if (sent != NULL) { (*sent) = sbytes; } if (obj != NULL) vm_object_deallocate(obj); if (so) fdrop(sock_fp, td); if (m) m_freem(m); if (mh) m_freem(mh); if (sfs != NULL) { mtx_lock(&sfs->mtx); if (sfs->count != 0) error = cv_wait_sig(&sfs->cv, &sfs->mtx); if (sfs->count == 0) { sendfile_sync_destroy(sfs); } else { sfs->waiting = false; mtx_unlock(&sfs->mtx); } } #ifdef KERN_TLS if (tls != NULL) ktls_free(tls); #endif if (error == ERESTART) error = EINTR; return (error); } static int sendfile(struct thread *td, struct sendfile_args *uap, int compat) { struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct file *fp; off_t sbytes; int error; /* * File offset must be positive. If it goes beyond EOF * we send only the header/trailer and no payload data. */ if (uap->offset < 0) return (EINVAL); sbytes = 0; hdr_uio = trl_uio = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); if (error != 0) goto out; if (hdtr.headers != NULL) { error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio); if (error != 0) goto out; #ifdef COMPAT_FREEBSD4 /* * In FreeBSD < 5.0 the nbytes to send also included * the header. If compat is specified subtract the * header size from nbytes. */ if (compat) { if (uap->nbytes > hdr_uio->uio_resid) uap->nbytes -= hdr_uio->uio_resid; else uap->nbytes = 0; } #endif } if (hdtr.trailers != NULL) { error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio); if (error != 0) goto out; } } AUDIT_ARG_FD(uap->fd); /* * sendfile(2) can start at any offset within a file so we require * CAP_READ+CAP_SEEK = CAP_PREAD. */ if ((error = fget_read(td, uap->fd, &cap_pread_rights, &fp)) != 0) goto out; error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset, uap->nbytes, &sbytes, uap->flags, td); fdrop(fp, td); if (uap->sbytes != NULL) - copyout(&sbytes, uap->sbytes, sizeof(off_t)); + (void)copyout(&sbytes, uap->sbytes, sizeof(off_t)); out: free(hdr_uio, M_IOV); free(trl_uio, M_IOV); return (error); } /* * sendfile(2) * * int sendfile(int fd, int s, off_t offset, size_t nbytes, * struct sf_hdtr *hdtr, off_t *sbytes, int flags) * * Send a file specified by 'fd' and starting at 'offset' to a socket * specified by 's'. Send only 'nbytes' of the file or until EOF if nbytes == * 0. Optionally add a header and/or trailer to the socket output. If * specified, write the total number of bytes sent into *sbytes. */ int sys_sendfile(struct thread *td, struct sendfile_args *uap) { return (sendfile(td, uap, 0)); } #ifdef COMPAT_FREEBSD4 int freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap) { struct sendfile_args args; args.fd = uap->fd; args.s = uap->s; args.offset = uap->offset; args.nbytes = uap->nbytes; args.hdtr = uap->hdtr; args.sbytes = uap->sbytes; args.flags = uap->flags; return (sendfile(td, &args, 1)); } #endif /* COMPAT_FREEBSD4 */