Index: head/sys/compat/freebsd32/freebsd32_misc.c =================================================================== --- head/sys/compat/freebsd32/freebsd32_misc.c (revision 253526) +++ head/sys/compat/freebsd32/freebsd32_misc.c (revision 253527) @@ -1,2914 +1,2940 @@ /*- * Copyright (c) 2002 Doug Rabson * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_inet.h" #include "opt_inet6.h" #define __ELF_WORD_SIZE 32 #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/malloc.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include /* Must come after sys/selinfo.h */ #include /* Must come after sys/selinfo.h */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include FEATURE(compat_freebsd_32bit, "Compatible with 32-bit FreeBSD"); #ifndef __mips__ CTASSERT(sizeof(struct timeval32) == 8); CTASSERT(sizeof(struct timespec32) == 8); CTASSERT(sizeof(struct itimerval32) == 16); #endif CTASSERT(sizeof(struct statfs32) == 256); #ifndef __mips__ CTASSERT(sizeof(struct rusage32) == 72); #endif CTASSERT(sizeof(struct sigaltstack32) == 12); CTASSERT(sizeof(struct kevent32) == 20); CTASSERT(sizeof(struct iovec32) == 8); CTASSERT(sizeof(struct msghdr32) == 28); #ifndef __mips__ CTASSERT(sizeof(struct stat32) == 96); #endif CTASSERT(sizeof(struct sigaction32) == 24); static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count); static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count); #if BYTE_ORDER == BIG_ENDIAN #define PAIR32TO64(type, name) ((name ## 2) | ((type)(name ## 1) << 32)) #define RETVAL_HI 0 #define RETVAL_LO 1 #else #define PAIR32TO64(type, name) ((name ## 1) | ((type)(name ## 2) << 32)) #define RETVAL_HI 1 #define RETVAL_LO 0 #endif void freebsd32_rusage_out(const struct rusage *s, struct rusage32 *s32) { TV_CP(*s, *s32, ru_utime); TV_CP(*s, *s32, ru_stime); CP(*s, *s32, ru_maxrss); CP(*s, *s32, ru_ixrss); CP(*s, *s32, ru_idrss); CP(*s, *s32, ru_isrss); CP(*s, *s32, ru_minflt); CP(*s, *s32, ru_majflt); CP(*s, *s32, ru_nswap); CP(*s, *s32, ru_inblock); CP(*s, *s32, ru_oublock); CP(*s, *s32, ru_msgsnd); CP(*s, *s32, ru_msgrcv); CP(*s, *s32, ru_nsignals); CP(*s, *s32, ru_nvcsw); CP(*s, *s32, ru_nivcsw); } int freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap) { int error, status; struct rusage32 ru32; struct rusage ru, *rup; if (uap->rusage != NULL) rup = &ru; else rup = NULL; error = kern_wait(td, uap->pid, &status, uap->options, rup); if (error) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->rusage != NULL && error == 0) { freebsd32_rusage_out(&ru, &ru32); error = copyout(&ru32, uap->rusage, sizeof(ru32)); } return (error); } int freebsd32_wait6(struct thread *td, struct freebsd32_wait6_args *uap) { struct wrusage32 wru32; struct __wrusage wru, *wrup; struct siginfo32 si32; struct __siginfo si, *sip; int error, status; if (uap->wrusage != NULL) wrup = &wru; else wrup = NULL; if (uap->info != NULL) { sip = &si; bzero(sip, sizeof(*sip)); } else sip = NULL; error = kern_wait6(td, uap->idtype, PAIR32TO64(id_t, uap->id), &status, uap->options, wrup, sip); if (error != 0) return (error); if (uap->status != NULL) error = copyout(&status, uap->status, sizeof(status)); if (uap->wrusage != NULL && error == 0) { freebsd32_rusage_out(&wru.wru_self, &wru32.wru_self); freebsd32_rusage_out(&wru.wru_children, &wru32.wru_children); error = copyout(&wru32, uap->wrusage, sizeof(wru32)); } if (uap->info != NULL && error == 0) { siginfo_to_siginfo32 (&si, &si32); error = copyout(&si32, uap->info, sizeof(si32)); } return (error); } #ifdef COMPAT_FREEBSD4 static void copy_statfs(struct statfs *in, struct statfs32 *out) { statfs_scale_blocks(in, INT32_MAX); bzero(out, sizeof(*out)); CP(*in, *out, f_bsize); out->f_iosize = MIN(in->f_iosize, INT32_MAX); CP(*in, *out, f_blocks); CP(*in, *out, f_bfree); CP(*in, *out, f_bavail); out->f_files = MIN(in->f_files, INT32_MAX); out->f_ffree = MIN(in->f_ffree, INT32_MAX); CP(*in, *out, f_fsid); CP(*in, *out, f_owner); CP(*in, *out, f_type); CP(*in, *out, f_flags); out->f_syncwrites = MIN(in->f_syncwrites, INT32_MAX); out->f_asyncwrites = MIN(in->f_asyncwrites, INT32_MAX); strlcpy(out->f_fstypename, in->f_fstypename, MFSNAMELEN); strlcpy(out->f_mntonname, in->f_mntonname, min(MNAMELEN, FREEBSD4_MNAMELEN)); out->f_syncreads = MIN(in->f_syncreads, INT32_MAX); out->f_asyncreads = MIN(in->f_asyncreads, INT32_MAX); strlcpy(out->f_mntfromname, in->f_mntfromname, min(MNAMELEN, FREEBSD4_MNAMELEN)); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_getfsstat(struct thread *td, struct freebsd4_freebsd32_getfsstat_args *uap) { struct statfs *buf, *sp; struct statfs32 stat32; size_t count, size; int error; count = uap->bufsize / sizeof(struct statfs32); size = count * sizeof(struct statfs); error = kern_getfsstat(td, &buf, size, UIO_SYSSPACE, uap->flags); if (size > 0) { count = td->td_retval[0]; sp = buf; while (count > 0 && error == 0) { copy_statfs(sp, &stat32); error = copyout(&stat32, uap->buf, sizeof(stat32)); sp++; uap->buf++; count--; } free(buf, M_TEMP); } return (error); } #endif int freebsd32_sigaltstack(struct thread *td, struct freebsd32_sigaltstack_args *uap) { struct sigaltstack32 s32; struct sigaltstack ss, oss, *ssp; int error; if (uap->ss != NULL) { error = copyin(uap->ss, &s32, sizeof(s32)); if (error) return (error); PTRIN_CP(s32, ss, ss_sp); CP(s32, ss, ss_size); CP(s32, ss, ss_flags); ssp = &ss; } else ssp = NULL; error = kern_sigaltstack(td, ssp, &oss); if (error == 0 && uap->oss != NULL) { PTROUT_CP(oss, s32, ss_sp); CP(oss, s32, ss_size); CP(oss, s32, ss_flags); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } /* * Custom version of exec_copyin_args() so that we can translate * the pointers. */ int freebsd32_exec_copyin_args(struct image_args *args, char *fname, enum uio_seg segflg, u_int32_t *argv, u_int32_t *envv) { char *argp, *envp; u_int32_t *p32, arg; size_t length; int error; bzero(args, sizeof(*args)); if (argv == NULL) return (EFAULT); /* * Allocate demand-paged memory for the file name, argument, and * environment strings. */ error = exec_alloc_args(args); if (error != 0) return (error); /* * Copy the file name. */ if (fname != NULL) { args->fname = args->buf; error = (segflg == UIO_SYSSPACE) ? copystr(fname, args->fname, PATH_MAX, &length) : copyinstr(fname, args->fname, PATH_MAX, &length); if (error != 0) goto err_exit; } else length = 0; args->begin_argv = args->buf + length; args->endp = args->begin_argv; args->stringspace = ARG_MAX; /* * extract arguments first */ p32 = argv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; argp = PTRIN(arg); error = copyinstr(argp, args->endp, args->stringspace, &length); if (error) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->argc++; } args->begin_envv = args->endp; /* * extract environment strings */ if (envv) { p32 = envv; for (;;) { error = copyin(p32++, &arg, sizeof(arg)); if (error) goto err_exit; if (arg == 0) break; envp = PTRIN(arg); error = copyinstr(envp, args->endp, args->stringspace, &length); if (error) { if (error == ENAMETOOLONG) error = E2BIG; goto err_exit; } args->stringspace -= length; args->endp += length; args->envc++; } } return (0); err_exit: exec_free_args(args); return (error); } int freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap) { struct image_args eargs; int error; error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &eargs, NULL); return (error); } int freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap) { struct image_args eargs; int error; error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { eargs.fd = uap->fd; error = kern_execve(td, &eargs, NULL); } return (error); } #ifdef __ia64__ static int freebsd32_mmap_partial(struct thread *td, vm_offset_t start, vm_offset_t end, int prot, int fd, off_t pos) { vm_map_t map; vm_map_entry_t entry; int rv; map = &td->td_proc->p_vmspace->vm_map; if (fd != -1) prot |= VM_PROT_WRITE; if (vm_map_lookup_entry(map, start, &entry)) { if ((entry->protection & prot) != prot) { rv = vm_map_protect(map, trunc_page(start), round_page(end), entry->protection | prot, FALSE); if (rv != KERN_SUCCESS) return (EINVAL); } } else { vm_offset_t addr = trunc_page(start); rv = vm_map_find(map, 0, 0, &addr, PAGE_SIZE, FALSE, prot, VM_PROT_ALL, 0); if (rv != KERN_SUCCESS) return (EINVAL); } if (fd != -1) { struct pread_args r; r.fd = fd; r.buf = (void *) start; r.nbyte = end - start; r.offset = pos; return (sys_pread(td, &r)); } else { while (start < end) { subyte((void *) start, 0); start++; } return (0); } } #endif int freebsd32_mprotect(struct thread *td, struct freebsd32_mprotect_args *uap) { struct mprotect_args ap; ap.addr = PTRIN(uap->addr); ap.len = uap->len; ap.prot = uap->prot; #if defined(__amd64__) || defined(__ia64__) if (i386_read_exec && (ap.prot & PROT_READ) != 0) ap.prot |= PROT_EXEC; #endif return (sys_mprotect(td, &ap)); } int freebsd32_mmap(struct thread *td, struct freebsd32_mmap_args *uap) { struct mmap_args ap; vm_offset_t addr = (vm_offset_t) uap->addr; vm_size_t len = uap->len; int prot = uap->prot; int flags = uap->flags; int fd = uap->fd; off_t pos = PAIR32TO64(off_t,uap->pos); #ifdef __ia64__ vm_size_t pageoff; int error; /* * Attempt to handle page size hassles. */ pageoff = (pos & PAGE_MASK); if (flags & MAP_FIXED) { vm_offset_t start, end; start = addr; end = addr + len; if (start != trunc_page(start)) { error = freebsd32_mmap_partial(td, start, round_page(start), prot, fd, pos); if (fd != -1) pos += round_page(start) - start; start = round_page(start); } if (end != round_page(end)) { vm_offset_t t = trunc_page(end); error = freebsd32_mmap_partial(td, t, end, prot, fd, pos + t - start); end = trunc_page(end); } if (end > start && fd != -1 && (pos & PAGE_MASK)) { /* * We can't map this region at all. The specified * address doesn't have the same alignment as the file * position. Fake the mapping by simply reading the * entire region into memory. First we need to make * sure the region exists. */ vm_map_t map; struct pread_args r; int rv; prot |= VM_PROT_WRITE; map = &td->td_proc->p_vmspace->vm_map; rv = vm_map_remove(map, start, end); if (rv != KERN_SUCCESS) return (EINVAL); rv = vm_map_find(map, 0, 0, &start, end - start, FALSE, prot, VM_PROT_ALL, 0); if (rv != KERN_SUCCESS) return (EINVAL); r.fd = fd; r.buf = (void *) start; r.nbyte = end - start; r.offset = pos; error = sys_pread(td, &r); if (error) return (error); td->td_retval[0] = addr; return (0); } if (end == start) { /* * After dealing with the ragged ends, there * might be none left. */ td->td_retval[0] = addr; return (0); } addr = start; len = end - start; } #endif #if defined(__amd64__) || defined(__ia64__) if (i386_read_exec && (prot & PROT_READ)) prot |= PROT_EXEC; #endif ap.addr = (void *) addr; ap.len = len; ap.prot = prot; ap.flags = flags; ap.fd = fd; ap.pos = pos; return (sys_mmap(td, &ap)); } #ifdef COMPAT_FREEBSD6 int freebsd6_freebsd32_mmap(struct thread *td, struct freebsd6_freebsd32_mmap_args *uap) { struct freebsd32_mmap_args ap; ap.addr = uap->addr; ap.len = uap->len; ap.prot = uap->prot; ap.flags = uap->flags; ap.fd = uap->fd; ap.pos1 = uap->pos1; ap.pos2 = uap->pos2; return (freebsd32_mmap(td, &ap)); } #endif int freebsd32_setitimer(struct thread *td, struct freebsd32_setitimer_args *uap) { struct itimerval itv, oitv, *itvp; struct itimerval32 i32; int error; if (uap->itv != NULL) { error = copyin(uap->itv, &i32, sizeof(i32)); if (error) return (error); TV_CP(i32, itv, it_interval); TV_CP(i32, itv, it_value); itvp = &itv; } else itvp = NULL; error = kern_setitimer(td, uap->which, itvp, &oitv); if (error || uap->oitv == NULL) return (error); TV_CP(oitv, i32, it_interval); TV_CP(oitv, i32, it_value); return (copyout(&i32, uap->oitv, sizeof(i32))); } int freebsd32_getitimer(struct thread *td, struct freebsd32_getitimer_args *uap) { struct itimerval itv; struct itimerval32 i32; int error; error = kern_getitimer(td, uap->which, &itv); if (error || uap->itv == NULL) return (error); TV_CP(itv, i32, it_interval); TV_CP(itv, i32, it_value); return (copyout(&i32, uap->itv, sizeof(i32))); } int freebsd32_select(struct thread *td, struct freebsd32_select_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; int error; if (uap->tv != NULL) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; /* * XXX Do pointers need PTRIN()? */ return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, sizeof(int32_t) * 8)); } int freebsd32_pselect(struct thread *td, struct freebsd32_pselect_args *uap) { struct timespec32 ts32; struct timespec ts; struct timeval tv, *tvp; sigset_t set, *uset; int error; if (uap->ts != NULL) { error = copyin(uap->ts, &ts32, sizeof(ts32)); if (error != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); TIMESPEC_TO_TIMEVAL(&tv, &ts); tvp = &tv; } else tvp = NULL; if (uap->sm != NULL) { error = copyin(uap->sm, &set, sizeof(set)); if (error != 0) return (error); uset = &set; } else uset = NULL; /* * XXX Do pointers need PTRIN()? */ error = kern_pselect(td, uap->nd, uap->in, uap->ou, uap->ex, tvp, uset, sizeof(int32_t) * 8); return (error); } /* * Copy 'count' items into the destination list pointed to by uap->eventlist. */ static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; int i, error = 0; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; for (i = 0; i < count; i++) { CP(kevp[i], ks32[i], ident); CP(kevp[i], ks32[i], filter); CP(kevp[i], ks32[i], flags); CP(kevp[i], ks32[i], fflags); CP(kevp[i], ks32[i], data); PTROUT_CP(kevp[i], ks32[i], udata); } error = copyout(ks32, uap->eventlist, count * sizeof *ks32); if (error == 0) uap->eventlist += count; return (error); } /* * Copy 'count' items from the list pointed to by uap->changelist. */ static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count) { struct freebsd32_kevent_args *uap; struct kevent32 ks32[KQ_NEVENTS]; int i, error = 0; KASSERT(count <= KQ_NEVENTS, ("count (%d) > KQ_NEVENTS", count)); uap = (struct freebsd32_kevent_args *)arg; error = copyin(uap->changelist, ks32, count * sizeof *ks32); if (error) goto done; uap->changelist += count; for (i = 0; i < count; i++) { CP(ks32[i], kevp[i], ident); CP(ks32[i], kevp[i], filter); CP(ks32[i], kevp[i], flags); CP(ks32[i], kevp[i], fflags); CP(ks32[i], kevp[i], data); PTRIN_CP(ks32[i], kevp[i], udata); } done: return (error); } int freebsd32_kevent(struct thread *td, struct freebsd32_kevent_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct kevent_copyops k_ops = { uap, freebsd32_kevent_copyout, freebsd32_kevent_copyin}; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; error = kern_kevent(td, uap->fd, uap->nchanges, uap->nevents, &k_ops, tsp); return (error); } int freebsd32_gettimeofday(struct thread *td, struct freebsd32_gettimeofday_args *uap) { struct timeval atv; struct timeval32 atv32; struct timezone rtz; int error = 0; if (uap->tp) { microtime(&atv); CP(atv, atv32, tv_sec); CP(atv, atv32, tv_usec); error = copyout(&atv32, uap->tp, sizeof (atv32)); } if (error == 0 && uap->tzp != NULL) { rtz.tz_minuteswest = tz_minuteswest; rtz.tz_dsttime = tz_dsttime; error = copyout(&rtz, uap->tzp, sizeof (rtz)); } return (error); } int freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap) { struct rusage32 s32; struct rusage s; int error; error = kern_getrusage(td, uap->who, &s); if (error) return (error); if (uap->rusage != NULL) { freebsd32_rusage_out(&s, &s32); error = copyout(&s32, uap->rusage, sizeof(s32)); } return (error); } static int freebsd32_copyinuio(struct iovec32 *iovp, u_int iovcnt, struct uio **uiop) { struct iovec32 iov32; struct iovec *iov; struct uio *uio; u_int iovlen; int error, i; *uiop = NULL; if (iovcnt > UIO_MAXIOV) return (EINVAL); iovlen = iovcnt * sizeof(struct iovec); uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); iov = (struct iovec *)(uio + 1); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp[i], &iov32, sizeof(struct iovec32)); if (error) { free(uio, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } uio->uio_iov = iov; uio->uio_iovcnt = iovcnt; uio->uio_segflg = UIO_USERSPACE; uio->uio_offset = -1; uio->uio_resid = 0; for (i = 0; i < iovcnt; i++) { if (iov->iov_len > INT_MAX - uio->uio_resid) { free(uio, M_IOV); return (EINVAL); } uio->uio_resid += iov->iov_len; iov++; } *uiop = uio; return (0); } int freebsd32_readv(struct thread *td, struct freebsd32_readv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_readv(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_writev(struct thread *td, struct freebsd32_writev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_writev(td, uap->fd, auio); free(auio, M_IOV); return (error); } int freebsd32_preadv(struct thread *td, struct freebsd32_preadv_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_preadv(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_pwritev(struct thread *td, struct freebsd32_pwritev_args *uap) { struct uio *auio; int error; error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_pwritev(td, uap->fd, auio, PAIR32TO64(off_t,uap->offset)); free(auio, M_IOV); return (error); } int freebsd32_copyiniov(struct iovec32 *iovp32, u_int iovcnt, struct iovec **iovp, int error) { struct iovec32 iov32; struct iovec *iov; u_int iovlen; int i; *iovp = NULL; if (iovcnt > UIO_MAXIOV) return (error); iovlen = iovcnt * sizeof(struct iovec); iov = malloc(iovlen, M_IOV, M_WAITOK); for (i = 0; i < iovcnt; i++) { error = copyin(&iovp32[i], &iov32, sizeof(struct iovec32)); if (error) { free(iov, M_IOV); return (error); } iov[i].iov_base = PTRIN(iov32.iov_base); iov[i].iov_len = iov32.iov_len; } *iovp = iov; return (0); } static int freebsd32_copyinmsghdr(struct msghdr32 *msg32, struct msghdr *msg) { struct msghdr32 m32; int error; error = copyin(msg32, &m32, sizeof(m32)); if (error) return (error); msg->msg_name = PTRIN(m32.msg_name); msg->msg_namelen = m32.msg_namelen; msg->msg_iov = PTRIN(m32.msg_iov); msg->msg_iovlen = m32.msg_iovlen; msg->msg_control = PTRIN(m32.msg_control); msg->msg_controllen = m32.msg_controllen; msg->msg_flags = m32.msg_flags; return (0); } static int freebsd32_copyoutmsghdr(struct msghdr *msg, struct msghdr32 *msg32) { struct msghdr32 m32; int error; m32.msg_name = PTROUT(msg->msg_name); m32.msg_namelen = msg->msg_namelen; m32.msg_iov = PTROUT(msg->msg_iov); m32.msg_iovlen = msg->msg_iovlen; m32.msg_control = PTROUT(msg->msg_control); m32.msg_controllen = msg->msg_controllen; m32.msg_flags = msg->msg_flags; error = copyout(&m32, msg32, sizeof(m32)); return (error); } #ifndef __mips__ #define FREEBSD32_ALIGNBYTES (sizeof(int) - 1) #else #define FREEBSD32_ALIGNBYTES (sizeof(long) - 1) #endif #define FREEBSD32_ALIGN(p) \ (((u_long)(p) + FREEBSD32_ALIGNBYTES) & ~FREEBSD32_ALIGNBYTES) #define FREEBSD32_CMSG_SPACE(l) \ (FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + FREEBSD32_ALIGN(l)) #define FREEBSD32_CMSG_DATA(cmsg) ((unsigned char *)(cmsg) + \ FREEBSD32_ALIGN(sizeof(struct cmsghdr))) static int freebsd32_copy_msg_out(struct msghdr *msg, struct mbuf *control) { struct cmsghdr *cm; void *data; socklen_t clen, datalen; int error; caddr_t ctlbuf; int len, maxlen, copylen; struct mbuf *m; error = 0; len = msg->msg_controllen; maxlen = msg->msg_controllen; msg->msg_controllen = 0; m = control; ctlbuf = msg->msg_control; while (m && len > 0) { cm = mtod(m, struct cmsghdr *); clen = m->m_len; while (cm != NULL) { if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; /* Adjust message length */ cm->cmsg_len = FREEBSD32_ALIGN(sizeof(struct cmsghdr)) + datalen; /* Copy cmsghdr */ copylen = sizeof(struct cmsghdr); if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; copylen = len; } error = copyout(cm,ctlbuf,copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (len <= 0) break; /* Copy data */ copylen = datalen; if (len < copylen) { msg->msg_flags |= MSG_CTRUNC; copylen = len; } error = copyout(data,ctlbuf,copylen); if (error) goto exit; ctlbuf += FREEBSD32_ALIGN(copylen); len -= FREEBSD32_ALIGN(copylen); if (CMSG_SPACE(datalen) < clen) { clen -= CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } m = m->m_next; } msg->msg_controllen = (len <= 0) ? maxlen : ctlbuf - (caddr_t)msg->msg_control; exit: return (error); } int freebsd32_recvmsg(td, uap) struct thread *td; struct freebsd32_recvmsg_args /* { int s; struct msghdr32 *msg; int flags; } */ *uap; { struct msghdr msg; struct msghdr32 m32; struct iovec *uiov, *iov; struct mbuf *control = NULL; struct mbuf **controlp; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_flags = uap->flags; uiov = msg.msg_iov; msg.msg_iov = iov; controlp = (msg.msg_control != NULL) ? &control : NULL; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, controlp); if (error == 0) { msg.msg_iov = uiov; if (control != NULL) error = freebsd32_copy_msg_out(&msg, control); else msg.msg_controllen = 0; if (error == 0) error = freebsd32_copyoutmsghdr(&msg, uap->msg); } free(iov, M_IOV); if (control != NULL) m_freem(control); return (error); } static int freebsd32_convert_msg_in(struct mbuf **controlp) { struct mbuf *control = *controlp; struct cmsghdr *cm = mtod(control, struct cmsghdr *); void *data; socklen_t clen = control->m_len, datalen; int error; error = 0; *controlp = NULL; while (cm != NULL) { if (sizeof(struct cmsghdr) > clen || cm->cmsg_len > clen) { error = EINVAL; break; } data = FREEBSD32_CMSG_DATA(cm); datalen = (caddr_t)cm + cm->cmsg_len - (caddr_t)data; *controlp = sbcreatecontrol(data, datalen, cm->cmsg_type, cm->cmsg_level); controlp = &(*controlp)->m_next; if (FREEBSD32_CMSG_SPACE(datalen) < clen) { clen -= FREEBSD32_CMSG_SPACE(datalen); cm = (struct cmsghdr *) ((caddr_t)cm + FREEBSD32_CMSG_SPACE(datalen)); } else { clen = 0; cm = NULL; } } m_freem(control); return (error); } int freebsd32_sendmsg(struct thread *td, struct freebsd32_sendmsg_args *uap) { struct msghdr msg; struct msghdr32 m32; struct iovec *iov; struct mbuf *control = NULL; struct sockaddr *to = NULL; int error; error = copyin(uap->msg, &m32, sizeof(m32)); if (error) return (error); error = freebsd32_copyinmsghdr(uap->msg, &msg); if (error) return (error); error = freebsd32_copyiniov(PTRIN(m32.msg_iov), m32.msg_iovlen, &iov, EMSGSIZE); if (error) return (error); msg.msg_iov = iov; if (msg.msg_name != NULL) { error = getsockaddr(&to, msg.msg_name, msg.msg_namelen); if (error) { to = NULL; goto out; } msg.msg_name = to; } if (msg.msg_control) { if (msg.msg_controllen < sizeof(struct cmsghdr)) { error = EINVAL; goto out; } error = sockargs(&control, msg.msg_control, msg.msg_controllen, MT_CONTROL); if (error) goto out; error = freebsd32_convert_msg_in(&control); if (error) goto out; } error = kern_sendit(td, uap->s, &msg, uap->flags, control, UIO_USERSPACE); out: free(iov, M_IOV); if (to) free(to, M_SONAME); return (error); } int freebsd32_recvfrom(struct thread *td, struct freebsd32_recvfrom_args *uap) { struct msghdr msg; struct iovec aiov; int error; if (uap->fromlenaddr) { error = copyin(PTRIN(uap->fromlenaddr), &msg.msg_namelen, sizeof(msg.msg_namelen)); if (error) return (error); } else { msg.msg_namelen = 0; } msg.msg_name = PTRIN(uap->from); msg.msg_iov = &aiov; msg.msg_iovlen = 1; aiov.iov_base = PTRIN(uap->buf); aiov.iov_len = uap->len; msg.msg_control = NULL; msg.msg_flags = uap->flags; error = kern_recvit(td, uap->s, &msg, UIO_USERSPACE, NULL); if (error == 0 && uap->fromlenaddr) error = copyout(&msg.msg_namelen, PTRIN(uap->fromlenaddr), sizeof (msg.msg_namelen)); return (error); } int freebsd32_settimeofday(struct thread *td, struct freebsd32_settimeofday_args *uap) { struct timeval32 tv32; struct timeval tv, *tvp; struct timezone tz, *tzp; int error; if (uap->tv) { error = copyin(uap->tv, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, tv, tv_sec); CP(tv32, tv, tv_usec); tvp = &tv; } else tvp = NULL; if (uap->tzp) { error = copyin(uap->tzp, &tz, sizeof(tz)); if (error) return (error); tzp = &tz; } else tzp = NULL; return (kern_settimeofday(td, tvp, tzp)); } int freebsd32_utimes(struct thread *td, struct freebsd32_utimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_lutimes(struct thread *td, struct freebsd32_lutimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_lutimes(td, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_futimes(struct thread *td, struct freebsd32_futimes_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->tptr != NULL) { error = copyin(uap->tptr, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_futimes(td, uap->fd, sp, UIO_SYSSPACE)); } int freebsd32_futimesat(struct thread *td, struct freebsd32_futimesat_args *uap) { struct timeval32 s32[2]; struct timeval s[2], *sp; int error; if (uap->times != NULL) { error = copyin(uap->times, s32, sizeof(s32)); if (error) return (error); CP(s32[0], s[0], tv_sec); CP(s32[0], s[0], tv_usec); CP(s32[1], s[1], tv_sec); CP(s32[1], s[1], tv_usec); sp = s; } else sp = NULL; return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, sp, UIO_SYSSPACE)); } int freebsd32_adjtime(struct thread *td, struct freebsd32_adjtime_args *uap) { struct timeval32 tv32; struct timeval delta, olddelta, *deltap; int error; if (uap->delta) { error = copyin(uap->delta, &tv32, sizeof(tv32)); if (error) return (error); CP(tv32, delta, tv_sec); CP(tv32, delta, tv_usec); deltap = δ } else deltap = NULL; error = kern_adjtime(td, deltap, &olddelta); if (uap->olddelta && error == 0) { CP(olddelta, tv32, tv_sec); CP(olddelta, tv32, tv_usec); error = copyout(&tv32, uap->olddelta, sizeof(tv32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_statfs(struct thread *td, struct freebsd4_freebsd32_statfs_args *uap) { struct statfs32 s32; struct statfs s; int error; error = kern_statfs(td, uap->path, UIO_USERSPACE, &s); if (error) return (error); copy_statfs(&s, &s32); return (copyout(&s32, uap->buf, sizeof(s32))); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fstatfs(struct thread *td, struct freebsd4_freebsd32_fstatfs_args *uap) { struct statfs32 s32; struct statfs s; int error; error = kern_fstatfs(td, uap->fd, &s); if (error) return (error); copy_statfs(&s, &s32); return (copyout(&s32, uap->buf, sizeof(s32))); } #endif #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_fhstatfs(struct thread *td, struct freebsd4_freebsd32_fhstatfs_args *uap) { struct statfs32 s32; struct statfs s; fhandle_t fh; int error; if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) return (error); error = kern_fhstatfs(td, fh, &s); if (error) return (error); copy_statfs(&s, &s32); return (copyout(&s32, uap->buf, sizeof(s32))); } #endif int freebsd32_pread(struct thread *td, struct freebsd32_pread_args *uap) { struct pread_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.nbyte = uap->nbyte; ap.offset = PAIR32TO64(off_t,uap->offset); return (sys_pread(td, &ap)); } int freebsd32_pwrite(struct thread *td, struct freebsd32_pwrite_args *uap) { struct pwrite_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.nbyte = uap->nbyte; ap.offset = PAIR32TO64(off_t,uap->offset); return (sys_pwrite(td, &ap)); } #ifdef COMPAT_43 int ofreebsd32_lseek(struct thread *td, struct ofreebsd32_lseek_args *uap) { struct lseek_args nuap; nuap.fd = uap->fd; nuap.offset = uap->offset; nuap.whence = uap->whence; return (sys_lseek(td, &nuap)); } #endif int freebsd32_lseek(struct thread *td, struct freebsd32_lseek_args *uap) { int error; struct lseek_args ap; off_t pos; ap.fd = uap->fd; ap.offset = PAIR32TO64(off_t,uap->offset); ap.whence = uap->whence; error = sys_lseek(td, &ap); /* Expand the quad return into two parts for eax and edx */ pos = *(off_t *)(td->td_retval); td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd32_truncate(struct thread *td, struct freebsd32_truncate_args *uap) { struct truncate_args ap; ap.path = uap->path; ap.length = PAIR32TO64(off_t,uap->length); return (sys_truncate(td, &ap)); } int freebsd32_ftruncate(struct thread *td, struct freebsd32_ftruncate_args *uap) { struct ftruncate_args ap; ap.fd = uap->fd; ap.length = PAIR32TO64(off_t,uap->length); return (sys_ftruncate(td, &ap)); } #ifdef COMPAT_43 int ofreebsd32_getdirentries(struct thread *td, struct ofreebsd32_getdirentries_args *uap) { struct ogetdirentries_args ap; int error; long loff; int32_t loff_cut; ap.fd = uap->fd; ap.buf = uap->buf; ap.count = uap->count; ap.basep = NULL; error = kern_ogetdirentries(td, &ap, &loff); if (error == 0) { loff_cut = loff; error = copyout(&loff_cut, uap->basep, sizeof(int32_t)); } return (error); } #endif int freebsd32_getdirentries(struct thread *td, struct freebsd32_getdirentries_args *uap) { long base; int32_t base32; int error; error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, NULL, UIO_USERSPACE); if (error) return (error); if (uap->basep != NULL) { base32 = base; error = copyout(&base32, uap->basep, sizeof(int32_t)); } return (error); } #ifdef COMPAT_FREEBSD6 /* versions with the 'int pad' argument */ int freebsd6_freebsd32_pread(struct thread *td, struct freebsd6_freebsd32_pread_args *uap) { struct pread_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.nbyte = uap->nbyte; ap.offset = PAIR32TO64(off_t,uap->offset); return (sys_pread(td, &ap)); } int freebsd6_freebsd32_pwrite(struct thread *td, struct freebsd6_freebsd32_pwrite_args *uap) { struct pwrite_args ap; ap.fd = uap->fd; ap.buf = uap->buf; ap.nbyte = uap->nbyte; ap.offset = PAIR32TO64(off_t,uap->offset); return (sys_pwrite(td, &ap)); } int freebsd6_freebsd32_lseek(struct thread *td, struct freebsd6_freebsd32_lseek_args *uap) { int error; struct lseek_args ap; off_t pos; ap.fd = uap->fd; ap.offset = PAIR32TO64(off_t,uap->offset); ap.whence = uap->whence; error = sys_lseek(td, &ap); /* Expand the quad return into two parts for eax and edx */ pos = *(off_t *)(td->td_retval); td->td_retval[RETVAL_LO] = pos & 0xffffffff; /* %eax */ td->td_retval[RETVAL_HI] = pos >> 32; /* %edx */ return error; } int freebsd6_freebsd32_truncate(struct thread *td, struct freebsd6_freebsd32_truncate_args *uap) { struct truncate_args ap; ap.path = uap->path; ap.length = PAIR32TO64(off_t,uap->length); return (sys_truncate(td, &ap)); } int freebsd6_freebsd32_ftruncate(struct thread *td, struct freebsd6_freebsd32_ftruncate_args *uap) { struct ftruncate_args ap; ap.fd = uap->fd; ap.length = PAIR32TO64(off_t,uap->length); return (sys_ftruncate(td, &ap)); } #endif /* COMPAT_FREEBSD6 */ struct sf_hdtr32 { uint32_t headers; int hdr_cnt; uint32_t trailers; int trl_cnt; }; static int freebsd32_do_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap, int compat) { struct sendfile_args ap; struct sf_hdtr32 hdtr32; struct sf_hdtr hdtr; struct uio *hdr_uio, *trl_uio; struct iovec32 *iov32; int error; hdr_uio = trl_uio = NULL; ap.fd = uap->fd; ap.s = uap->s; ap.offset = PAIR32TO64(off_t,uap->offset); ap.nbytes = uap->nbytes; ap.hdtr = (struct sf_hdtr *)uap->hdtr; /* XXX not used */ ap.sbytes = uap->sbytes; ap.flags = uap->flags; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32)); if (error) goto out; PTRIN_CP(hdtr32, hdtr, headers); CP(hdtr32, hdtr, hdr_cnt); PTRIN_CP(hdtr32, hdtr, trailers); CP(hdtr32, hdtr, trl_cnt); if (hdtr.headers != NULL) { iov32 = PTRIN(hdtr32.headers); error = freebsd32_copyinuio(iov32, hdtr32.hdr_cnt, &hdr_uio); if (error) goto out; } if (hdtr.trailers != NULL) { iov32 = PTRIN(hdtr32.trailers); error = freebsd32_copyinuio(iov32, hdtr32.trl_cnt, &trl_uio); if (error) goto out; } } error = kern_sendfile(td, &ap, hdr_uio, trl_uio, compat); out: if (hdr_uio) free(hdr_uio, M_IOV); if (trl_uio) free(trl_uio, M_IOV); return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sendfile(struct thread *td, struct freebsd4_freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, (struct freebsd32_sendfile_args *)uap, 1)); } #endif int freebsd32_sendfile(struct thread *td, struct freebsd32_sendfile_args *uap) { return (freebsd32_do_sendfile(td, uap, 0)); } static void copy_stat(struct stat *in, struct stat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_size); CP(*in, *out, st_blocks); CP(*in, *out, st_blksize); CP(*in, *out, st_flags); CP(*in, *out, st_gen); TS_CP(*in, *out, st_birthtim); } #ifdef COMPAT_43 static void copy_ostat(struct stat *in, struct ostat32 *out) { CP(*in, *out, st_dev); CP(*in, *out, st_ino); CP(*in, *out, st_mode); CP(*in, *out, st_nlink); CP(*in, *out, st_uid); CP(*in, *out, st_gid); CP(*in, *out, st_rdev); CP(*in, *out, st_size); TS_CP(*in, *out, st_atim); TS_CP(*in, *out, st_mtim); TS_CP(*in, *out, st_ctim); CP(*in, *out, st_blksize); CP(*in, *out, st_blocks); CP(*in, *out, st_flags); CP(*in, *out, st_gen); } #endif int freebsd32_stat(struct thread *td, struct freebsd32_stat_args *uap) { struct stat sb; struct stat32 sb32; int error; error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_stat(struct thread *td, struct ofreebsd32_stat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_stat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_fstat(struct thread *td, struct freebsd32_fstat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_fstat(struct thread *td, struct ofreebsd32_fstat_args *uap) { struct stat ub; struct ostat32 ub32; int error; error = kern_fstat(td, uap->fd, &ub); if (error) return (error); copy_ostat(&ub, &ub32); error = copyout(&ub32, uap->ub, sizeof(ub32)); return (error); } #endif int freebsd32_fstatat(struct thread *td, struct freebsd32_fstatat_args *uap) { struct stat ub; struct stat32 ub32; int error; error = kern_statat(td, uap->flag, uap->fd, uap->path, UIO_USERSPACE, &ub); if (error) return (error); copy_stat(&ub, &ub32); error = copyout(&ub32, uap->buf, sizeof(ub32)); return (error); } int freebsd32_lstat(struct thread *td, struct freebsd32_lstat_args *uap) { struct stat sb; struct stat32 sb32; int error; error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); copy_stat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #ifdef COMPAT_43 int ofreebsd32_lstat(struct thread *td, struct ofreebsd32_lstat_args *uap) { struct stat sb; struct ostat32 sb32; int error; error = kern_lstat(td, uap->path, UIO_USERSPACE, &sb); if (error) return (error); copy_ostat(&sb, &sb32); error = copyout(&sb32, uap->ub, sizeof (sb32)); return (error); } #endif int freebsd32_sysctl(struct thread *td, struct freebsd32_sysctl_args *uap) { int error, name[CTL_MAXNAME]; size_t j, oldlen; if (uap->namelen > CTL_MAXNAME || uap->namelen < 2) return (EINVAL); error = copyin(uap->name, name, uap->namelen * sizeof(int)); if (error) return (error); if (uap->oldlenp) oldlen = fuword32(uap->oldlenp); else oldlen = 0; error = userland_sysctl(td, name, uap->namelen, uap->old, &oldlen, 1, uap->new, uap->newlen, &j, SCTL_MASK32); if (error && error != ENOMEM) return (error); if (uap->oldlenp) suword32(uap->oldlenp, j); return (0); } int freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap) { uint32_t version; int error; struct jail j; error = copyin(uap->jail, &version, sizeof(uint32_t)); if (error) return (error); switch (version) { case 0: { /* FreeBSD single IPv4 jails. */ struct jail32_v0 j32_v0; bzero(&j, sizeof(struct jail)); error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0)); if (error) return (error); CP(j32_v0, j, version); PTRIN_CP(j32_v0, j, path); PTRIN_CP(j32_v0, j, hostname); j.ip4s = j32_v0.ip_number; break; } case 1: /* * Version 1 was used by multi-IPv4 jail implementations * that never made it into the official kernel. */ return (EINVAL); case 2: /* JAIL_API_VERSION */ { /* FreeBSD multi-IPv4/IPv6,noIP jails. */ struct jail32 j32; error = copyin(uap->jail, &j32, sizeof(struct jail32)); if (error) return (error); CP(j32, j, version); PTRIN_CP(j32, j, path); PTRIN_CP(j32, j, hostname); PTRIN_CP(j32, j, jailname); CP(j32, j, ip4s); CP(j32, j, ip6s); PTRIN_CP(j32, j, ip4); PTRIN_CP(j32, j, ip6); break; } default: /* Sci-Fi jails are not supported, sorry. */ return (EINVAL); } return (kern_jail(td, &j)); } int freebsd32_jail_set(struct thread *td, struct freebsd32_jail_set_args *uap) { struct uio *auio; int error; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_set(td, auio, uap->flags); free(auio, M_IOV); return (error); } int freebsd32_jail_get(struct thread *td, struct freebsd32_jail_get_args *uap) { struct iovec32 iov32; struct uio *auio; int error, i; /* Check that we have an even number of iovecs. */ if (uap->iovcnt & 1) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = kern_jail_get(td, auio, uap->flags); if (error == 0) for (i = 0; i < uap->iovcnt; i++) { PTROUT_CP(auio->uio_iov[i], iov32, iov_base); CP(auio->uio_iov[i], iov32, iov_len); error = copyout(&iov32, uap->iovp + i, sizeof(iov32)); if (error != 0) break; } free(auio, M_IOV); return (error); } int freebsd32_sigaction(struct thread *td, struct freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, 0); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #ifdef COMPAT_FREEBSD4 int freebsd4_freebsd32_sigaction(struct thread *td, struct freebsd4_freebsd32_sigaction_args *uap) { struct sigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->act) { error = copyin(uap->act, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); CP(s32, sa, sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->sig, sap, &osa, KSA_FREEBSD4); if (error == 0 && uap->oact != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); CP(osa, s32, sa_mask); error = copyout(&s32, uap->oact, sizeof(s32)); } return (error); } #endif #ifdef COMPAT_43 struct osigaction32 { u_int32_t sa_u; osigset_t sa_mask; int sa_flags; }; #define ONSIG 32 int ofreebsd32_sigaction(struct thread *td, struct ofreebsd32_sigaction_args *uap) { struct osigaction32 s32; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsa) { error = copyin(uap->nsa, &s32, sizeof(s32)); if (error) return (error); sa.sa_handler = PTRIN(s32.sa_u); CP(s32, sa, sa_flags); OSIG2SIG(s32.sa_mask, sa.sa_mask); sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osa != NULL) { s32.sa_u = PTROUT(osa.sa_handler); CP(osa, s32, sa_flags); SIG2OSIG(osa.sa_mask, s32.sa_mask); error = copyout(&s32, uap->osa, sizeof(s32)); } return (error); } int ofreebsd32_sigprocmask(struct thread *td, struct ofreebsd32_sigprocmask_args *uap) { sigset_t set, oset; int error; OSIG2SIG(uap->mask, set); error = kern_sigprocmask(td, uap->how, &set, &oset, SIGPROCMASK_OLD); SIG2OSIG(oset, td->td_retval[0]); return (error); } int ofreebsd32_sigpending(struct thread *td, struct ofreebsd32_sigpending_args *uap) { struct proc *p = td->td_proc; sigset_t siglist; PROC_LOCK(p); siglist = p->p_siglist; SIGSETOR(siglist, td->td_siglist); PROC_UNLOCK(p); SIG2OSIG(siglist, td->td_retval[0]); return (0); } struct sigvec32 { u_int32_t sv_handler; int sv_mask; int sv_flags; }; int ofreebsd32_sigvec(struct thread *td, struct ofreebsd32_sigvec_args *uap) { struct sigvec32 vec; struct sigaction sa, osa, *sap; int error; if (uap->signum <= 0 || uap->signum >= ONSIG) return (EINVAL); if (uap->nsv) { error = copyin(uap->nsv, &vec, sizeof(vec)); if (error) return (error); sa.sa_handler = PTRIN(vec.sv_handler); OSIG2SIG(vec.sv_mask, sa.sa_mask); sa.sa_flags = vec.sv_flags; sa.sa_flags ^= SA_RESTART; sap = &sa; } else sap = NULL; error = kern_sigaction(td, uap->signum, sap, &osa, KSA_OSIGSET); if (error == 0 && uap->osv != NULL) { vec.sv_handler = PTROUT(osa.sa_handler); SIG2OSIG(osa.sa_mask, vec.sv_mask); vec.sv_flags = osa.sa_flags; vec.sv_flags &= ~SA_NOCLDWAIT; vec.sv_flags ^= SA_RESTART; error = copyout(&vec, uap->osv, sizeof(vec)); } return (error); } int ofreebsd32_sigblock(struct thread *td, struct ofreebsd32_sigblock_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_BLOCK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsetmask(struct thread *td, struct ofreebsd32_sigsetmask_args *uap) { sigset_t set, oset; OSIG2SIG(uap->mask, set); kern_sigprocmask(td, SIG_SETMASK, &set, &oset, 0); SIG2OSIG(oset, td->td_retval[0]); return (0); } int ofreebsd32_sigsuspend(struct thread *td, struct ofreebsd32_sigsuspend_args *uap) { sigset_t mask; OSIG2SIG(uap->mask, mask); return (kern_sigsuspend(td, mask)); } struct sigstack32 { u_int32_t ss_sp; int ss_onstack; }; int ofreebsd32_sigstack(struct thread *td, struct ofreebsd32_sigstack_args *uap) { struct sigstack32 s32; struct sigstack nss, oss; int error = 0, unss; if (uap->nss != NULL) { error = copyin(uap->nss, &s32, sizeof(s32)); if (error) return (error); nss.ss_sp = PTRIN(s32.ss_sp); CP(s32, nss, ss_onstack); unss = 1; } else { unss = 0; } oss.ss_sp = td->td_sigstk.ss_sp; oss.ss_onstack = sigonstack(cpu_getstack(td)); if (unss) { td->td_sigstk.ss_sp = nss.ss_sp; td->td_sigstk.ss_size = 0; td->td_sigstk.ss_flags |= (nss.ss_onstack & SS_ONSTACK); td->td_pflags |= TDP_ALTSTACK; } if (uap->oss != NULL) { s32.ss_sp = PTROUT(oss.ss_sp); CP(oss, s32, ss_onstack); error = copyout(&s32, uap->oss, sizeof(s32)); } return (error); } #endif int freebsd32_nanosleep(struct thread *td, struct freebsd32_nanosleep_args *uap) { struct timespec32 rmt32, rqt32; struct timespec rmt, rqt; int error; error = copyin(uap->rqtp, &rqt32, sizeof(rqt32)); if (error) return (error); CP(rqt32, rqt, tv_sec); CP(rqt32, rqt, tv_nsec); if (uap->rmtp && !useracc((caddr_t)uap->rmtp, sizeof(rmt), VM_PROT_WRITE)) return (EFAULT); error = kern_nanosleep(td, &rqt, &rmt); if (error && uap->rmtp) { int error2; CP(rmt, rmt32, tv_sec); CP(rmt, rmt32, tv_nsec); error2 = copyout(&rmt32, uap->rmtp, sizeof(rmt32)); if (error2) error = error2; } return (error); } int freebsd32_clock_gettime(struct thread *td, struct freebsd32_clock_gettime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = kern_clock_gettime(td, uap->clock_id, &ats); if (error == 0) { CP(ats, ats32, tv_sec); CP(ats, ats32, tv_nsec); error = copyout(&ats32, uap->tp, sizeof(ats32)); } return (error); } int freebsd32_clock_settime(struct thread *td, struct freebsd32_clock_settime_args *uap) { struct timespec ats; struct timespec32 ats32; int error; error = copyin(uap->tp, &ats32, sizeof(ats32)); if (error) return (error); CP(ats32, ats, tv_sec); CP(ats32, ats, tv_nsec); return (kern_clock_settime(td, uap->clock_id, &ats)); } int freebsd32_clock_getres(struct thread *td, struct freebsd32_clock_getres_args *uap) { struct timespec ts; struct timespec32 ts32; int error; if (uap->tp == NULL) return (0); error = kern_clock_getres(td, uap->clock_id, &ts); if (error == 0) { CP(ts, ts32, tv_sec); CP(ts, ts32, tv_nsec); error = copyout(&ts32, uap->tp, sizeof(ts32)); } return (error); } int freebsd32_clock_getcpuclockid2(struct thread *td, struct freebsd32_clock_getcpuclockid2_args *uap) { clockid_t clk_id; int error; error = kern_clock_getcpuclockid2(td, PAIR32TO64(id_t, uap->id), uap->which, &clk_id); if (error == 0) error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); return (error); } int freebsd32_thr_new(struct thread *td, struct freebsd32_thr_new_args *uap) { struct thr_param32 param32; struct thr_param param; int error; if (uap->param_size < 0 || uap->param_size > sizeof(struct thr_param32)) return (EINVAL); bzero(¶m, sizeof(struct thr_param)); bzero(¶m32, sizeof(struct thr_param32)); error = copyin(uap->param, ¶m32, uap->param_size); if (error != 0) return (error); param.start_func = PTRIN(param32.start_func); param.arg = PTRIN(param32.arg); param.stack_base = PTRIN(param32.stack_base); param.stack_size = param32.stack_size; param.tls_base = PTRIN(param32.tls_base); param.tls_size = param32.tls_size; param.child_tid = PTRIN(param32.child_tid); param.parent_tid = PTRIN(param32.parent_tid); param.flags = param32.flags; param.rtp = PTRIN(param32.rtp); param.spare[0] = PTRIN(param32.spare[0]); param.spare[1] = PTRIN(param32.spare[1]); param.spare[2] = PTRIN(param32.spare[2]); return (kern_thr_new(td, ¶m)); } int freebsd32_thr_suspend(struct thread *td, struct freebsd32_thr_suspend_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; int error; error = 0; tsp = NULL; if (uap->timeout != NULL) { error = copyin((const void *)uap->timeout, (void *)&ts32, sizeof(struct timespec32)); if (error != 0) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; tsp = &ts; } return (kern_thr_suspend(td, tsp)); } void siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst) { bzero(dst, sizeof(*dst)); dst->si_signo = src->si_signo; dst->si_errno = src->si_errno; dst->si_code = src->si_code; dst->si_pid = src->si_pid; dst->si_uid = src->si_uid; dst->si_status = src->si_status; dst->si_addr = (uintptr_t)src->si_addr; dst->si_value.sival_int = src->si_value.sival_int; dst->si_timerid = src->si_timerid; dst->si_overrun = src->si_overrun; } int freebsd32_sigtimedwait(struct thread *td, struct freebsd32_sigtimedwait_args *uap) { struct timespec32 ts32; struct timespec ts; struct timespec *timeout; sigset_t set; ksiginfo_t ksi; struct siginfo32 si32; int error; if (uap->timeout) { error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); ts.tv_sec = ts32.tv_sec; ts.tv_nsec = ts32.tv_nsec; timeout = &ts; } else timeout = NULL; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, timeout); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } /* * MPSAFE */ int freebsd32_sigwaitinfo(struct thread *td, struct freebsd32_sigwaitinfo_args *uap) { ksiginfo_t ksi; struct siginfo32 si32; sigset_t set; int error; error = copyin(uap->set, &set, sizeof(set)); if (error) return (error); error = kern_sigtimedwait(td, set, &ksi, NULL); if (error) return (error); if (uap->info) { siginfo_to_siginfo32(&ksi.ksi_info, &si32); error = copyout(&si32, uap->info, sizeof(struct siginfo32)); } if (error == 0) td->td_retval[0] = ksi.ksi_signo; return (error); } int freebsd32_cpuset_setid(struct thread *td, struct freebsd32_cpuset_setid_args *uap) { struct cpuset_setid_args ap; ap.which = uap->which; ap.id = PAIR32TO64(id_t,uap->id); ap.setid = uap->setid; return (sys_cpuset_setid(td, &ap)); } int freebsd32_cpuset_getid(struct thread *td, struct freebsd32_cpuset_getid_args *uap) { struct cpuset_getid_args ap; ap.level = uap->level; ap.which = uap->which; ap.id = PAIR32TO64(id_t,uap->id); ap.setid = uap->setid; return (sys_cpuset_getid(td, &ap)); } int freebsd32_cpuset_getaffinity(struct thread *td, struct freebsd32_cpuset_getaffinity_args *uap) { struct cpuset_getaffinity_args ap; ap.level = uap->level; ap.which = uap->which; ap.id = PAIR32TO64(id_t,uap->id); ap.cpusetsize = uap->cpusetsize; ap.mask = uap->mask; return (sys_cpuset_getaffinity(td, &ap)); } int freebsd32_cpuset_setaffinity(struct thread *td, struct freebsd32_cpuset_setaffinity_args *uap) { struct cpuset_setaffinity_args ap; ap.level = uap->level; ap.which = uap->which; ap.id = PAIR32TO64(id_t,uap->id); ap.cpusetsize = uap->cpusetsize; ap.mask = uap->mask; return (sys_cpuset_setaffinity(td, &ap)); } int freebsd32_nmount(struct thread *td, struct freebsd32_nmount_args /* { struct iovec *iovp; unsigned int iovcnt; int flags; } */ *uap) { struct uio *auio; uint64_t flags; int error; /* * Mount flags are now 64-bits. On 32-bit archtectures only * 32-bits are passed in, but from here on everything handles * 64-bit flags correctly. */ flags = uap->flags; AUDIT_ARG_FFLAGS(flags); /* * Filter out MNT_ROOTFS. We do not want clients of nmount() in * userspace to set this flag, but we must filter it out if we want * MNT_UPDATE on the root file system to work. * MNT_ROOTFS should only be set by the kernel when mounting its * root file system. */ flags &= ~MNT_ROOTFS; /* * check that we have an even number of iovec's * and that we have at least two options. */ if ((uap->iovcnt & 1) || (uap->iovcnt < 4)) return (EINVAL); error = freebsd32_copyinuio(uap->iovp, uap->iovcnt, &auio); if (error) return (error); error = vfs_donmount(td, flags, auio); free(auio, M_IOV); return error; } #if 0 int freebsd32_xxx(struct thread *td, struct freebsd32_xxx_args *uap) { struct yyy32 *p32, s32; struct yyy *p = NULL, s; struct xxx_arg ap; int error; if (uap->zzz) { error = copyin(uap->zzz, &s32, sizeof(s32)); if (error) return (error); /* translate in */ p = &s; } error = kern_xxx(td, p); if (error) return (error); if (uap->zzz) { /* translate out */ error = copyout(&s32, p32, sizeof(s32)); } return (error); } #endif int syscall32_register(int *offset, struct sysent *new_sysent, struct sysent *old_sysent) { if (*offset == NO_SYSCALL) { int i; for (i = 1; i < SYS_MAXSYSCALL; ++i) if (freebsd32_sysent[i].sy_call == (sy_call_t *)lkmnosys) break; if (i == SYS_MAXSYSCALL) return (ENFILE); *offset = i; } else if (*offset < 0 || *offset >= SYS_MAXSYSCALL) return (EINVAL); else if (freebsd32_sysent[*offset].sy_call != (sy_call_t *)lkmnosys && freebsd32_sysent[*offset].sy_call != (sy_call_t *)lkmressys) return (EEXIST); *old_sysent = freebsd32_sysent[*offset]; freebsd32_sysent[*offset] = *new_sysent; return 0; } int syscall32_deregister(int *offset, struct sysent *old_sysent) { if (*offset) freebsd32_sysent[*offset] = *old_sysent; return 0; } int syscall32_module_handler(struct module *mod, int what, void *arg) { struct syscall_module_data *data = (struct syscall_module_data*)arg; modspecific_t ms; int error; switch (what) { case MOD_LOAD: error = syscall32_register(data->offset, data->new_sysent, &data->old_sysent); if (error) { /* Leave a mark so we know to safely unload below. */ data->offset = NULL; return error; } ms.intval = *data->offset; MOD_XLOCK; module_setspecific(mod, &ms); MOD_XUNLOCK; if (data->chainevh) error = data->chainevh(mod, what, data->chainarg); return (error); case MOD_UNLOAD: /* * MOD_LOAD failed, so just return without calling the * chained handler since we didn't pass along the MOD_LOAD * event. */ if (data->offset == NULL) return (0); if (data->chainevh) { error = data->chainevh(mod, what, data->chainarg); if (error) return (error); } error = syscall32_deregister(data->offset, &data->old_sysent); return (error); default: error = EOPNOTSUPP; if (data->chainevh) error = data->chainevh(mod, what, data->chainarg); return (error); } } int syscall32_helper_register(struct syscall_helper_data *sd) { struct syscall_helper_data *sd1; int error; for (sd1 = sd; sd1->syscall_no != NO_SYSCALL; sd1++) { error = syscall32_register(&sd1->syscall_no, &sd1->new_sysent, &sd1->old_sysent); if (error != 0) { syscall32_helper_unregister(sd); return (error); } sd1->registered = 1; } return (0); } int syscall32_helper_unregister(struct syscall_helper_data *sd) { struct syscall_helper_data *sd1; for (sd1 = sd; sd1->registered != 0; sd1++) { syscall32_deregister(&sd1->syscall_no, &sd1->old_sysent); sd1->registered = 0; } return (0); } register_t * freebsd32_copyout_strings(struct image_params *imgp) { int argc, envc, i; u_int32_t *vectp; char *stringp, *destp; u_int32_t *stack_base; struct freebsd32_ps_strings *arginfo; char canary[sizeof(long) * 8]; int32_t pagesizes32[MAXPAGESIZES]; size_t execpath_len; int szsigcode; /* * Calculate string base and vector table pointers. * Also deal with signal trampoline code for this exec type. */ if (imgp->execpath != NULL && imgp->auxargs != NULL) execpath_len = strlen(imgp->execpath) + 1; else execpath_len = 0; arginfo = (struct freebsd32_ps_strings *)curproc->p_sysent-> sv_psstrings; if (imgp->proc->p_sysent->sv_sigcode_base == 0) szsigcode = *(imgp->proc->p_sysent->sv_szsigcode); else szsigcode = 0; destp = (caddr_t)arginfo - szsigcode - SPARE_USRSPACE - roundup(execpath_len, sizeof(char *)) - roundup(sizeof(canary), sizeof(char *)) - roundup(sizeof(pagesizes32), sizeof(char *)) - roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); /* * install sigcode */ if (szsigcode != 0) copyout(imgp->proc->p_sysent->sv_sigcode, ((caddr_t)arginfo - szsigcode), szsigcode); /* * Copy the image path for the rtld. */ if (execpath_len != 0) { imgp->execpathp = (uintptr_t)arginfo - szsigcode - execpath_len; copyout(imgp->execpath, (void *)imgp->execpathp, execpath_len); } /* * Prepare the canary for SSP. */ arc4rand(canary, sizeof(canary), 0); imgp->canary = (uintptr_t)arginfo - szsigcode - execpath_len - sizeof(canary); copyout(canary, (void *)imgp->canary, sizeof(canary)); imgp->canarylen = sizeof(canary); /* * Prepare the pagesizes array. */ for (i = 0; i < MAXPAGESIZES; i++) pagesizes32[i] = (uint32_t)pagesizes[i]; imgp->pagesizes = (uintptr_t)arginfo - szsigcode - execpath_len - roundup(sizeof(canary), sizeof(char *)) - sizeof(pagesizes32); copyout(pagesizes32, (void *)imgp->pagesizes, sizeof(pagesizes32)); imgp->pagesizeslen = sizeof(pagesizes32); /* * If we have a valid auxargs ptr, prepare some room * on the stack. */ if (imgp->auxargs) { /* * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for * lower compatibility. */ imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : (AT_COUNT * 2); /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets,and imgp->auxarg_size is room * for argument of Runtime loader. */ vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + imgp->auxarg_size + execpath_len) * sizeof(u_int32_t)); } else /* * The '+ 2' is for the null pointers at the end of each of * the arg and env vector sets */ vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); /* * vectp also becomes our initial stack base */ stack_base = vectp; stringp = imgp->args->begin_argv; argc = imgp->args->argc; envc = imgp->args->envc; /* * Copy out strings - arguments and environment. */ copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); /* * Fill in "ps_strings" struct for ps, w, etc. */ suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nargvstr, argc); /* * Fill in argument portion of vector table. */ for (; argc > 0; --argc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* a null vector table pointer separates the argp's from the envp's */ suword32(vectp++, 0); suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp); suword32(&arginfo->ps_nenvstr, envc); /* * Fill in environment portion of vector table. */ for (; envc > 0; --envc) { suword32(vectp++, (u_int32_t)(intptr_t)destp); while (*stringp++ != 0) destp++; destp++; } /* end of vector table is a null pointer */ suword32(vectp, 0); return ((register_t *)stack_base); } int freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap) { struct kld_file_stat stat; struct kld32_file_stat stat32; int error, version; if ((error = copyin(&uap->stat->version, &version, sizeof(version))) != 0) return (error); if (version != sizeof(struct kld32_file_stat_1) && version != sizeof(struct kld32_file_stat)) return (EINVAL); error = kern_kldstat(td, uap->fileid, &stat); if (error != 0) return (error); bcopy(&stat.name[0], &stat32.name[0], sizeof(stat.name)); CP(stat, stat32, refs); CP(stat, stat32, id); PTROUT_CP(stat, stat32, address); CP(stat, stat32, size); bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname)); return (copyout(&stat32, uap->stat, version)); } int freebsd32_posix_fallocate(struct thread *td, struct freebsd32_posix_fallocate_args *uap) { return (kern_posix_fallocate(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len))); } int freebsd32_posix_fadvise(struct thread *td, struct freebsd32_posix_fadvise_args *uap) { return (kern_posix_fadvise(td, uap->fd, PAIR32TO64(off_t, uap->offset), PAIR32TO64(off_t, uap->len), uap->advice)); } + +int +convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig) +{ + + CP(*sig32, *sig, sigev_notify); + switch (sig->sigev_notify) { + case SIGEV_NONE: + break; + case SIGEV_THREAD_ID: + CP(*sig32, *sig, sigev_notify_thread_id); + /* FALLTHROUGH */ + case SIGEV_SIGNAL: + CP(*sig32, *sig, sigev_signo); + PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); + break; + case SIGEV_KEVENT: + CP(*sig32, *sig, sigev_notify_kqueue); + CP(*sig32, *sig, sigev_notify_kevent_flags); + PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); + break; + default: + return (EINVAL); + } + return (0); +} Index: head/sys/compat/freebsd32/freebsd32_signal.h =================================================================== --- head/sys/compat/freebsd32/freebsd32_signal.h (revision 253526) +++ head/sys/compat/freebsd32/freebsd32_signal.h (revision 253527) @@ -1,102 +1,104 @@ /*- * Copyright (c) 2006 David Xu * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _COMPAT_FREEBSD32_SIGNAL_H_ #define _COMPAT_FREEBSD32_SIGNAL_H_ struct sigaltstack32 { u_int32_t ss_sp; /* signal stack base */ u_int32_t ss_size; /* signal stack length */ int ss_flags; /* SS_DISABLE and/or SS_ONSTACK */ }; union sigval32 { int sival_int; u_int32_t sival_ptr; /* 6.0 compatibility */ int sigval_int; u_int32_t sigval_ptr; }; struct siginfo32 { int si_signo; /* signal number */ int si_errno; /* errno association */ int si_code; /* signal code */ int32_t si_pid; /* sending process */ u_int32_t si_uid; /* sender's ruid */ int si_status; /* exit value */ u_int32_t si_addr; /* faulting instruction */ union sigval32 si_value; /* signal value */ union { struct { int _trapno;/* machine specific trap code */ } _fault; struct { int _timerid; int _overrun; } _timer; struct { int _mqd; } _mesgq; struct { int _band; /* band event for SIGPOLL */ } _poll; /* was this ever used ? */ struct { int __spare1__; int __spare2__[7]; } __spare__; } _reason; }; struct osigevent32 { int sigev_notify; /* Notification type */ union { int __sigev_signo; /* Signal number */ int __sigev_notify_kqueue; } __sigev_u; union sigval32 sigev_value; /* Signal value */ }; struct sigevent32 { int sigev_notify; /* Notification type */ int sigev_signo; /* Signal number */ union sigval32 sigev_value; /* Signal value */ union { __lwpid_t _threadid; struct { uint32_t _function; uint32_t _attribute; } _sigev_thread; unsigned short _kevent_flags; uint32_t __spare__[8]; } _sigev_un; }; +struct sigevent; +int convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig); void siginfo_to_siginfo32(const siginfo_t *src, struct siginfo32 *dst); #endif /* !_COMPAT_FREEBSD32_SIGNAL_H_ */ Index: head/sys/kern/vfs_aio.c =================================================================== --- head/sys/kern/vfs_aio.c (revision 253526) +++ head/sys/kern/vfs_aio.c (revision 253527) @@ -1,3093 +1,3068 @@ /*- * Copyright (c) 1997 John S. Dyson. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. John S. Dyson's name may not be used to endorse or promote products * derived from this software without specific prior written permission. * * DISCLAIMER: This code isn't warranted to do anything useful. Anything * bad that happens because of using this software isn't the responsibility * of the author. This software is distributed AS-IS. */ /* * This file contains support for the POSIX 1003.1B AIO/LIO facility. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "opt_vfs_aio.h" /* * Counter for allocating reference ids to new jobs. Wrapped to 1 on * overflow. (XXX will be removed soon.) */ static u_long jobrefid; /* * Counter for aio_fsync. */ static uint64_t jobseqno; #define JOBST_NULL 0 #define JOBST_JOBQSOCK 1 #define JOBST_JOBQGLOBAL 2 #define JOBST_JOBRUNNING 3 #define JOBST_JOBFINISHED 4 #define JOBST_JOBQBUF 5 #define JOBST_JOBQSYNC 6 #ifndef MAX_AIO_PER_PROC #define MAX_AIO_PER_PROC 32 #endif #ifndef MAX_AIO_QUEUE_PER_PROC #define MAX_AIO_QUEUE_PER_PROC 256 /* Bigger than AIO_LISTIO_MAX */ #endif #ifndef MAX_AIO_PROCS #define MAX_AIO_PROCS 32 #endif #ifndef MAX_AIO_QUEUE #define MAX_AIO_QUEUE 1024 /* Bigger than AIO_LISTIO_MAX */ #endif #ifndef TARGET_AIO_PROCS #define TARGET_AIO_PROCS 4 #endif #ifndef MAX_BUF_AIO #define MAX_BUF_AIO 16 #endif #ifndef AIOD_TIMEOUT_DEFAULT #define AIOD_TIMEOUT_DEFAULT (10 * hz) #endif #ifndef AIOD_LIFETIME_DEFAULT #define AIOD_LIFETIME_DEFAULT (30 * hz) #endif FEATURE(aio, "Asynchronous I/O"); static MALLOC_DEFINE(M_LIO, "lio", "listio aio control block list"); static SYSCTL_NODE(_vfs, OID_AUTO, aio, CTLFLAG_RW, 0, "Async IO management"); static int max_aio_procs = MAX_AIO_PROCS; SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_procs, CTLFLAG_RW, &max_aio_procs, 0, "Maximum number of kernel threads to use for handling async IO "); static int num_aio_procs = 0; SYSCTL_INT(_vfs_aio, OID_AUTO, num_aio_procs, CTLFLAG_RD, &num_aio_procs, 0, "Number of presently active kernel threads for async IO"); /* * The code will adjust the actual number of AIO processes towards this * number when it gets a chance. */ static int target_aio_procs = TARGET_AIO_PROCS; SYSCTL_INT(_vfs_aio, OID_AUTO, target_aio_procs, CTLFLAG_RW, &target_aio_procs, 0, "Preferred number of ready kernel threads for async IO"); static int max_queue_count = MAX_AIO_QUEUE; SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue, CTLFLAG_RW, &max_queue_count, 0, "Maximum number of aio requests to queue, globally"); static int num_queue_count = 0; SYSCTL_INT(_vfs_aio, OID_AUTO, num_queue_count, CTLFLAG_RD, &num_queue_count, 0, "Number of queued aio requests"); static int num_buf_aio = 0; SYSCTL_INT(_vfs_aio, OID_AUTO, num_buf_aio, CTLFLAG_RD, &num_buf_aio, 0, "Number of aio requests presently handled by the buf subsystem"); /* Number of async I/O thread in the process of being started */ /* XXX This should be local to aio_aqueue() */ static int num_aio_resv_start = 0; static int aiod_timeout; SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_timeout, CTLFLAG_RW, &aiod_timeout, 0, "Timeout value for synchronous aio operations"); static int aiod_lifetime; SYSCTL_INT(_vfs_aio, OID_AUTO, aiod_lifetime, CTLFLAG_RW, &aiod_lifetime, 0, "Maximum lifetime for idle aiod"); static int unloadable = 0; SYSCTL_INT(_vfs_aio, OID_AUTO, unloadable, CTLFLAG_RW, &unloadable, 0, "Allow unload of aio (not recommended)"); static int max_aio_per_proc = MAX_AIO_PER_PROC; SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_per_proc, CTLFLAG_RW, &max_aio_per_proc, 0, "Maximum active aio requests per process (stored in the process)"); static int max_aio_queue_per_proc = MAX_AIO_QUEUE_PER_PROC; SYSCTL_INT(_vfs_aio, OID_AUTO, max_aio_queue_per_proc, CTLFLAG_RW, &max_aio_queue_per_proc, 0, "Maximum queued aio requests per process (stored in the process)"); static int max_buf_aio = MAX_BUF_AIO; SYSCTL_INT(_vfs_aio, OID_AUTO, max_buf_aio, CTLFLAG_RW, &max_buf_aio, 0, "Maximum buf aio requests per process (stored in the process)"); typedef struct oaiocb { int aio_fildes; /* File descriptor */ off_t aio_offset; /* File offset for I/O */ volatile void *aio_buf; /* I/O buffer in process space */ size_t aio_nbytes; /* Number of bytes for I/O */ struct osigevent aio_sigevent; /* Signal to deliver */ int aio_lio_opcode; /* LIO opcode */ int aio_reqprio; /* Request priority -- ignored */ struct __aiocb_private _aiocb_private; } oaiocb_t; /* * Below is a key of locks used to protect each member of struct aiocblist * aioliojob and kaioinfo and any backends. * * * - need not protected * a - locked by kaioinfo lock * b - locked by backend lock, the backend lock can be null in some cases, * for example, BIO belongs to this type, in this case, proc lock is * reused. * c - locked by aio_job_mtx, the lock for the generic file I/O backend. */ /* * Current, there is only two backends: BIO and generic file I/O. * socket I/O is served by generic file I/O, this is not a good idea, since * disk file I/O and any other types without O_NONBLOCK flag can block daemon * threads, if there is no thread to serve socket I/O, the socket I/O will be * delayed too long or starved, we should create some threads dedicated to * sockets to do non-blocking I/O, same for pipe and fifo, for these I/O * systems we really need non-blocking interface, fiddling O_NONBLOCK in file * structure is not safe because there is race between userland and aio * daemons. */ struct aiocblist { TAILQ_ENTRY(aiocblist) list; /* (b) internal list of for backend */ TAILQ_ENTRY(aiocblist) plist; /* (a) list of jobs for each backend */ TAILQ_ENTRY(aiocblist) allist; /* (a) list of all jobs in proc */ int jobflags; /* (a) job flags */ int jobstate; /* (b) job state */ int inputcharge; /* (*) input blockes */ int outputcharge; /* (*) output blockes */ struct buf *bp; /* (*) private to BIO backend, * buffer pointer */ struct proc *userproc; /* (*) user process */ struct ucred *cred; /* (*) active credential when created */ struct file *fd_file; /* (*) pointer to file structure */ struct aioliojob *lio; /* (*) optional lio job */ struct aiocb *uuaiocb; /* (*) pointer in userspace of aiocb */ struct knlist klist; /* (a) list of knotes */ struct aiocb uaiocb; /* (*) kernel I/O control block */ ksiginfo_t ksi; /* (a) realtime signal info */ struct task biotask; /* (*) private to BIO backend */ uint64_t seqno; /* (*) job number */ int pending; /* (a) number of pending I/O, aio_fsync only */ }; /* jobflags */ #define AIOCBLIST_DONE 0x01 #define AIOCBLIST_BUFDONE 0x02 #define AIOCBLIST_RUNDOWN 0x04 #define AIOCBLIST_CHECKSYNC 0x08 /* * AIO process info */ #define AIOP_FREE 0x1 /* proc on free queue */ struct aiothreadlist { int aiothreadflags; /* (c) AIO proc flags */ TAILQ_ENTRY(aiothreadlist) list; /* (c) list of processes */ struct thread *aiothread; /* (*) the AIO thread */ }; /* * data-structure for lio signal management */ struct aioliojob { int lioj_flags; /* (a) listio flags */ int lioj_count; /* (a) listio flags */ int lioj_finished_count; /* (a) listio flags */ struct sigevent lioj_signal; /* (a) signal on all I/O done */ TAILQ_ENTRY(aioliojob) lioj_list; /* (a) lio list */ struct knlist klist; /* (a) list of knotes */ ksiginfo_t lioj_ksi; /* (a) Realtime signal info */ }; #define LIOJ_SIGNAL 0x1 /* signal on all done (lio) */ #define LIOJ_SIGNAL_POSTED 0x2 /* signal has been posted */ #define LIOJ_KEVENT_POSTED 0x4 /* kevent triggered */ /* * per process aio data structure */ struct kaioinfo { struct mtx kaio_mtx; /* the lock to protect this struct */ int kaio_flags; /* (a) per process kaio flags */ int kaio_maxactive_count; /* (*) maximum number of AIOs */ int kaio_active_count; /* (c) number of currently used AIOs */ int kaio_qallowed_count; /* (*) maxiumu size of AIO queue */ int kaio_count; /* (a) size of AIO queue */ int kaio_ballowed_count; /* (*) maximum number of buffers */ int kaio_buffer_count; /* (a) number of physio buffers */ TAILQ_HEAD(,aiocblist) kaio_all; /* (a) all AIOs in the process */ TAILQ_HEAD(,aiocblist) kaio_done; /* (a) done queue for process */ TAILQ_HEAD(,aioliojob) kaio_liojoblist; /* (a) list of lio jobs */ TAILQ_HEAD(,aiocblist) kaio_jobqueue; /* (a) job queue for process */ TAILQ_HEAD(,aiocblist) kaio_bufqueue; /* (a) buffer job queue for process */ TAILQ_HEAD(,aiocblist) kaio_sockqueue; /* (a) queue for aios waiting on sockets, * NOT USED YET. */ TAILQ_HEAD(,aiocblist) kaio_syncqueue; /* (a) queue for aio_fsync */ struct task kaio_task; /* (*) task to kick aio threads */ }; #define AIO_LOCK(ki) mtx_lock(&(ki)->kaio_mtx) #define AIO_UNLOCK(ki) mtx_unlock(&(ki)->kaio_mtx) #define AIO_LOCK_ASSERT(ki, f) mtx_assert(&(ki)->kaio_mtx, (f)) #define AIO_MTX(ki) (&(ki)->kaio_mtx) #define KAIO_RUNDOWN 0x1 /* process is being run down */ #define KAIO_WAKEUP 0x2 /* wakeup process when there is a significant event */ /* * Operations used to interact with userland aio control blocks. * Different ABIs provide their own operations. */ struct aiocb_ops { int (*copyin)(struct aiocb *ujob, struct aiocb *kjob); long (*fetch_status)(struct aiocb *ujob); long (*fetch_error)(struct aiocb *ujob); int (*store_status)(struct aiocb *ujob, long status); int (*store_error)(struct aiocb *ujob, long error); int (*store_kernelinfo)(struct aiocb *ujob, long jobref); int (*store_aiocb)(struct aiocb **ujobp, struct aiocb *ujob); }; static TAILQ_HEAD(,aiothreadlist) aio_freeproc; /* (c) Idle daemons */ static struct sema aio_newproc_sem; static struct mtx aio_job_mtx; static struct mtx aio_sock_mtx; static TAILQ_HEAD(,aiocblist) aio_jobs; /* (c) Async job list */ static struct unrhdr *aiod_unr; void aio_init_aioinfo(struct proc *p); static int aio_onceonly(void); static int aio_free_entry(struct aiocblist *aiocbe); static void aio_process_rw(struct aiocblist *aiocbe); static void aio_process_sync(struct aiocblist *aiocbe); static void aio_process_mlock(struct aiocblist *aiocbe); static int aio_newproc(int *); int aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lio, int type, struct aiocb_ops *ops); static void aio_physwakeup(struct buf *bp); static void aio_proc_rundown(void *arg, struct proc *p); static void aio_proc_rundown_exec(void *arg, struct proc *p, struct image_params *imgp); static int aio_qphysio(struct proc *p, struct aiocblist *iocb); static void biohelper(void *, int); static void aio_daemon(void *param); static void aio_swake_cb(struct socket *, struct sockbuf *); static int aio_unload(void); static void aio_bio_done_notify(struct proc *userp, struct aiocblist *aiocbe, int type); #define DONE_BUF 1 #define DONE_QUEUE 2 static int aio_kick(struct proc *userp); static void aio_kick_nowait(struct proc *userp); static void aio_kick_helper(void *context, int pending); static int filt_aioattach(struct knote *kn); static void filt_aiodetach(struct knote *kn); static int filt_aio(struct knote *kn, long hint); static int filt_lioattach(struct knote *kn); static void filt_liodetach(struct knote *kn); static int filt_lio(struct knote *kn, long hint); /* * Zones for: * kaio Per process async io info * aiop async io thread data * aiocb async io jobs * aiol list io job pointer - internal to aio_suspend XXX * aiolio list io jobs */ static uma_zone_t kaio_zone, aiop_zone, aiocb_zone, aiol_zone, aiolio_zone; /* kqueue filters for aio */ static struct filterops aio_filtops = { .f_isfd = 0, .f_attach = filt_aioattach, .f_detach = filt_aiodetach, .f_event = filt_aio, }; static struct filterops lio_filtops = { .f_isfd = 0, .f_attach = filt_lioattach, .f_detach = filt_liodetach, .f_event = filt_lio }; static eventhandler_tag exit_tag, exec_tag; TASKQUEUE_DEFINE_THREAD(aiod_bio); /* * Main operations function for use as a kernel module. */ static int aio_modload(struct module *module, int cmd, void *arg) { int error = 0; switch (cmd) { case MOD_LOAD: aio_onceonly(); break; case MOD_UNLOAD: error = aio_unload(); break; case MOD_SHUTDOWN: break; default: error = EINVAL; break; } return (error); } static moduledata_t aio_mod = { "aio", &aio_modload, NULL }; static struct syscall_helper_data aio_syscalls[] = { SYSCALL_INIT_HELPER(aio_cancel), SYSCALL_INIT_HELPER(aio_error), SYSCALL_INIT_HELPER(aio_fsync), SYSCALL_INIT_HELPER(aio_mlock), SYSCALL_INIT_HELPER(aio_read), SYSCALL_INIT_HELPER(aio_return), SYSCALL_INIT_HELPER(aio_suspend), SYSCALL_INIT_HELPER(aio_waitcomplete), SYSCALL_INIT_HELPER(aio_write), SYSCALL_INIT_HELPER(lio_listio), SYSCALL_INIT_HELPER(oaio_read), SYSCALL_INIT_HELPER(oaio_write), SYSCALL_INIT_HELPER(olio_listio), SYSCALL_INIT_LAST }; #ifdef COMPAT_FREEBSD32 #include #include #include #include #include #include #include static struct syscall_helper_data aio32_syscalls[] = { SYSCALL32_INIT_HELPER(freebsd32_aio_return), SYSCALL32_INIT_HELPER(freebsd32_aio_suspend), SYSCALL32_INIT_HELPER(freebsd32_aio_cancel), SYSCALL32_INIT_HELPER(freebsd32_aio_error), SYSCALL32_INIT_HELPER(freebsd32_aio_fsync), SYSCALL32_INIT_HELPER(freebsd32_aio_mlock), SYSCALL32_INIT_HELPER(freebsd32_aio_read), SYSCALL32_INIT_HELPER(freebsd32_aio_write), SYSCALL32_INIT_HELPER(freebsd32_aio_waitcomplete), SYSCALL32_INIT_HELPER(freebsd32_lio_listio), SYSCALL32_INIT_HELPER(freebsd32_oaio_read), SYSCALL32_INIT_HELPER(freebsd32_oaio_write), SYSCALL32_INIT_HELPER(freebsd32_olio_listio), SYSCALL_INIT_LAST }; #endif DECLARE_MODULE(aio, aio_mod, SI_SUB_VFS, SI_ORDER_ANY); MODULE_VERSION(aio, 1); /* * Startup initialization */ static int aio_onceonly(void) { int error; /* XXX: should probably just use so->callback */ aio_swake = &aio_swake_cb; exit_tag = EVENTHANDLER_REGISTER(process_exit, aio_proc_rundown, NULL, EVENTHANDLER_PRI_ANY); exec_tag = EVENTHANDLER_REGISTER(process_exec, aio_proc_rundown_exec, NULL, EVENTHANDLER_PRI_ANY); kqueue_add_filteropts(EVFILT_AIO, &aio_filtops); kqueue_add_filteropts(EVFILT_LIO, &lio_filtops); TAILQ_INIT(&aio_freeproc); sema_init(&aio_newproc_sem, 0, "aio_new_proc"); mtx_init(&aio_job_mtx, "aio_job", NULL, MTX_DEF); mtx_init(&aio_sock_mtx, "aio_sock", NULL, MTX_DEF); TAILQ_INIT(&aio_jobs); aiod_unr = new_unrhdr(1, INT_MAX, NULL); kaio_zone = uma_zcreate("AIO", sizeof(struct kaioinfo), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); aiop_zone = uma_zcreate("AIOP", sizeof(struct aiothreadlist), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); aiocb_zone = uma_zcreate("AIOCB", sizeof(struct aiocblist), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); aiol_zone = uma_zcreate("AIOL", AIO_LISTIO_MAX*sizeof(intptr_t) , NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); aiolio_zone = uma_zcreate("AIOLIO", sizeof(struct aioliojob), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); aiod_timeout = AIOD_TIMEOUT_DEFAULT; aiod_lifetime = AIOD_LIFETIME_DEFAULT; jobrefid = 1; async_io_version = _POSIX_VERSION; p31b_setcfg(CTL_P1003_1B_AIO_LISTIO_MAX, AIO_LISTIO_MAX); p31b_setcfg(CTL_P1003_1B_AIO_MAX, MAX_AIO_QUEUE); p31b_setcfg(CTL_P1003_1B_AIO_PRIO_DELTA_MAX, 0); error = syscall_helper_register(aio_syscalls); if (error) return (error); #ifdef COMPAT_FREEBSD32 error = syscall32_helper_register(aio32_syscalls); if (error) return (error); #endif return (0); } /* * Callback for unload of AIO when used as a module. */ static int aio_unload(void) { int error; /* * XXX: no unloads by default, it's too dangerous. * perhaps we could do it if locked out callers and then * did an aio_proc_rundown() on each process. * * jhb: aio_proc_rundown() needs to run on curproc though, * so I don't think that would fly. */ if (!unloadable) return (EOPNOTSUPP); #ifdef COMPAT_FREEBSD32 syscall32_helper_unregister(aio32_syscalls); #endif syscall_helper_unregister(aio_syscalls); error = kqueue_del_filteropts(EVFILT_AIO); if (error) return error; error = kqueue_del_filteropts(EVFILT_LIO); if (error) return error; async_io_version = 0; aio_swake = NULL; taskqueue_free(taskqueue_aiod_bio); delete_unrhdr(aiod_unr); uma_zdestroy(kaio_zone); uma_zdestroy(aiop_zone); uma_zdestroy(aiocb_zone); uma_zdestroy(aiol_zone); uma_zdestroy(aiolio_zone); EVENTHANDLER_DEREGISTER(process_exit, exit_tag); EVENTHANDLER_DEREGISTER(process_exec, exec_tag); mtx_destroy(&aio_job_mtx); mtx_destroy(&aio_sock_mtx); sema_destroy(&aio_newproc_sem); p31b_setcfg(CTL_P1003_1B_AIO_LISTIO_MAX, -1); p31b_setcfg(CTL_P1003_1B_AIO_MAX, -1); p31b_setcfg(CTL_P1003_1B_AIO_PRIO_DELTA_MAX, -1); return (0); } /* * Init the per-process aioinfo structure. The aioinfo limits are set * per-process for user limit (resource) management. */ void aio_init_aioinfo(struct proc *p) { struct kaioinfo *ki; ki = uma_zalloc(kaio_zone, M_WAITOK); mtx_init(&ki->kaio_mtx, "aiomtx", NULL, MTX_DEF); ki->kaio_flags = 0; ki->kaio_maxactive_count = max_aio_per_proc; ki->kaio_active_count = 0; ki->kaio_qallowed_count = max_aio_queue_per_proc; ki->kaio_count = 0; ki->kaio_ballowed_count = max_buf_aio; ki->kaio_buffer_count = 0; TAILQ_INIT(&ki->kaio_all); TAILQ_INIT(&ki->kaio_done); TAILQ_INIT(&ki->kaio_jobqueue); TAILQ_INIT(&ki->kaio_bufqueue); TAILQ_INIT(&ki->kaio_liojoblist); TAILQ_INIT(&ki->kaio_sockqueue); TAILQ_INIT(&ki->kaio_syncqueue); TASK_INIT(&ki->kaio_task, 0, aio_kick_helper, p); PROC_LOCK(p); if (p->p_aioinfo == NULL) { p->p_aioinfo = ki; PROC_UNLOCK(p); } else { PROC_UNLOCK(p); mtx_destroy(&ki->kaio_mtx); uma_zfree(kaio_zone, ki); } while (num_aio_procs < MIN(target_aio_procs, max_aio_procs)) aio_newproc(NULL); } static int aio_sendsig(struct proc *p, struct sigevent *sigev, ksiginfo_t *ksi) { struct thread *td; int error; error = sigev_findtd(p, sigev, &td); if (error) return (error); if (!KSI_ONQ(ksi)) { ksiginfo_set_sigev(ksi, sigev); ksi->ksi_code = SI_ASYNCIO; ksi->ksi_flags |= KSI_EXT | KSI_INS; tdsendsignal(p, td, ksi->ksi_signo, ksi); } PROC_UNLOCK(p); return (error); } /* * Free a job entry. Wait for completion if it is currently active, but don't * delay forever. If we delay, we return a flag that says that we have to * restart the queue scan. */ static int aio_free_entry(struct aiocblist *aiocbe) { struct kaioinfo *ki; struct aioliojob *lj; struct proc *p; p = aiocbe->userproc; MPASS(curproc == p); ki = p->p_aioinfo; MPASS(ki != NULL); AIO_LOCK_ASSERT(ki, MA_OWNED); MPASS(aiocbe->jobstate == JOBST_JOBFINISHED); atomic_subtract_int(&num_queue_count, 1); ki->kaio_count--; MPASS(ki->kaio_count >= 0); TAILQ_REMOVE(&ki->kaio_done, aiocbe, plist); TAILQ_REMOVE(&ki->kaio_all, aiocbe, allist); lj = aiocbe->lio; if (lj) { lj->lioj_count--; lj->lioj_finished_count--; if (lj->lioj_count == 0) { TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); /* lio is going away, we need to destroy any knotes */ knlist_delete(&lj->klist, curthread, 1); PROC_LOCK(p); sigqueue_take(&lj->lioj_ksi); PROC_UNLOCK(p); uma_zfree(aiolio_zone, lj); } } /* aiocbe is going away, we need to destroy any knotes */ knlist_delete(&aiocbe->klist, curthread, 1); PROC_LOCK(p); sigqueue_take(&aiocbe->ksi); PROC_UNLOCK(p); MPASS(aiocbe->bp == NULL); aiocbe->jobstate = JOBST_NULL; AIO_UNLOCK(ki); /* * The thread argument here is used to find the owning process * and is also passed to fo_close() which may pass it to various * places such as devsw close() routines. Because of that, we * need a thread pointer from the process owning the job that is * persistent and won't disappear out from under us or move to * another process. * * Currently, all the callers of this function call it to remove * an aiocblist from the current process' job list either via a * syscall or due to the current process calling exit() or * execve(). Thus, we know that p == curproc. We also know that * curthread can't exit since we are curthread. * * Therefore, we use curthread as the thread to pass to * knlist_delete(). This does mean that it is possible for the * thread pointer at close time to differ from the thread pointer * at open time, but this is already true of file descriptors in * a multithreaded process. */ if (aiocbe->fd_file) fdrop(aiocbe->fd_file, curthread); crfree(aiocbe->cred); uma_zfree(aiocb_zone, aiocbe); AIO_LOCK(ki); return (0); } static void aio_proc_rundown_exec(void *arg, struct proc *p, struct image_params *imgp __unused) { aio_proc_rundown(arg, p); } /* * Rundown the jobs for a given process. */ static void aio_proc_rundown(void *arg, struct proc *p) { struct kaioinfo *ki; struct aioliojob *lj; struct aiocblist *cbe, *cbn; struct file *fp; struct socket *so; int remove; KASSERT(curthread->td_proc == p, ("%s: called on non-curproc", __func__)); ki = p->p_aioinfo; if (ki == NULL) return; AIO_LOCK(ki); ki->kaio_flags |= KAIO_RUNDOWN; restart: /* * Try to cancel all pending requests. This code simulates * aio_cancel on all pending I/O requests. */ TAILQ_FOREACH_SAFE(cbe, &ki->kaio_jobqueue, plist, cbn) { remove = 0; mtx_lock(&aio_job_mtx); if (cbe->jobstate == JOBST_JOBQGLOBAL) { TAILQ_REMOVE(&aio_jobs, cbe, list); remove = 1; } else if (cbe->jobstate == JOBST_JOBQSOCK) { fp = cbe->fd_file; MPASS(fp->f_type == DTYPE_SOCKET); so = fp->f_data; TAILQ_REMOVE(&so->so_aiojobq, cbe, list); remove = 1; } else if (cbe->jobstate == JOBST_JOBQSYNC) { TAILQ_REMOVE(&ki->kaio_syncqueue, cbe, list); remove = 1; } mtx_unlock(&aio_job_mtx); if (remove) { cbe->jobstate = JOBST_JOBFINISHED; cbe->uaiocb._aiocb_private.status = -1; cbe->uaiocb._aiocb_private.error = ECANCELED; TAILQ_REMOVE(&ki->kaio_jobqueue, cbe, plist); aio_bio_done_notify(p, cbe, DONE_QUEUE); } } /* Wait for all running I/O to be finished */ if (TAILQ_FIRST(&ki->kaio_bufqueue) || TAILQ_FIRST(&ki->kaio_jobqueue)) { ki->kaio_flags |= KAIO_WAKEUP; msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO, "aioprn", hz); goto restart; } /* Free all completed I/O requests. */ while ((cbe = TAILQ_FIRST(&ki->kaio_done)) != NULL) aio_free_entry(cbe); while ((lj = TAILQ_FIRST(&ki->kaio_liojoblist)) != NULL) { if (lj->lioj_count == 0) { TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); knlist_delete(&lj->klist, curthread, 1); PROC_LOCK(p); sigqueue_take(&lj->lioj_ksi); PROC_UNLOCK(p); uma_zfree(aiolio_zone, lj); } else { panic("LIO job not cleaned up: C:%d, FC:%d\n", lj->lioj_count, lj->lioj_finished_count); } } AIO_UNLOCK(ki); taskqueue_drain(taskqueue_aiod_bio, &ki->kaio_task); mtx_destroy(&ki->kaio_mtx); uma_zfree(kaio_zone, ki); p->p_aioinfo = NULL; } /* * Select a job to run (called by an AIO daemon). */ static struct aiocblist * aio_selectjob(struct aiothreadlist *aiop) { struct aiocblist *aiocbe; struct kaioinfo *ki; struct proc *userp; mtx_assert(&aio_job_mtx, MA_OWNED); TAILQ_FOREACH(aiocbe, &aio_jobs, list) { userp = aiocbe->userproc; ki = userp->p_aioinfo; if (ki->kaio_active_count < ki->kaio_maxactive_count) { TAILQ_REMOVE(&aio_jobs, aiocbe, list); /* Account for currently active jobs. */ ki->kaio_active_count++; aiocbe->jobstate = JOBST_JOBRUNNING; break; } } return (aiocbe); } /* * Move all data to a permanent storage device, this code * simulates fsync syscall. */ static int aio_fsync_vnode(struct thread *td, struct vnode *vp) { struct mount *mp; int error; if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) goto drop; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_object != NULL) { VM_OBJECT_WLOCK(vp->v_object); vm_object_page_clean(vp->v_object, 0, 0, 0); VM_OBJECT_WUNLOCK(vp->v_object); } error = VOP_FSYNC(vp, MNT_WAIT, td); VOP_UNLOCK(vp, 0); vn_finished_write(mp); drop: return (error); } /* * The AIO processing activity for LIO_READ/LIO_WRITE. This is the code that * does the I/O request for the non-physio version of the operations. The * normal vn operations are used, and this code should work in all instances * for every type of file, including pipes, sockets, fifos, and regular files. * * XXX I don't think it works well for socket, pipe, and fifo. */ static void aio_process_rw(struct aiocblist *aiocbe) { struct ucred *td_savedcred; struct thread *td; struct aiocb *cb; struct file *fp; struct socket *so; struct uio auio; struct iovec aiov; int cnt; int error; int oublock_st, oublock_end; int inblock_st, inblock_end; KASSERT(aiocbe->uaiocb.aio_lio_opcode == LIO_READ || aiocbe->uaiocb.aio_lio_opcode == LIO_WRITE, ("%s: opcode %d", __func__, aiocbe->uaiocb.aio_lio_opcode)); td = curthread; td_savedcred = td->td_ucred; td->td_ucred = aiocbe->cred; cb = &aiocbe->uaiocb; fp = aiocbe->fd_file; aiov.iov_base = (void *)(uintptr_t)cb->aio_buf; aiov.iov_len = cb->aio_nbytes; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; auio.uio_offset = cb->aio_offset; auio.uio_resid = cb->aio_nbytes; cnt = cb->aio_nbytes; auio.uio_segflg = UIO_USERSPACE; auio.uio_td = td; inblock_st = td->td_ru.ru_inblock; oublock_st = td->td_ru.ru_oublock; /* * aio_aqueue() acquires a reference to the file that is * released in aio_free_entry(). */ if (cb->aio_lio_opcode == LIO_READ) { auio.uio_rw = UIO_READ; if (auio.uio_resid == 0) error = 0; else error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, td); } else { if (fp->f_type == DTYPE_VNODE) bwillwrite(); auio.uio_rw = UIO_WRITE; error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, td); } inblock_end = td->td_ru.ru_inblock; oublock_end = td->td_ru.ru_oublock; aiocbe->inputcharge = inblock_end - inblock_st; aiocbe->outputcharge = oublock_end - oublock_st; if ((error) && (auio.uio_resid != cnt)) { if (error == ERESTART || error == EINTR || error == EWOULDBLOCK) error = 0; if ((error == EPIPE) && (cb->aio_lio_opcode == LIO_WRITE)) { int sigpipe = 1; if (fp->f_type == DTYPE_SOCKET) { so = fp->f_data; if (so->so_options & SO_NOSIGPIPE) sigpipe = 0; } if (sigpipe) { PROC_LOCK(aiocbe->userproc); kern_psignal(aiocbe->userproc, SIGPIPE); PROC_UNLOCK(aiocbe->userproc); } } } cnt -= auio.uio_resid; cb->_aiocb_private.error = error; cb->_aiocb_private.status = cnt; td->td_ucred = td_savedcred; } static void aio_process_sync(struct aiocblist *aiocbe) { struct thread *td = curthread; struct ucred *td_savedcred = td->td_ucred; struct aiocb *cb = &aiocbe->uaiocb; struct file *fp = aiocbe->fd_file; int error = 0; KASSERT(aiocbe->uaiocb.aio_lio_opcode == LIO_SYNC, ("%s: opcode %d", __func__, aiocbe->uaiocb.aio_lio_opcode)); td->td_ucred = aiocbe->cred; if (fp->f_vnode != NULL) error = aio_fsync_vnode(td, fp->f_vnode); cb->_aiocb_private.error = error; cb->_aiocb_private.status = 0; td->td_ucred = td_savedcred; } static void aio_process_mlock(struct aiocblist *aiocbe) { struct aiocb *cb = &aiocbe->uaiocb; int error; KASSERT(aiocbe->uaiocb.aio_lio_opcode == LIO_MLOCK, ("%s: opcode %d", __func__, aiocbe->uaiocb.aio_lio_opcode)); error = vm_mlock(aiocbe->userproc, aiocbe->cred, __DEVOLATILE(void *, cb->aio_buf), cb->aio_nbytes); cb->_aiocb_private.error = error; cb->_aiocb_private.status = 0; } static void aio_bio_done_notify(struct proc *userp, struct aiocblist *aiocbe, int type) { struct aioliojob *lj; struct kaioinfo *ki; struct aiocblist *scb, *scbn; int lj_done; ki = userp->p_aioinfo; AIO_LOCK_ASSERT(ki, MA_OWNED); lj = aiocbe->lio; lj_done = 0; if (lj) { lj->lioj_finished_count++; if (lj->lioj_count == lj->lioj_finished_count) lj_done = 1; } if (type == DONE_QUEUE) { aiocbe->jobflags |= AIOCBLIST_DONE; } else { aiocbe->jobflags |= AIOCBLIST_BUFDONE; } TAILQ_INSERT_TAIL(&ki->kaio_done, aiocbe, plist); aiocbe->jobstate = JOBST_JOBFINISHED; if (ki->kaio_flags & KAIO_RUNDOWN) goto notification_done; if (aiocbe->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL || aiocbe->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID) aio_sendsig(userp, &aiocbe->uaiocb.aio_sigevent, &aiocbe->ksi); KNOTE_LOCKED(&aiocbe->klist, 1); if (lj_done) { if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) { lj->lioj_flags |= LIOJ_KEVENT_POSTED; KNOTE_LOCKED(&lj->klist, 1); } if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) == LIOJ_SIGNAL && (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL || lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID)) { aio_sendsig(userp, &lj->lioj_signal, &lj->lioj_ksi); lj->lioj_flags |= LIOJ_SIGNAL_POSTED; } } notification_done: if (aiocbe->jobflags & AIOCBLIST_CHECKSYNC) { TAILQ_FOREACH_SAFE(scb, &ki->kaio_syncqueue, list, scbn) { if (aiocbe->fd_file == scb->fd_file && aiocbe->seqno < scb->seqno) { if (--scb->pending == 0) { mtx_lock(&aio_job_mtx); scb->jobstate = JOBST_JOBQGLOBAL; TAILQ_REMOVE(&ki->kaio_syncqueue, scb, list); TAILQ_INSERT_TAIL(&aio_jobs, scb, list); aio_kick_nowait(userp); mtx_unlock(&aio_job_mtx); } } } } if (ki->kaio_flags & KAIO_WAKEUP) { ki->kaio_flags &= ~KAIO_WAKEUP; wakeup(&userp->p_aioinfo); } } /* * The AIO daemon, most of the actual work is done in aio_process_*, * but the setup (and address space mgmt) is done in this routine. */ static void aio_daemon(void *_id) { struct aiocblist *aiocbe; struct aiothreadlist *aiop; struct kaioinfo *ki; struct proc *curcp, *mycp, *userp; struct vmspace *myvm, *tmpvm; struct thread *td = curthread; int id = (intptr_t)_id; /* * Local copies of curproc (cp) and vmspace (myvm) */ mycp = td->td_proc; myvm = mycp->p_vmspace; KASSERT(mycp->p_textvp == NULL, ("kthread has a textvp")); /* * Allocate and ready the aio control info. There is one aiop structure * per daemon. */ aiop = uma_zalloc(aiop_zone, M_WAITOK); aiop->aiothread = td; aiop->aiothreadflags = 0; /* The daemon resides in its own pgrp. */ sys_setsid(td, NULL); /* * Wakeup parent process. (Parent sleeps to keep from blasting away * and creating too many daemons.) */ sema_post(&aio_newproc_sem); mtx_lock(&aio_job_mtx); for (;;) { /* * curcp is the current daemon process context. * userp is the current user process context. */ curcp = mycp; /* * Take daemon off of free queue */ if (aiop->aiothreadflags & AIOP_FREE) { TAILQ_REMOVE(&aio_freeproc, aiop, list); aiop->aiothreadflags &= ~AIOP_FREE; } /* * Check for jobs. */ while ((aiocbe = aio_selectjob(aiop)) != NULL) { mtx_unlock(&aio_job_mtx); userp = aiocbe->userproc; /* * Connect to process address space for user program. */ if (userp != curcp) { /* * Save the current address space that we are * connected to. */ tmpvm = mycp->p_vmspace; /* * Point to the new user address space, and * refer to it. */ mycp->p_vmspace = userp->p_vmspace; atomic_add_int(&mycp->p_vmspace->vm_refcnt, 1); /* Activate the new mapping. */ pmap_activate(FIRST_THREAD_IN_PROC(mycp)); /* * If the old address space wasn't the daemons * own address space, then we need to remove the * daemon's reference from the other process * that it was acting on behalf of. */ if (tmpvm != myvm) { vmspace_free(tmpvm); } curcp = userp; } ki = userp->p_aioinfo; /* Do the I/O function. */ switch(aiocbe->uaiocb.aio_lio_opcode) { case LIO_READ: case LIO_WRITE: aio_process_rw(aiocbe); break; case LIO_SYNC: aio_process_sync(aiocbe); break; case LIO_MLOCK: aio_process_mlock(aiocbe); break; } mtx_lock(&aio_job_mtx); /* Decrement the active job count. */ ki->kaio_active_count--; mtx_unlock(&aio_job_mtx); AIO_LOCK(ki); TAILQ_REMOVE(&ki->kaio_jobqueue, aiocbe, plist); aio_bio_done_notify(userp, aiocbe, DONE_QUEUE); AIO_UNLOCK(ki); mtx_lock(&aio_job_mtx); } /* * Disconnect from user address space. */ if (curcp != mycp) { mtx_unlock(&aio_job_mtx); /* Get the user address space to disconnect from. */ tmpvm = mycp->p_vmspace; /* Get original address space for daemon. */ mycp->p_vmspace = myvm; /* Activate the daemon's address space. */ pmap_activate(FIRST_THREAD_IN_PROC(mycp)); #ifdef DIAGNOSTIC if (tmpvm == myvm) { printf("AIOD: vmspace problem -- %d\n", mycp->p_pid); } #endif /* Remove our vmspace reference. */ vmspace_free(tmpvm); curcp = mycp; mtx_lock(&aio_job_mtx); /* * We have to restart to avoid race, we only sleep if * no job can be selected, that should be * curcp == mycp. */ continue; } mtx_assert(&aio_job_mtx, MA_OWNED); TAILQ_INSERT_HEAD(&aio_freeproc, aiop, list); aiop->aiothreadflags |= AIOP_FREE; /* * If daemon is inactive for a long time, allow it to exit, * thereby freeing resources. */ if (msleep(aiop->aiothread, &aio_job_mtx, PRIBIO, "aiordy", aiod_lifetime)) { if (TAILQ_EMPTY(&aio_jobs)) { if ((aiop->aiothreadflags & AIOP_FREE) && (num_aio_procs > target_aio_procs)) { TAILQ_REMOVE(&aio_freeproc, aiop, list); num_aio_procs--; mtx_unlock(&aio_job_mtx); uma_zfree(aiop_zone, aiop); free_unr(aiod_unr, id); #ifdef DIAGNOSTIC if (mycp->p_vmspace->vm_refcnt <= 1) { printf("AIOD: bad vm refcnt for" " exiting daemon: %d\n", mycp->p_vmspace->vm_refcnt); } #endif kproc_exit(0); } } } } mtx_unlock(&aio_job_mtx); panic("shouldn't be here\n"); } /* * Create a new AIO daemon. This is mostly a kernel-thread fork routine. The * AIO daemon modifies its environment itself. */ static int aio_newproc(int *start) { int error; struct proc *p; int id; id = alloc_unr(aiod_unr); error = kproc_create(aio_daemon, (void *)(intptr_t)id, &p, RFNOWAIT, 0, "aiod%d", id); if (error == 0) { /* * Wait until daemon is started. */ sema_wait(&aio_newproc_sem); mtx_lock(&aio_job_mtx); num_aio_procs++; if (start != NULL) (*start)--; mtx_unlock(&aio_job_mtx); } else { free_unr(aiod_unr, id); } return (error); } /* * Try the high-performance, low-overhead physio method for eligible * VCHR devices. This method doesn't use an aio helper thread, and * thus has very low overhead. * * Assumes that the caller, aio_aqueue(), has incremented the file * structure's reference count, preventing its deallocation for the * duration of this call. */ static int aio_qphysio(struct proc *p, struct aiocblist *aiocbe) { struct aiocb *cb; struct file *fp; struct buf *bp; struct vnode *vp; struct cdevsw *csw; struct cdev *dev; struct kaioinfo *ki; struct aioliojob *lj; int error, ref; cb = &aiocbe->uaiocb; fp = aiocbe->fd_file; if (fp == NULL || fp->f_type != DTYPE_VNODE) return (-1); vp = fp->f_vnode; /* * If its not a disk, we don't want to return a positive error. * It causes the aio code to not fall through to try the thread * way when you're talking to a regular file. */ if (!vn_isdisk(vp, &error)) { if (error == ENOTBLK) return (-1); else return (error); } if (vp->v_bufobj.bo_bsize == 0) return (-1); if (cb->aio_nbytes % vp->v_bufobj.bo_bsize) return (-1); if (cb->aio_nbytes > MAXPHYS - (((vm_offset_t) cb->aio_buf) & PAGE_MASK)) return (-1); ki = p->p_aioinfo; if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) return (-1); ref = 0; csw = devvn_refthread(vp, &dev, &ref); if (csw == NULL) return (ENXIO); if (cb->aio_nbytes > dev->si_iosize_max) { error = -1; goto unref; } /* Create and build a buffer header for a transfer. */ bp = (struct buf *)getpbuf(NULL); BUF_KERNPROC(bp); AIO_LOCK(ki); ki->kaio_count++; ki->kaio_buffer_count++; lj = aiocbe->lio; if (lj) lj->lioj_count++; AIO_UNLOCK(ki); /* * Get a copy of the kva from the physical buffer. */ error = 0; bp->b_bcount = cb->aio_nbytes; bp->b_bufsize = cb->aio_nbytes; bp->b_iodone = aio_physwakeup; bp->b_saveaddr = bp->b_data; bp->b_data = (void *)(uintptr_t)cb->aio_buf; bp->b_offset = cb->aio_offset; bp->b_iooffset = cb->aio_offset; bp->b_blkno = btodb(cb->aio_offset); bp->b_iocmd = cb->aio_lio_opcode == LIO_WRITE ? BIO_WRITE : BIO_READ; /* * Bring buffer into kernel space. */ if (vmapbuf(bp, (csw->d_flags & D_UNMAPPED_IO) == 0) < 0) { error = EFAULT; goto doerror; } AIO_LOCK(ki); aiocbe->bp = bp; bp->b_caller1 = (void *)aiocbe; TAILQ_INSERT_TAIL(&ki->kaio_bufqueue, aiocbe, plist); TAILQ_INSERT_TAIL(&ki->kaio_all, aiocbe, allist); aiocbe->jobstate = JOBST_JOBQBUF; cb->_aiocb_private.status = cb->aio_nbytes; AIO_UNLOCK(ki); atomic_add_int(&num_queue_count, 1); atomic_add_int(&num_buf_aio, 1); bp->b_error = 0; TASK_INIT(&aiocbe->biotask, 0, biohelper, aiocbe); /* Perform transfer. */ dev_strategy_csw(dev, csw, bp); dev_relthread(dev, ref); return (0); doerror: AIO_LOCK(ki); ki->kaio_count--; ki->kaio_buffer_count--; if (lj) lj->lioj_count--; aiocbe->bp = NULL; AIO_UNLOCK(ki); relpbuf(bp, NULL); unref: dev_relthread(dev, ref); return (error); } /* * Wake up aio requests that may be serviceable now. */ static void aio_swake_cb(struct socket *so, struct sockbuf *sb) { struct aiocblist *cb, *cbn; int opcode; SOCKBUF_LOCK_ASSERT(sb); if (sb == &so->so_snd) opcode = LIO_WRITE; else opcode = LIO_READ; sb->sb_flags &= ~SB_AIO; mtx_lock(&aio_job_mtx); TAILQ_FOREACH_SAFE(cb, &so->so_aiojobq, list, cbn) { if (opcode == cb->uaiocb.aio_lio_opcode) { if (cb->jobstate != JOBST_JOBQSOCK) panic("invalid queue value"); /* XXX * We don't have actual sockets backend yet, * so we simply move the requests to the generic * file I/O backend. */ TAILQ_REMOVE(&so->so_aiojobq, cb, list); TAILQ_INSERT_TAIL(&aio_jobs, cb, list); aio_kick_nowait(cb->userproc); } } mtx_unlock(&aio_job_mtx); } static int convert_old_sigevent(struct osigevent *osig, struct sigevent *nsig) { /* * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are * supported by AIO with the old sigevent structure. */ nsig->sigev_notify = osig->sigev_notify; switch (nsig->sigev_notify) { case SIGEV_NONE: break; case SIGEV_SIGNAL: nsig->sigev_signo = osig->__sigev_u.__sigev_signo; break; case SIGEV_KEVENT: nsig->sigev_notify_kqueue = osig->__sigev_u.__sigev_notify_kqueue; nsig->sigev_value.sival_ptr = osig->sigev_value.sival_ptr; break; default: return (EINVAL); } return (0); } static int aiocb_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob) { struct oaiocb *ojob; int error; bzero(kjob, sizeof(struct aiocb)); error = copyin(ujob, kjob, sizeof(struct oaiocb)); if (error) return (error); ojob = (struct oaiocb *)kjob; return (convert_old_sigevent(&ojob->aio_sigevent, &kjob->aio_sigevent)); } static int aiocb_copyin(struct aiocb *ujob, struct aiocb *kjob) { return (copyin(ujob, kjob, sizeof(struct aiocb))); } static long aiocb_fetch_status(struct aiocb *ujob) { return (fuword(&ujob->_aiocb_private.status)); } static long aiocb_fetch_error(struct aiocb *ujob) { return (fuword(&ujob->_aiocb_private.error)); } static int aiocb_store_status(struct aiocb *ujob, long status) { return (suword(&ujob->_aiocb_private.status, status)); } static int aiocb_store_error(struct aiocb *ujob, long error) { return (suword(&ujob->_aiocb_private.error, error)); } static int aiocb_store_kernelinfo(struct aiocb *ujob, long jobref) { return (suword(&ujob->_aiocb_private.kernelinfo, jobref)); } static int aiocb_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob) { return (suword(ujobp, (long)ujob)); } static struct aiocb_ops aiocb_ops = { .copyin = aiocb_copyin, .fetch_status = aiocb_fetch_status, .fetch_error = aiocb_fetch_error, .store_status = aiocb_store_status, .store_error = aiocb_store_error, .store_kernelinfo = aiocb_store_kernelinfo, .store_aiocb = aiocb_store_aiocb, }; static struct aiocb_ops aiocb_ops_osigevent = { .copyin = aiocb_copyin_old_sigevent, .fetch_status = aiocb_fetch_status, .fetch_error = aiocb_fetch_error, .store_status = aiocb_store_status, .store_error = aiocb_store_error, .store_kernelinfo = aiocb_store_kernelinfo, .store_aiocb = aiocb_store_aiocb, }; /* * Queue a new AIO request. Choosing either the threaded or direct physio VCHR * technique is done in this code. */ int aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lj, int type, struct aiocb_ops *ops) { struct proc *p = td->td_proc; struct file *fp; struct socket *so; struct aiocblist *aiocbe, *cb; struct kaioinfo *ki; struct kevent kev; struct sockbuf *sb; int opcode; int error; int fd, kqfd; int jid; u_short evflags; if (p->p_aioinfo == NULL) aio_init_aioinfo(p); ki = p->p_aioinfo; ops->store_status(job, -1); ops->store_error(job, 0); ops->store_kernelinfo(job, -1); if (num_queue_count >= max_queue_count || ki->kaio_count >= ki->kaio_qallowed_count) { ops->store_error(job, EAGAIN); return (EAGAIN); } aiocbe = uma_zalloc(aiocb_zone, M_WAITOK | M_ZERO); aiocbe->inputcharge = 0; aiocbe->outputcharge = 0; knlist_init_mtx(&aiocbe->klist, AIO_MTX(ki)); error = ops->copyin(job, &aiocbe->uaiocb); if (error) { ops->store_error(job, error); uma_zfree(aiocb_zone, aiocbe); return (error); } /* XXX: aio_nbytes is later casted to signed types. */ if (aiocbe->uaiocb.aio_nbytes > INT_MAX) { uma_zfree(aiocb_zone, aiocbe); return (EINVAL); } if (aiocbe->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT && aiocbe->uaiocb.aio_sigevent.sigev_notify != SIGEV_SIGNAL && aiocbe->uaiocb.aio_sigevent.sigev_notify != SIGEV_THREAD_ID && aiocbe->uaiocb.aio_sigevent.sigev_notify != SIGEV_NONE) { ops->store_error(job, EINVAL); uma_zfree(aiocb_zone, aiocbe); return (EINVAL); } if ((aiocbe->uaiocb.aio_sigevent.sigev_notify == SIGEV_SIGNAL || aiocbe->uaiocb.aio_sigevent.sigev_notify == SIGEV_THREAD_ID) && !_SIG_VALID(aiocbe->uaiocb.aio_sigevent.sigev_signo)) { uma_zfree(aiocb_zone, aiocbe); return (EINVAL); } ksiginfo_init(&aiocbe->ksi); /* Save userspace address of the job info. */ aiocbe->uuaiocb = job; /* Get the opcode. */ if (type != LIO_NOP) aiocbe->uaiocb.aio_lio_opcode = type; opcode = aiocbe->uaiocb.aio_lio_opcode; /* * Validate the opcode and fetch the file object for the specified * file descriptor. * * XXXRW: Moved the opcode validation up here so that we don't * retrieve a file descriptor without knowing what the capabiltity * should be. */ fd = aiocbe->uaiocb.aio_fildes; switch (opcode) { case LIO_WRITE: error = fget_write(td, fd, CAP_PWRITE, &fp); break; case LIO_READ: error = fget_read(td, fd, CAP_PREAD, &fp); break; case LIO_SYNC: error = fget(td, fd, CAP_FSYNC, &fp); break; case LIO_MLOCK: fp = NULL; break; case LIO_NOP: error = fget(td, fd, CAP_NONE, &fp); break; default: error = EINVAL; } if (error) { uma_zfree(aiocb_zone, aiocbe); ops->store_error(job, error); return (error); } if (opcode == LIO_SYNC && fp->f_vnode == NULL) { error = EINVAL; goto aqueue_fail; } if (opcode != LIO_SYNC && aiocbe->uaiocb.aio_offset == -1LL) { error = EINVAL; goto aqueue_fail; } aiocbe->fd_file = fp; mtx_lock(&aio_job_mtx); jid = jobrefid++; aiocbe->seqno = jobseqno++; mtx_unlock(&aio_job_mtx); error = ops->store_kernelinfo(job, jid); if (error) { error = EINVAL; goto aqueue_fail; } aiocbe->uaiocb._aiocb_private.kernelinfo = (void *)(intptr_t)jid; if (opcode == LIO_NOP) { fdrop(fp, td); uma_zfree(aiocb_zone, aiocbe); return (0); } if (aiocbe->uaiocb.aio_sigevent.sigev_notify != SIGEV_KEVENT) goto no_kqueue; evflags = aiocbe->uaiocb.aio_sigevent.sigev_notify_kevent_flags; if ((evflags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0) { error = EINVAL; goto aqueue_fail; } kqfd = aiocbe->uaiocb.aio_sigevent.sigev_notify_kqueue; kev.ident = (uintptr_t)aiocbe->uuaiocb; kev.filter = EVFILT_AIO; kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1 | evflags; kev.data = (intptr_t)aiocbe; kev.udata = aiocbe->uaiocb.aio_sigevent.sigev_value.sival_ptr; error = kqfd_register(kqfd, &kev, td, 1); aqueue_fail: if (error) { if (fp) fdrop(fp, td); uma_zfree(aiocb_zone, aiocbe); ops->store_error(job, error); goto done; } no_kqueue: ops->store_error(job, EINPROGRESS); aiocbe->uaiocb._aiocb_private.error = EINPROGRESS; aiocbe->userproc = p; aiocbe->cred = crhold(td->td_ucred); aiocbe->jobflags = 0; aiocbe->lio = lj; if (opcode == LIO_SYNC) goto queueit; if (fp && fp->f_type == DTYPE_SOCKET) { /* * Alternate queueing for socket ops: Reach down into the * descriptor to get the socket data. Then check to see if the * socket is ready to be read or written (based on the requested * operation). * * If it is not ready for io, then queue the aiocbe on the * socket, and set the flags so we get a call when sbnotify() * happens. * * Note if opcode is neither LIO_WRITE nor LIO_READ we lock * and unlock the snd sockbuf for no reason. */ so = fp->f_data; sb = (opcode == LIO_READ) ? &so->so_rcv : &so->so_snd; SOCKBUF_LOCK(sb); if (((opcode == LIO_READ) && (!soreadable(so))) || ((opcode == LIO_WRITE) && (!sowriteable(so)))) { sb->sb_flags |= SB_AIO; mtx_lock(&aio_job_mtx); TAILQ_INSERT_TAIL(&so->so_aiojobq, aiocbe, list); mtx_unlock(&aio_job_mtx); AIO_LOCK(ki); TAILQ_INSERT_TAIL(&ki->kaio_all, aiocbe, allist); TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist); aiocbe->jobstate = JOBST_JOBQSOCK; ki->kaio_count++; if (lj) lj->lioj_count++; AIO_UNLOCK(ki); SOCKBUF_UNLOCK(sb); atomic_add_int(&num_queue_count, 1); error = 0; goto done; } SOCKBUF_UNLOCK(sb); } if ((error = aio_qphysio(p, aiocbe)) == 0) goto done; #if 0 if (error > 0) { aiocbe->uaiocb._aiocb_private.error = error; ops->store_error(job, error); goto done; } #endif queueit: /* No buffer for daemon I/O. */ aiocbe->bp = NULL; atomic_add_int(&num_queue_count, 1); AIO_LOCK(ki); ki->kaio_count++; if (lj) lj->lioj_count++; TAILQ_INSERT_TAIL(&ki->kaio_jobqueue, aiocbe, plist); TAILQ_INSERT_TAIL(&ki->kaio_all, aiocbe, allist); if (opcode == LIO_SYNC) { TAILQ_FOREACH(cb, &ki->kaio_jobqueue, plist) { if (cb->fd_file == aiocbe->fd_file && cb->uaiocb.aio_lio_opcode != LIO_SYNC && cb->seqno < aiocbe->seqno) { cb->jobflags |= AIOCBLIST_CHECKSYNC; aiocbe->pending++; } } TAILQ_FOREACH(cb, &ki->kaio_bufqueue, plist) { if (cb->fd_file == aiocbe->fd_file && cb->uaiocb.aio_lio_opcode != LIO_SYNC && cb->seqno < aiocbe->seqno) { cb->jobflags |= AIOCBLIST_CHECKSYNC; aiocbe->pending++; } } if (aiocbe->pending != 0) { TAILQ_INSERT_TAIL(&ki->kaio_syncqueue, aiocbe, list); aiocbe->jobstate = JOBST_JOBQSYNC; AIO_UNLOCK(ki); goto done; } } mtx_lock(&aio_job_mtx); TAILQ_INSERT_TAIL(&aio_jobs, aiocbe, list); aiocbe->jobstate = JOBST_JOBQGLOBAL; aio_kick_nowait(p); mtx_unlock(&aio_job_mtx); AIO_UNLOCK(ki); error = 0; done: return (error); } static void aio_kick_nowait(struct proc *userp) { struct kaioinfo *ki = userp->p_aioinfo; struct aiothreadlist *aiop; mtx_assert(&aio_job_mtx, MA_OWNED); if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) { TAILQ_REMOVE(&aio_freeproc, aiop, list); aiop->aiothreadflags &= ~AIOP_FREE; wakeup(aiop->aiothread); } else if (((num_aio_resv_start + num_aio_procs) < max_aio_procs) && ((ki->kaio_active_count + num_aio_resv_start) < ki->kaio_maxactive_count)) { taskqueue_enqueue(taskqueue_aiod_bio, &ki->kaio_task); } } static int aio_kick(struct proc *userp) { struct kaioinfo *ki = userp->p_aioinfo; struct aiothreadlist *aiop; int error, ret = 0; mtx_assert(&aio_job_mtx, MA_OWNED); retryproc: if ((aiop = TAILQ_FIRST(&aio_freeproc)) != NULL) { TAILQ_REMOVE(&aio_freeproc, aiop, list); aiop->aiothreadflags &= ~AIOP_FREE; wakeup(aiop->aiothread); } else if (((num_aio_resv_start + num_aio_procs) < max_aio_procs) && ((ki->kaio_active_count + num_aio_resv_start) < ki->kaio_maxactive_count)) { num_aio_resv_start++; mtx_unlock(&aio_job_mtx); error = aio_newproc(&num_aio_resv_start); mtx_lock(&aio_job_mtx); if (error) { num_aio_resv_start--; goto retryproc; } } else { ret = -1; } return (ret); } static void aio_kick_helper(void *context, int pending) { struct proc *userp = context; mtx_lock(&aio_job_mtx); while (--pending >= 0) { if (aio_kick(userp)) break; } mtx_unlock(&aio_job_mtx); } /* * Support the aio_return system call, as a side-effect, kernel resources are * released. */ static int kern_aio_return(struct thread *td, struct aiocb *uaiocb, struct aiocb_ops *ops) { struct proc *p = td->td_proc; struct aiocblist *cb; struct kaioinfo *ki; int status, error; ki = p->p_aioinfo; if (ki == NULL) return (EINVAL); AIO_LOCK(ki); TAILQ_FOREACH(cb, &ki->kaio_done, plist) { if (cb->uuaiocb == uaiocb) break; } if (cb != NULL) { MPASS(cb->jobstate == JOBST_JOBFINISHED); status = cb->uaiocb._aiocb_private.status; error = cb->uaiocb._aiocb_private.error; td->td_retval[0] = status; if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) { td->td_ru.ru_oublock += cb->outputcharge; cb->outputcharge = 0; } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) { td->td_ru.ru_inblock += cb->inputcharge; cb->inputcharge = 0; } aio_free_entry(cb); AIO_UNLOCK(ki); ops->store_error(uaiocb, error); ops->store_status(uaiocb, status); } else { error = EINVAL; AIO_UNLOCK(ki); } return (error); } int sys_aio_return(struct thread *td, struct aio_return_args *uap) { return (kern_aio_return(td, uap->aiocbp, &aiocb_ops)); } /* * Allow a process to wakeup when any of the I/O requests are completed. */ static int kern_aio_suspend(struct thread *td, int njoblist, struct aiocb **ujoblist, struct timespec *ts) { struct proc *p = td->td_proc; struct timeval atv; struct kaioinfo *ki; struct aiocblist *cb, *cbfirst; int error, i, timo; timo = 0; if (ts) { if (ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000) return (EINVAL); TIMESPEC_TO_TIMEVAL(&atv, ts); if (itimerfix(&atv)) return (EINVAL); timo = tvtohz(&atv); } ki = p->p_aioinfo; if (ki == NULL) return (EAGAIN); if (njoblist == 0) return (0); AIO_LOCK(ki); for (;;) { cbfirst = NULL; error = 0; TAILQ_FOREACH(cb, &ki->kaio_all, allist) { for (i = 0; i < njoblist; i++) { if (cb->uuaiocb == ujoblist[i]) { if (cbfirst == NULL) cbfirst = cb; if (cb->jobstate == JOBST_JOBFINISHED) goto RETURN; } } } /* All tasks were finished. */ if (cbfirst == NULL) break; ki->kaio_flags |= KAIO_WAKEUP; error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH, "aiospn", timo); if (error == ERESTART) error = EINTR; if (error) break; } RETURN: AIO_UNLOCK(ki); return (error); } int sys_aio_suspend(struct thread *td, struct aio_suspend_args *uap) { struct timespec ts, *tsp; struct aiocb **ujoblist; int error; if (uap->nent < 0 || uap->nent > AIO_LISTIO_MAX) return (EINVAL); if (uap->timeout) { /* Get timespec struct. */ if ((error = copyin(uap->timeout, &ts, sizeof(ts))) != 0) return (error); tsp = &ts; } else tsp = NULL; ujoblist = uma_zalloc(aiol_zone, M_WAITOK); error = copyin(uap->aiocbp, ujoblist, uap->nent * sizeof(ujoblist[0])); if (error == 0) error = kern_aio_suspend(td, uap->nent, ujoblist, tsp); uma_zfree(aiol_zone, ujoblist); return (error); } /* * aio_cancel cancels any non-physio aio operations not currently in * progress. */ int sys_aio_cancel(struct thread *td, struct aio_cancel_args *uap) { struct proc *p = td->td_proc; struct kaioinfo *ki; struct aiocblist *cbe, *cbn; struct file *fp; struct socket *so; int error; int remove; int cancelled = 0; int notcancelled = 0; struct vnode *vp; /* Lookup file object. */ error = fget(td, uap->fd, 0, &fp); if (error) return (error); ki = p->p_aioinfo; if (ki == NULL) goto done; if (fp->f_type == DTYPE_VNODE) { vp = fp->f_vnode; if (vn_isdisk(vp, &error)) { fdrop(fp, td); td->td_retval[0] = AIO_NOTCANCELED; return (0); } } AIO_LOCK(ki); TAILQ_FOREACH_SAFE(cbe, &ki->kaio_jobqueue, plist, cbn) { if ((uap->fd == cbe->uaiocb.aio_fildes) && ((uap->aiocbp == NULL) || (uap->aiocbp == cbe->uuaiocb))) { remove = 0; mtx_lock(&aio_job_mtx); if (cbe->jobstate == JOBST_JOBQGLOBAL) { TAILQ_REMOVE(&aio_jobs, cbe, list); remove = 1; } else if (cbe->jobstate == JOBST_JOBQSOCK) { MPASS(fp->f_type == DTYPE_SOCKET); so = fp->f_data; TAILQ_REMOVE(&so->so_aiojobq, cbe, list); remove = 1; } else if (cbe->jobstate == JOBST_JOBQSYNC) { TAILQ_REMOVE(&ki->kaio_syncqueue, cbe, list); remove = 1; } mtx_unlock(&aio_job_mtx); if (remove) { TAILQ_REMOVE(&ki->kaio_jobqueue, cbe, plist); cbe->uaiocb._aiocb_private.status = -1; cbe->uaiocb._aiocb_private.error = ECANCELED; aio_bio_done_notify(p, cbe, DONE_QUEUE); cancelled++; } else { notcancelled++; } if (uap->aiocbp != NULL) break; } } AIO_UNLOCK(ki); done: fdrop(fp, td); if (uap->aiocbp != NULL) { if (cancelled) { td->td_retval[0] = AIO_CANCELED; return (0); } } if (notcancelled) { td->td_retval[0] = AIO_NOTCANCELED; return (0); } if (cancelled) { td->td_retval[0] = AIO_CANCELED; return (0); } td->td_retval[0] = AIO_ALLDONE; return (0); } /* * aio_error is implemented in the kernel level for compatibility purposes * only. For a user mode async implementation, it would be best to do it in * a userland subroutine. */ static int kern_aio_error(struct thread *td, struct aiocb *aiocbp, struct aiocb_ops *ops) { struct proc *p = td->td_proc; struct aiocblist *cb; struct kaioinfo *ki; int status; ki = p->p_aioinfo; if (ki == NULL) { td->td_retval[0] = EINVAL; return (0); } AIO_LOCK(ki); TAILQ_FOREACH(cb, &ki->kaio_all, allist) { if (cb->uuaiocb == aiocbp) { if (cb->jobstate == JOBST_JOBFINISHED) td->td_retval[0] = cb->uaiocb._aiocb_private.error; else td->td_retval[0] = EINPROGRESS; AIO_UNLOCK(ki); return (0); } } AIO_UNLOCK(ki); /* * Hack for failure of aio_aqueue. */ status = ops->fetch_status(aiocbp); if (status == -1) { td->td_retval[0] = ops->fetch_error(aiocbp); return (0); } td->td_retval[0] = EINVAL; return (0); } int sys_aio_error(struct thread *td, struct aio_error_args *uap) { return (kern_aio_error(td, uap->aiocbp, &aiocb_ops)); } /* syscall - asynchronous read from a file (REALTIME) */ int sys_oaio_read(struct thread *td, struct oaio_read_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ, &aiocb_ops_osigevent)); } int sys_aio_read(struct thread *td, struct aio_read_args *uap) { return (aio_aqueue(td, uap->aiocbp, NULL, LIO_READ, &aiocb_ops)); } /* syscall - asynchronous write to a file (REALTIME) */ int sys_oaio_write(struct thread *td, struct oaio_write_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, &aiocb_ops_osigevent)); } int sys_aio_write(struct thread *td, struct aio_write_args *uap) { return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITE, &aiocb_ops)); } int sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap) { return (aio_aqueue(td, uap->aiocbp, NULL, LIO_MLOCK, &aiocb_ops)); } static int kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list, struct aiocb **acb_list, int nent, struct sigevent *sig, struct aiocb_ops *ops) { struct proc *p = td->td_proc; struct aiocb *iocb; struct kaioinfo *ki; struct aioliojob *lj; struct kevent kev; int error; int nerror; int i; if ((mode != LIO_NOWAIT) && (mode != LIO_WAIT)) return (EINVAL); if (nent < 0 || nent > AIO_LISTIO_MAX) return (EINVAL); if (p->p_aioinfo == NULL) aio_init_aioinfo(p); ki = p->p_aioinfo; lj = uma_zalloc(aiolio_zone, M_WAITOK); lj->lioj_flags = 0; lj->lioj_count = 0; lj->lioj_finished_count = 0; knlist_init_mtx(&lj->klist, AIO_MTX(ki)); ksiginfo_init(&lj->lioj_ksi); /* * Setup signal. */ if (sig && (mode == LIO_NOWAIT)) { bcopy(sig, &lj->lioj_signal, sizeof(lj->lioj_signal)); if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) { /* Assume only new style KEVENT */ kev.filter = EVFILT_LIO; kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1; kev.ident = (uintptr_t)uacb_list; /* something unique */ kev.data = (intptr_t)lj; /* pass user defined sigval data */ kev.udata = lj->lioj_signal.sigev_value.sival_ptr; error = kqfd_register( lj->lioj_signal.sigev_notify_kqueue, &kev, td, 1); if (error) { uma_zfree(aiolio_zone, lj); return (error); } } else if (lj->lioj_signal.sigev_notify == SIGEV_NONE) { ; } else if (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL || lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID) { if (!_SIG_VALID(lj->lioj_signal.sigev_signo)) { uma_zfree(aiolio_zone, lj); return EINVAL; } lj->lioj_flags |= LIOJ_SIGNAL; } else { uma_zfree(aiolio_zone, lj); return EINVAL; } } AIO_LOCK(ki); TAILQ_INSERT_TAIL(&ki->kaio_liojoblist, lj, lioj_list); /* * Add extra aiocb count to avoid the lio to be freed * by other threads doing aio_waitcomplete or aio_return, * and prevent event from being sent until we have queued * all tasks. */ lj->lioj_count = 1; AIO_UNLOCK(ki); /* * Get pointers to the list of I/O requests. */ nerror = 0; for (i = 0; i < nent; i++) { iocb = acb_list[i]; if (iocb != NULL) { error = aio_aqueue(td, iocb, lj, LIO_NOP, ops); if (error != 0) nerror++; } } error = 0; AIO_LOCK(ki); if (mode == LIO_WAIT) { while (lj->lioj_count - 1 != lj->lioj_finished_count) { ki->kaio_flags |= KAIO_WAKEUP; error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH, "aiospn", 0); if (error == ERESTART) error = EINTR; if (error) break; } } else { if (lj->lioj_count - 1 == lj->lioj_finished_count) { if (lj->lioj_signal.sigev_notify == SIGEV_KEVENT) { lj->lioj_flags |= LIOJ_KEVENT_POSTED; KNOTE_LOCKED(&lj->klist, 1); } if ((lj->lioj_flags & (LIOJ_SIGNAL|LIOJ_SIGNAL_POSTED)) == LIOJ_SIGNAL && (lj->lioj_signal.sigev_notify == SIGEV_SIGNAL || lj->lioj_signal.sigev_notify == SIGEV_THREAD_ID)) { aio_sendsig(p, &lj->lioj_signal, &lj->lioj_ksi); lj->lioj_flags |= LIOJ_SIGNAL_POSTED; } } } lj->lioj_count--; if (lj->lioj_count == 0) { TAILQ_REMOVE(&ki->kaio_liojoblist, lj, lioj_list); knlist_delete(&lj->klist, curthread, 1); PROC_LOCK(p); sigqueue_take(&lj->lioj_ksi); PROC_UNLOCK(p); AIO_UNLOCK(ki); uma_zfree(aiolio_zone, lj); } else AIO_UNLOCK(ki); if (nerror) return (EIO); return (error); } /* syscall - list directed I/O (REALTIME) */ int sys_olio_listio(struct thread *td, struct olio_listio_args *uap) { struct aiocb **acb_list; struct sigevent *sigp, sig; struct osigevent osig; int error, nent; if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) return (EINVAL); nent = uap->nent; if (nent < 0 || nent > AIO_LISTIO_MAX) return (EINVAL); if (uap->sig && (uap->mode == LIO_NOWAIT)) { error = copyin(uap->sig, &osig, sizeof(osig)); if (error) return (error); error = convert_old_sigevent(&osig, &sig); if (error) return (error); sigp = &sig; } else sigp = NULL; acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0])); if (error == 0) error = kern_lio_listio(td, uap->mode, (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp, &aiocb_ops_osigevent); free(acb_list, M_LIO); return (error); } /* syscall - list directed I/O (REALTIME) */ int sys_lio_listio(struct thread *td, struct lio_listio_args *uap) { struct aiocb **acb_list; struct sigevent *sigp, sig; int error, nent; if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) return (EINVAL); nent = uap->nent; if (nent < 0 || nent > AIO_LISTIO_MAX) return (EINVAL); if (uap->sig && (uap->mode == LIO_NOWAIT)) { error = copyin(uap->sig, &sig, sizeof(sig)); if (error) return (error); sigp = &sig; } else sigp = NULL; acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); error = copyin(uap->acb_list, acb_list, nent * sizeof(acb_list[0])); if (error == 0) error = kern_lio_listio(td, uap->mode, uap->acb_list, acb_list, nent, sigp, &aiocb_ops); free(acb_list, M_LIO); return (error); } /* * Called from interrupt thread for physio, we should return as fast * as possible, so we schedule a biohelper task. */ static void aio_physwakeup(struct buf *bp) { struct aiocblist *aiocbe; aiocbe = (struct aiocblist *)bp->b_caller1; taskqueue_enqueue(taskqueue_aiod_bio, &aiocbe->biotask); } /* * Task routine to perform heavy tasks, process wakeup, and signals. */ static void biohelper(void *context, int pending) { struct aiocblist *aiocbe = context; struct buf *bp; struct proc *userp; struct kaioinfo *ki; int nblks; bp = aiocbe->bp; userp = aiocbe->userproc; ki = userp->p_aioinfo; AIO_LOCK(ki); aiocbe->uaiocb._aiocb_private.status -= bp->b_resid; aiocbe->uaiocb._aiocb_private.error = 0; if (bp->b_ioflags & BIO_ERROR) aiocbe->uaiocb._aiocb_private.error = bp->b_error; nblks = btodb(aiocbe->uaiocb.aio_nbytes); if (aiocbe->uaiocb.aio_lio_opcode == LIO_WRITE) aiocbe->outputcharge += nblks; else aiocbe->inputcharge += nblks; aiocbe->bp = NULL; TAILQ_REMOVE(&userp->p_aioinfo->kaio_bufqueue, aiocbe, plist); ki->kaio_buffer_count--; aio_bio_done_notify(userp, aiocbe, DONE_BUF); AIO_UNLOCK(ki); /* Release mapping into kernel space. */ vunmapbuf(bp); relpbuf(bp, NULL); atomic_subtract_int(&num_buf_aio, 1); } /* syscall - wait for the next completion of an aio request */ static int kern_aio_waitcomplete(struct thread *td, struct aiocb **aiocbp, struct timespec *ts, struct aiocb_ops *ops) { struct proc *p = td->td_proc; struct timeval atv; struct kaioinfo *ki; struct aiocblist *cb; struct aiocb *uuaiocb; int error, status, timo; ops->store_aiocb(aiocbp, NULL); timo = 0; if (ts) { if ((ts->tv_nsec < 0) || (ts->tv_nsec >= 1000000000)) return (EINVAL); TIMESPEC_TO_TIMEVAL(&atv, ts); if (itimerfix(&atv)) return (EINVAL); timo = tvtohz(&atv); } if (p->p_aioinfo == NULL) aio_init_aioinfo(p); ki = p->p_aioinfo; error = 0; cb = NULL; AIO_LOCK(ki); while ((cb = TAILQ_FIRST(&ki->kaio_done)) == NULL) { ki->kaio_flags |= KAIO_WAKEUP; error = msleep(&p->p_aioinfo, AIO_MTX(ki), PRIBIO | PCATCH, "aiowc", timo); if (timo && error == ERESTART) error = EINTR; if (error) break; } if (cb != NULL) { MPASS(cb->jobstate == JOBST_JOBFINISHED); uuaiocb = cb->uuaiocb; status = cb->uaiocb._aiocb_private.status; error = cb->uaiocb._aiocb_private.error; td->td_retval[0] = status; if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) { td->td_ru.ru_oublock += cb->outputcharge; cb->outputcharge = 0; } else if (cb->uaiocb.aio_lio_opcode == LIO_READ) { td->td_ru.ru_inblock += cb->inputcharge; cb->inputcharge = 0; } aio_free_entry(cb); AIO_UNLOCK(ki); ops->store_aiocb(aiocbp, uuaiocb); ops->store_error(uuaiocb, error); ops->store_status(uuaiocb, status); } else AIO_UNLOCK(ki); return (error); } int sys_aio_waitcomplete(struct thread *td, struct aio_waitcomplete_args *uap) { struct timespec ts, *tsp; int error; if (uap->timeout) { /* Get timespec struct. */ error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) return (error); tsp = &ts; } else tsp = NULL; return (kern_aio_waitcomplete(td, uap->aiocbp, tsp, &aiocb_ops)); } static int kern_aio_fsync(struct thread *td, int op, struct aiocb *aiocbp, struct aiocb_ops *ops) { struct proc *p = td->td_proc; struct kaioinfo *ki; if (op != O_SYNC) /* XXX lack of O_DSYNC */ return (EINVAL); ki = p->p_aioinfo; if (ki == NULL) aio_init_aioinfo(p); return (aio_aqueue(td, aiocbp, NULL, LIO_SYNC, ops)); } int sys_aio_fsync(struct thread *td, struct aio_fsync_args *uap) { return (kern_aio_fsync(td, uap->op, uap->aiocbp, &aiocb_ops)); } /* kqueue attach function */ static int filt_aioattach(struct knote *kn) { struct aiocblist *aiocbe = (struct aiocblist *)kn->kn_sdata; /* * The aiocbe pointer must be validated before using it, so * registration is restricted to the kernel; the user cannot * set EV_FLAG1. */ if ((kn->kn_flags & EV_FLAG1) == 0) return (EPERM); kn->kn_ptr.p_aio = aiocbe; kn->kn_flags &= ~EV_FLAG1; knlist_add(&aiocbe->klist, kn, 0); return (0); } /* kqueue detach function */ static void filt_aiodetach(struct knote *kn) { struct knlist *knl; knl = &kn->kn_ptr.p_aio->klist; knl->kl_lock(knl->kl_lockarg); if (!knlist_empty(knl)) knlist_remove(knl, kn, 1); knl->kl_unlock(knl->kl_lockarg); } /* kqueue filter function */ /*ARGSUSED*/ static int filt_aio(struct knote *kn, long hint) { struct aiocblist *aiocbe = kn->kn_ptr.p_aio; kn->kn_data = aiocbe->uaiocb._aiocb_private.error; if (aiocbe->jobstate != JOBST_JOBFINISHED) return (0); kn->kn_flags |= EV_EOF; return (1); } /* kqueue attach function */ static int filt_lioattach(struct knote *kn) { struct aioliojob * lj = (struct aioliojob *)kn->kn_sdata; /* * The aioliojob pointer must be validated before using it, so * registration is restricted to the kernel; the user cannot * set EV_FLAG1. */ if ((kn->kn_flags & EV_FLAG1) == 0) return (EPERM); kn->kn_ptr.p_lio = lj; kn->kn_flags &= ~EV_FLAG1; knlist_add(&lj->klist, kn, 0); return (0); } /* kqueue detach function */ static void filt_liodetach(struct knote *kn) { struct knlist *knl; knl = &kn->kn_ptr.p_lio->klist; knl->kl_lock(knl->kl_lockarg); if (!knlist_empty(knl)) knlist_remove(knl, kn, 1); knl->kl_unlock(knl->kl_lockarg); } /* kqueue filter function */ /*ARGSUSED*/ static int filt_lio(struct knote *kn, long hint) { struct aioliojob * lj = kn->kn_ptr.p_lio; return (lj->lioj_flags & LIOJ_KEVENT_POSTED); } #ifdef COMPAT_FREEBSD32 struct __aiocb_private32 { int32_t status; int32_t error; uint32_t kernelinfo; }; typedef struct oaiocb32 { int aio_fildes; /* File descriptor */ uint64_t aio_offset __packed; /* File offset for I/O */ uint32_t aio_buf; /* I/O buffer in process space */ uint32_t aio_nbytes; /* Number of bytes for I/O */ struct osigevent32 aio_sigevent; /* Signal to deliver */ int aio_lio_opcode; /* LIO opcode */ int aio_reqprio; /* Request priority -- ignored */ struct __aiocb_private32 _aiocb_private; } oaiocb32_t; typedef struct aiocb32 { int32_t aio_fildes; /* File descriptor */ uint64_t aio_offset __packed; /* File offset for I/O */ uint32_t aio_buf; /* I/O buffer in process space */ uint32_t aio_nbytes; /* Number of bytes for I/O */ int __spare__[2]; uint32_t __spare2__; int aio_lio_opcode; /* LIO opcode */ int aio_reqprio; /* Request priority -- ignored */ struct __aiocb_private32 _aiocb_private; struct sigevent32 aio_sigevent; /* Signal to deliver */ } aiocb32_t; static int convert_old_sigevent32(struct osigevent32 *osig, struct sigevent *nsig) { /* * Only SIGEV_NONE, SIGEV_SIGNAL, and SIGEV_KEVENT are * supported by AIO with the old sigevent structure. */ CP(*osig, *nsig, sigev_notify); switch (nsig->sigev_notify) { case SIGEV_NONE: break; case SIGEV_SIGNAL: nsig->sigev_signo = osig->__sigev_u.__sigev_signo; break; case SIGEV_KEVENT: nsig->sigev_notify_kqueue = osig->__sigev_u.__sigev_notify_kqueue; PTRIN_CP(*osig, *nsig, sigev_value.sival_ptr); break; default: return (EINVAL); } return (0); } static int aiocb32_copyin_old_sigevent(struct aiocb *ujob, struct aiocb *kjob) { struct oaiocb32 job32; int error; bzero(kjob, sizeof(struct aiocb)); error = copyin(ujob, &job32, sizeof(job32)); if (error) return (error); CP(job32, *kjob, aio_fildes); CP(job32, *kjob, aio_offset); PTRIN_CP(job32, *kjob, aio_buf); CP(job32, *kjob, aio_nbytes); CP(job32, *kjob, aio_lio_opcode); CP(job32, *kjob, aio_reqprio); CP(job32, *kjob, _aiocb_private.status); CP(job32, *kjob, _aiocb_private.error); PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo); return (convert_old_sigevent32(&job32.aio_sigevent, &kjob->aio_sigevent)); } static int -convert_sigevent32(struct sigevent32 *sig32, struct sigevent *sig) -{ - - CP(*sig32, *sig, sigev_notify); - switch (sig->sigev_notify) { - case SIGEV_NONE: - break; - case SIGEV_THREAD_ID: - CP(*sig32, *sig, sigev_notify_thread_id); - /* FALLTHROUGH */ - case SIGEV_SIGNAL: - CP(*sig32, *sig, sigev_signo); - break; - case SIGEV_KEVENT: - CP(*sig32, *sig, sigev_notify_kqueue); - CP(*sig32, *sig, sigev_notify_kevent_flags); - PTRIN_CP(*sig32, *sig, sigev_value.sival_ptr); - break; - default: - return (EINVAL); - } - return (0); -} - -static int aiocb32_copyin(struct aiocb *ujob, struct aiocb *kjob) { struct aiocb32 job32; int error; error = copyin(ujob, &job32, sizeof(job32)); if (error) return (error); CP(job32, *kjob, aio_fildes); CP(job32, *kjob, aio_offset); PTRIN_CP(job32, *kjob, aio_buf); CP(job32, *kjob, aio_nbytes); CP(job32, *kjob, aio_lio_opcode); CP(job32, *kjob, aio_reqprio); CP(job32, *kjob, _aiocb_private.status); CP(job32, *kjob, _aiocb_private.error); PTRIN_CP(job32, *kjob, _aiocb_private.kernelinfo); return (convert_sigevent32(&job32.aio_sigevent, &kjob->aio_sigevent)); } static long aiocb32_fetch_status(struct aiocb *ujob) { struct aiocb32 *ujob32; ujob32 = (struct aiocb32 *)ujob; return (fuword32(&ujob32->_aiocb_private.status)); } static long aiocb32_fetch_error(struct aiocb *ujob) { struct aiocb32 *ujob32; ujob32 = (struct aiocb32 *)ujob; return (fuword32(&ujob32->_aiocb_private.error)); } static int aiocb32_store_status(struct aiocb *ujob, long status) { struct aiocb32 *ujob32; ujob32 = (struct aiocb32 *)ujob; return (suword32(&ujob32->_aiocb_private.status, status)); } static int aiocb32_store_error(struct aiocb *ujob, long error) { struct aiocb32 *ujob32; ujob32 = (struct aiocb32 *)ujob; return (suword32(&ujob32->_aiocb_private.error, error)); } static int aiocb32_store_kernelinfo(struct aiocb *ujob, long jobref) { struct aiocb32 *ujob32; ujob32 = (struct aiocb32 *)ujob; return (suword32(&ujob32->_aiocb_private.kernelinfo, jobref)); } static int aiocb32_store_aiocb(struct aiocb **ujobp, struct aiocb *ujob) { return (suword32(ujobp, (long)ujob)); } static struct aiocb_ops aiocb32_ops = { .copyin = aiocb32_copyin, .fetch_status = aiocb32_fetch_status, .fetch_error = aiocb32_fetch_error, .store_status = aiocb32_store_status, .store_error = aiocb32_store_error, .store_kernelinfo = aiocb32_store_kernelinfo, .store_aiocb = aiocb32_store_aiocb, }; static struct aiocb_ops aiocb32_ops_osigevent = { .copyin = aiocb32_copyin_old_sigevent, .fetch_status = aiocb32_fetch_status, .fetch_error = aiocb32_fetch_error, .store_status = aiocb32_store_status, .store_error = aiocb32_store_error, .store_kernelinfo = aiocb32_store_kernelinfo, .store_aiocb = aiocb32_store_aiocb, }; int freebsd32_aio_return(struct thread *td, struct freebsd32_aio_return_args *uap) { return (kern_aio_return(td, (struct aiocb *)uap->aiocbp, &aiocb32_ops)); } int freebsd32_aio_suspend(struct thread *td, struct freebsd32_aio_suspend_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; struct aiocb **ujoblist; uint32_t *ujoblist32; int error, i; if (uap->nent < 0 || uap->nent > AIO_LISTIO_MAX) return (EINVAL); if (uap->timeout) { /* Get timespec struct. */ if ((error = copyin(uap->timeout, &ts32, sizeof(ts32))) != 0) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; ujoblist = uma_zalloc(aiol_zone, M_WAITOK); ujoblist32 = (uint32_t *)ujoblist; error = copyin(uap->aiocbp, ujoblist32, uap->nent * sizeof(ujoblist32[0])); if (error == 0) { for (i = uap->nent; i > 0; i--) ujoblist[i] = PTRIN(ujoblist32[i]); error = kern_aio_suspend(td, uap->nent, ujoblist, tsp); } uma_zfree(aiol_zone, ujoblist); return (error); } int freebsd32_aio_cancel(struct thread *td, struct freebsd32_aio_cancel_args *uap) { return (sys_aio_cancel(td, (struct aio_cancel_args *)uap)); } int freebsd32_aio_error(struct thread *td, struct freebsd32_aio_error_args *uap) { return (kern_aio_error(td, (struct aiocb *)uap->aiocbp, &aiocb32_ops)); } int freebsd32_oaio_read(struct thread *td, struct freebsd32_oaio_read_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ, &aiocb32_ops_osigevent)); } int freebsd32_aio_read(struct thread *td, struct freebsd32_aio_read_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_READ, &aiocb32_ops)); } int freebsd32_oaio_write(struct thread *td, struct freebsd32_oaio_write_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, &aiocb32_ops_osigevent)); } int freebsd32_aio_write(struct thread *td, struct freebsd32_aio_write_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_WRITE, &aiocb32_ops)); } int freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap) { return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_MLOCK, &aiocb32_ops)); } int freebsd32_aio_waitcomplete(struct thread *td, struct freebsd32_aio_waitcomplete_args *uap) { struct timespec32 ts32; struct timespec ts, *tsp; int error; if (uap->timeout) { /* Get timespec struct. */ error = copyin(uap->timeout, &ts32, sizeof(ts32)); if (error) return (error); CP(ts32, ts, tv_sec); CP(ts32, ts, tv_nsec); tsp = &ts; } else tsp = NULL; return (kern_aio_waitcomplete(td, (struct aiocb **)uap->aiocbp, tsp, &aiocb32_ops)); } int freebsd32_aio_fsync(struct thread *td, struct freebsd32_aio_fsync_args *uap) { return (kern_aio_fsync(td, uap->op, (struct aiocb *)uap->aiocbp, &aiocb32_ops)); } int freebsd32_olio_listio(struct thread *td, struct freebsd32_olio_listio_args *uap) { struct aiocb **acb_list; struct sigevent *sigp, sig; struct osigevent32 osig; uint32_t *acb_list32; int error, i, nent; if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) return (EINVAL); nent = uap->nent; if (nent < 0 || nent > AIO_LISTIO_MAX) return (EINVAL); if (uap->sig && (uap->mode == LIO_NOWAIT)) { error = copyin(uap->sig, &osig, sizeof(osig)); if (error) return (error); error = convert_old_sigevent32(&osig, &sig); if (error) return (error); sigp = &sig; } else sigp = NULL; acb_list32 = malloc(sizeof(uint32_t) * nent, M_LIO, M_WAITOK); error = copyin(uap->acb_list, acb_list32, nent * sizeof(uint32_t)); if (error) { free(acb_list32, M_LIO); return (error); } acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); for (i = 0; i < nent; i++) acb_list[i] = PTRIN(acb_list32[i]); free(acb_list32, M_LIO); error = kern_lio_listio(td, uap->mode, (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp, &aiocb32_ops_osigevent); free(acb_list, M_LIO); return (error); } int freebsd32_lio_listio(struct thread *td, struct freebsd32_lio_listio_args *uap) { struct aiocb **acb_list; struct sigevent *sigp, sig; struct sigevent32 sig32; uint32_t *acb_list32; int error, i, nent; if ((uap->mode != LIO_NOWAIT) && (uap->mode != LIO_WAIT)) return (EINVAL); nent = uap->nent; if (nent < 0 || nent > AIO_LISTIO_MAX) return (EINVAL); if (uap->sig && (uap->mode == LIO_NOWAIT)) { error = copyin(uap->sig, &sig32, sizeof(sig32)); if (error) return (error); error = convert_sigevent32(&sig32, &sig); if (error) return (error); sigp = &sig; } else sigp = NULL; acb_list32 = malloc(sizeof(uint32_t) * nent, M_LIO, M_WAITOK); error = copyin(uap->acb_list, acb_list32, nent * sizeof(uint32_t)); if (error) { free(acb_list32, M_LIO); return (error); } acb_list = malloc(sizeof(struct aiocb *) * nent, M_LIO, M_WAITOK); for (i = 0; i < nent; i++) acb_list[i] = PTRIN(acb_list32[i]); free(acb_list32, M_LIO); error = kern_lio_listio(td, uap->mode, (struct aiocb * const *)uap->acb_list, acb_list, nent, sigp, &aiocb32_ops); free(acb_list, M_LIO); return (error); } #endif