Index: projects/clang700-import/lib/libc/amd64/string/bcopy.S =================================================================== --- projects/clang700-import/lib/libc/amd64/string/bcopy.S (revision 339014) +++ projects/clang700-import/lib/libc/amd64/string/bcopy.S (revision 339015) @@ -1,104 +1,103 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from locore.s. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #if 0 RCSID("$NetBSD: bcopy.S,v 1.2 2003/08/07 16:42:36 agc Exp $") #endif /* * (ov)bcopy (src,dst,cnt) * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ #ifdef MEMCOPY ENTRY(memcpy) #else #ifdef MEMMOVE ENTRY(memmove) #else ENTRY(bcopy) #endif #endif #if defined(MEMCOPY) || defined(MEMMOVE) movq %rdi,%rax /* return dst */ #else xchgq %rdi,%rsi #endif movq %rdx,%rcx movq %rdi,%r8 subq %rsi,%r8 cmpq %rcx,%r8 /* overlapping? */ jb 1f - cld /* nope, copy forwards. */ shrq $3,%rcx /* copy by words */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ jne 2f ret 2: rep movsb ret 1: addq %rcx,%rdi /* copy backwards. */ addq %rcx,%rsi std decq %rdi decq %rsi andq $7,%rcx /* any fractional bytes? */ je 3f rep movsb 3: movq %rdx,%rcx /* copy remainder by words */ shrq $3,%rcx subq $7,%rsi subq $7,%rdi rep movsq cld ret #ifdef MEMCOPY END(memcpy) #else #ifdef MEMMOVE END(memmove) #else END(bcopy) #endif #endif .section .note.GNU-stack,"",%progbits Index: projects/clang700-import/lib/libusb/libusb10_io.c =================================================================== --- projects/clang700-import/lib/libusb/libusb10_io.c (revision 339014) +++ projects/clang700-import/lib/libusb/libusb10_io.c (revision 339015) @@ -1,831 +1,837 @@ /* $FreeBSD$ */ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 2009 Sylvestre Gallon. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #ifdef LIBUSB_GLOBAL_INCLUDE_FILE #include LIBUSB_GLOBAL_INCLUDE_FILE #else #include #include #include #include #include #include #include #include #include #include #endif #define libusb_device_handle libusb20_device #include "libusb20.h" #include "libusb20_desc.h" #include "libusb20_int.h" #include "libusb.h" #include "libusb10.h" UNEXPORTED void libusb10_add_pollfd(libusb_context *ctx, struct libusb_super_pollfd *pollfd, struct libusb20_device *pdev, int fd, short events) { if (ctx == NULL) return; /* invalid */ if (pollfd->entry.tqe_prev != NULL) return; /* already queued */ if (fd < 0) return; /* invalid */ pollfd->pdev = pdev; pollfd->pollfd.fd = fd; pollfd->pollfd.events = events; CTX_LOCK(ctx); TAILQ_INSERT_TAIL(&ctx->pollfds, pollfd, entry); CTX_UNLOCK(ctx); if (ctx->fd_added_cb) ctx->fd_added_cb(fd, events, ctx->fd_cb_user_data); } UNEXPORTED void libusb10_remove_pollfd(libusb_context *ctx, struct libusb_super_pollfd *pollfd) { if (ctx == NULL) return; /* invalid */ if (pollfd->entry.tqe_prev == NULL) return; /* already dequeued */ CTX_LOCK(ctx); TAILQ_REMOVE(&ctx->pollfds, pollfd, entry); pollfd->entry.tqe_prev = NULL; CTX_UNLOCK(ctx); if (ctx->fd_removed_cb) ctx->fd_removed_cb(pollfd->pollfd.fd, ctx->fd_cb_user_data); } /* This function must be called locked */ static int libusb10_handle_events_sub(struct libusb_context *ctx, struct timeval *tv) { struct libusb_device *dev; struct libusb20_device **ppdev; struct libusb_super_pollfd *pfd; struct pollfd *fds; struct libusb_super_transfer *sxfer; struct libusb_transfer *uxfer; nfds_t nfds; int timeout; int i; int err; DPRINTF(ctx, LIBUSB_DEBUG_FUNCTION, "libusb10_handle_events_sub enter"); nfds = 0; i = 0; TAILQ_FOREACH(pfd, &ctx->pollfds, entry) nfds++; fds = alloca(sizeof(*fds) * nfds); if (fds == NULL) return (LIBUSB_ERROR_NO_MEM); ppdev = alloca(sizeof(*ppdev) * nfds); if (ppdev == NULL) return (LIBUSB_ERROR_NO_MEM); TAILQ_FOREACH(pfd, &ctx->pollfds, entry) { fds[i].fd = pfd->pollfd.fd; fds[i].events = pfd->pollfd.events; fds[i].revents = 0; ppdev[i] = pfd->pdev; if (pfd->pdev != NULL) libusb_get_device(pfd->pdev)->refcnt++; i++; } if (tv == NULL) timeout = -1; else timeout = (tv->tv_sec * 1000) + ((tv->tv_usec + 999) / 1000); CTX_UNLOCK(ctx); err = poll(fds, nfds, timeout); CTX_LOCK(ctx); if ((err == -1) && (errno == EINTR)) err = LIBUSB_ERROR_INTERRUPTED; else if (err < 0) err = LIBUSB_ERROR_IO; if (err < 1) { for (i = 0; i != (int)nfds; i++) { if (ppdev[i] != NULL) { CTX_UNLOCK(ctx); libusb_unref_device(libusb_get_device(ppdev[i])); CTX_LOCK(ctx); } } goto do_done; } for (i = 0; i != (int)nfds; i++) { if (ppdev[i] != NULL) { dev = libusb_get_device(ppdev[i]); if (fds[i].revents != 0) { err = libusb20_dev_process(ppdev[i]); if (err) { /* set device is gone */ dev->device_is_gone = 1; /* remove USB device from polling loop */ libusb10_remove_pollfd(dev->ctx, &dev->dev_poll); /* cancel all pending transfers */ libusb10_cancel_all_transfer_locked(ppdev[i], dev); } } CTX_UNLOCK(ctx); libusb_unref_device(dev); CTX_LOCK(ctx); } else { uint8_t dummy; while (read(fds[i].fd, &dummy, 1) == 1) ; } } err = 0; do_done: /* Do all done callbacks */ while ((sxfer = TAILQ_FIRST(&ctx->tr_done))) { uint8_t flags; TAILQ_REMOVE(&ctx->tr_done, sxfer, entry); sxfer->entry.tqe_prev = NULL; ctx->tr_done_ref++; CTX_UNLOCK(ctx); uxfer = (struct libusb_transfer *)( ((uint8_t *)sxfer) + sizeof(*sxfer)); /* Allow the callback to free the transfer itself. */ flags = uxfer->flags; if (uxfer->callback != NULL) (uxfer->callback) (uxfer); /* Check if the USB transfer should be automatically freed. */ if (flags & LIBUSB_TRANSFER_FREE_TRANSFER) libusb_free_transfer(uxfer); CTX_LOCK(ctx); ctx->tr_done_ref--; ctx->tr_done_gen++; } /* Wakeup other waiters */ pthread_cond_broadcast(&ctx->ctx_cond); return (err); } /* Polling and timing */ int libusb_try_lock_events(libusb_context *ctx) { int err; ctx = GET_CONTEXT(ctx); if (ctx == NULL) return (1); err = CTX_TRYLOCK(ctx); if (err) return (1); err = (ctx->ctx_handler != NO_THREAD); if (err) CTX_UNLOCK(ctx); else ctx->ctx_handler = pthread_self(); return (err); } void libusb_lock_events(libusb_context *ctx) { ctx = GET_CONTEXT(ctx); CTX_LOCK(ctx); if (ctx->ctx_handler == NO_THREAD) ctx->ctx_handler = pthread_self(); } void libusb_unlock_events(libusb_context *ctx) { ctx = GET_CONTEXT(ctx); if (ctx->ctx_handler == pthread_self()) { ctx->ctx_handler = NO_THREAD; pthread_cond_broadcast(&ctx->ctx_cond); } CTX_UNLOCK(ctx); } int libusb_event_handling_ok(libusb_context *ctx) { ctx = GET_CONTEXT(ctx); return (ctx->ctx_handler == pthread_self()); } int libusb_event_handler_active(libusb_context *ctx) { ctx = GET_CONTEXT(ctx); return (ctx->ctx_handler != NO_THREAD); } void libusb_lock_event_waiters(libusb_context *ctx) { ctx = GET_CONTEXT(ctx); CTX_LOCK(ctx); } void libusb_unlock_event_waiters(libusb_context *ctx) { ctx = GET_CONTEXT(ctx); CTX_UNLOCK(ctx); } int libusb_wait_for_event(libusb_context *ctx, struct timeval *tv) { struct timespec ts; int err; ctx = GET_CONTEXT(ctx); DPRINTF(ctx, LIBUSB_DEBUG_FUNCTION, "libusb_wait_for_event enter"); if (tv == NULL) { pthread_cond_wait(&ctx->ctx_cond, &ctx->ctx_lock); + /* try to grab polling of actual events, if any */ + if (ctx->ctx_handler == NO_THREAD) + ctx->ctx_handler = pthread_self(); return (0); } err = clock_gettime(CLOCK_MONOTONIC, &ts); if (err < 0) return (LIBUSB_ERROR_OTHER); /* * The "tv" arguments points to a relative time structure and * not an absolute time structure. */ ts.tv_sec += tv->tv_sec; ts.tv_nsec += tv->tv_usec * 1000; if (ts.tv_nsec >= 1000000000) { ts.tv_nsec -= 1000000000; ts.tv_sec++; } err = pthread_cond_timedwait(&ctx->ctx_cond, &ctx->ctx_lock, &ts); + /* try to grab polling of actual events, if any */ + if (ctx->ctx_handler == NO_THREAD) + ctx->ctx_handler = pthread_self(); if (err == ETIMEDOUT) return (1); return (0); } int libusb_handle_events_timeout_completed(libusb_context *ctx, struct timeval *tv, int *completed) { int err = 0; ctx = GET_CONTEXT(ctx); DPRINTF(ctx, LIBUSB_DEBUG_FUNCTION, "libusb_handle_events_timeout_completed enter"); libusb_lock_events(ctx); while (1) { if (completed != NULL) { if (*completed != 0 || err != 0) break; } err = libusb_handle_events_locked(ctx, tv); if (completed == NULL) break; } libusb_unlock_events(ctx); DPRINTF(ctx, LIBUSB_DEBUG_FUNCTION, "libusb_handle_events_timeout_completed exit"); return (err); } int libusb_handle_events_completed(libusb_context *ctx, int *completed) { return (libusb_handle_events_timeout_completed(ctx, NULL, completed)); } int libusb_handle_events_timeout(libusb_context *ctx, struct timeval *tv) { return (libusb_handle_events_timeout_completed(ctx, tv, NULL)); } int libusb_handle_events(libusb_context *ctx) { return (libusb_handle_events_timeout_completed(ctx, NULL, NULL)); } int libusb_handle_events_locked(libusb_context *ctx, struct timeval *tv) { int err; ctx = GET_CONTEXT(ctx); if (libusb_event_handling_ok(ctx)) { err = libusb10_handle_events_sub(ctx, tv); } else { err = libusb_wait_for_event(ctx, tv); if (err != 0) err = LIBUSB_ERROR_TIMEOUT; } return (err); } int libusb_get_next_timeout(libusb_context *ctx, struct timeval *tv) { /* all timeouts are currently being done by the kernel */ timerclear(tv); return (0); } void libusb_set_pollfd_notifiers(libusb_context *ctx, libusb_pollfd_added_cb added_cb, libusb_pollfd_removed_cb removed_cb, void *user_data) { ctx = GET_CONTEXT(ctx); ctx->fd_added_cb = added_cb; ctx->fd_removed_cb = removed_cb; ctx->fd_cb_user_data = user_data; } const struct libusb_pollfd ** libusb_get_pollfds(libusb_context *ctx) { struct libusb_super_pollfd *pollfd; libusb_pollfd **ret; int i; ctx = GET_CONTEXT(ctx); CTX_LOCK(ctx); i = 0; TAILQ_FOREACH(pollfd, &ctx->pollfds, entry) i++; ret = calloc(i + 1, sizeof(struct libusb_pollfd *)); if (ret == NULL) goto done; i = 0; TAILQ_FOREACH(pollfd, &ctx->pollfds, entry) ret[i++] = &pollfd->pollfd; ret[i] = NULL; done: CTX_UNLOCK(ctx); return ((const struct libusb_pollfd **)ret); } /* Synchronous device I/O */ int libusb_control_transfer(libusb_device_handle *devh, uint8_t bmRequestType, uint8_t bRequest, uint16_t wValue, uint16_t wIndex, uint8_t *data, uint16_t wLength, unsigned int timeout) { struct LIBUSB20_CONTROL_SETUP_DECODED req; int err; uint16_t actlen; if (devh == NULL) return (LIBUSB_ERROR_INVALID_PARAM); if ((wLength != 0) && (data == NULL)) return (LIBUSB_ERROR_INVALID_PARAM); LIBUSB20_INIT(LIBUSB20_CONTROL_SETUP, &req); req.bmRequestType = bmRequestType; req.bRequest = bRequest; req.wValue = wValue; req.wIndex = wIndex; req.wLength = wLength; err = libusb20_dev_request_sync(devh, &req, data, &actlen, timeout, 0); if (err == LIBUSB20_ERROR_PIPE) return (LIBUSB_ERROR_PIPE); else if (err == LIBUSB20_ERROR_TIMEOUT) return (LIBUSB_ERROR_TIMEOUT); else if (err) return (LIBUSB_ERROR_NO_DEVICE); return (actlen); } static libusb_context * libusb10_get_context_by_device_handle(libusb_device_handle *devh) { libusb_context *ctx; if (devh != NULL) ctx = libusb_get_device(devh)->ctx; else ctx = NULL; return (GET_CONTEXT(ctx)); } static void libusb10_do_transfer_cb(struct libusb_transfer *transfer) { libusb_context *ctx; int *pdone; ctx = libusb10_get_context_by_device_handle(transfer->dev_handle); DPRINTF(ctx, LIBUSB_DEBUG_TRANSFER, "sync I/O done"); pdone = transfer->user_data; *pdone = 1; } /* * TODO: Replace the following function. Allocating and freeing on a * per-transfer basis is slow. --HPS */ static int libusb10_do_transfer(libusb_device_handle *devh, uint8_t endpoint, uint8_t *data, int length, int *transferred, unsigned int timeout, int type) { libusb_context *ctx; struct libusb_transfer *xfer; int done; int ret; if (devh == NULL) return (LIBUSB_ERROR_INVALID_PARAM); if ((length != 0) && (data == NULL)) return (LIBUSB_ERROR_INVALID_PARAM); xfer = libusb_alloc_transfer(0); if (xfer == NULL) return (LIBUSB_ERROR_NO_MEM); ctx = libusb_get_device(devh)->ctx; xfer->dev_handle = devh; xfer->endpoint = endpoint; xfer->type = type; xfer->timeout = timeout; xfer->buffer = data; xfer->length = length; xfer->user_data = (void *)&done; xfer->callback = libusb10_do_transfer_cb; done = 0; if ((ret = libusb_submit_transfer(xfer)) < 0) { libusb_free_transfer(xfer); return (ret); } while (done == 0) { if ((ret = libusb_handle_events(ctx)) < 0) { libusb_cancel_transfer(xfer); usleep(1000); /* nice it */ } } *transferred = xfer->actual_length; switch (xfer->status) { case LIBUSB_TRANSFER_COMPLETED: ret = 0; break; case LIBUSB_TRANSFER_TIMED_OUT: ret = LIBUSB_ERROR_TIMEOUT; break; case LIBUSB_TRANSFER_OVERFLOW: ret = LIBUSB_ERROR_OVERFLOW; break; case LIBUSB_TRANSFER_STALL: ret = LIBUSB_ERROR_PIPE; break; case LIBUSB_TRANSFER_NO_DEVICE: ret = LIBUSB_ERROR_NO_DEVICE; break; default: ret = LIBUSB_ERROR_OTHER; break; } libusb_free_transfer(xfer); return (ret); } int libusb_bulk_transfer(libusb_device_handle *devh, uint8_t endpoint, uint8_t *data, int length, int *transferred, unsigned int timeout) { libusb_context *ctx; int ret; ctx = libusb10_get_context_by_device_handle(devh); DPRINTF(ctx, LIBUSB_DEBUG_FUNCTION, "libusb_bulk_transfer enter"); ret = libusb10_do_transfer(devh, endpoint, data, length, transferred, timeout, LIBUSB_TRANSFER_TYPE_BULK); DPRINTF(ctx, LIBUSB_DEBUG_FUNCTION, "libusb_bulk_transfer leave"); return (ret); } int libusb_interrupt_transfer(libusb_device_handle *devh, uint8_t endpoint, uint8_t *data, int length, int *transferred, unsigned int timeout) { libusb_context *ctx; int ret; ctx = libusb10_get_context_by_device_handle(devh); DPRINTF(ctx, LIBUSB_DEBUG_FUNCTION, "libusb_interrupt_transfer enter"); ret = libusb10_do_transfer(devh, endpoint, data, length, transferred, timeout, LIBUSB_TRANSFER_TYPE_INTERRUPT); DPRINTF(ctx, LIBUSB_DEBUG_FUNCTION, "libusb_interrupt_transfer leave"); return (ret); } uint8_t * libusb_get_iso_packet_buffer(struct libusb_transfer *transfer, uint32_t off) { uint8_t *ptr; uint32_t n; if (transfer->num_iso_packets < 0) return (NULL); if (off >= (uint32_t)transfer->num_iso_packets) return (NULL); ptr = transfer->buffer; if (ptr == NULL) return (NULL); for (n = 0; n != off; n++) { ptr += transfer->iso_packet_desc[n].length; } return (ptr); } uint8_t * libusb_get_iso_packet_buffer_simple(struct libusb_transfer *transfer, uint32_t off) { uint8_t *ptr; if (transfer->num_iso_packets < 0) return (NULL); if (off >= (uint32_t)transfer->num_iso_packets) return (NULL); ptr = transfer->buffer; if (ptr == NULL) return (NULL); ptr += transfer->iso_packet_desc[0].length * off; return (ptr); } void libusb_set_iso_packet_lengths(struct libusb_transfer *transfer, uint32_t length) { int n; if (transfer->num_iso_packets < 0) return; for (n = 0; n != transfer->num_iso_packets; n++) transfer->iso_packet_desc[n].length = length; } uint8_t * libusb_control_transfer_get_data(struct libusb_transfer *transfer) { if (transfer->buffer == NULL) return (NULL); return (transfer->buffer + LIBUSB_CONTROL_SETUP_SIZE); } struct libusb_control_setup * libusb_control_transfer_get_setup(struct libusb_transfer *transfer) { return ((struct libusb_control_setup *)transfer->buffer); } void libusb_fill_control_setup(uint8_t *buf, uint8_t bmRequestType, uint8_t bRequest, uint16_t wValue, uint16_t wIndex, uint16_t wLength) { struct libusb_control_setup *req = (struct libusb_control_setup *)buf; /* The alignment is OK for all fields below. */ req->bmRequestType = bmRequestType; req->bRequest = bRequest; req->wValue = htole16(wValue); req->wIndex = htole16(wIndex); req->wLength = htole16(wLength); } void libusb_fill_control_transfer(struct libusb_transfer *transfer, libusb_device_handle *devh, uint8_t *buf, libusb_transfer_cb_fn callback, void *user_data, uint32_t timeout) { struct libusb_control_setup *setup = (struct libusb_control_setup *)buf; transfer->dev_handle = devh; transfer->endpoint = 0; transfer->type = LIBUSB_TRANSFER_TYPE_CONTROL; transfer->timeout = timeout; transfer->buffer = buf; if (setup != NULL) transfer->length = LIBUSB_CONTROL_SETUP_SIZE + le16toh(setup->wLength); else transfer->length = 0; transfer->user_data = user_data; transfer->callback = callback; } void libusb_fill_bulk_transfer(struct libusb_transfer *transfer, libusb_device_handle *devh, uint8_t endpoint, uint8_t *buf, int length, libusb_transfer_cb_fn callback, void *user_data, uint32_t timeout) { transfer->dev_handle = devh; transfer->endpoint = endpoint; transfer->type = LIBUSB_TRANSFER_TYPE_BULK; transfer->timeout = timeout; transfer->buffer = buf; transfer->length = length; transfer->user_data = user_data; transfer->callback = callback; } void libusb_fill_interrupt_transfer(struct libusb_transfer *transfer, libusb_device_handle *devh, uint8_t endpoint, uint8_t *buf, int length, libusb_transfer_cb_fn callback, void *user_data, uint32_t timeout) { transfer->dev_handle = devh; transfer->endpoint = endpoint; transfer->type = LIBUSB_TRANSFER_TYPE_INTERRUPT; transfer->timeout = timeout; transfer->buffer = buf; transfer->length = length; transfer->user_data = user_data; transfer->callback = callback; } void libusb_fill_iso_transfer(struct libusb_transfer *transfer, libusb_device_handle *devh, uint8_t endpoint, uint8_t *buf, int length, int npacket, libusb_transfer_cb_fn callback, void *user_data, uint32_t timeout) { transfer->dev_handle = devh; transfer->endpoint = endpoint; transfer->type = LIBUSB_TRANSFER_TYPE_ISOCHRONOUS; transfer->timeout = timeout; transfer->buffer = buf; transfer->length = length; transfer->num_iso_packets = npacket; transfer->user_data = user_data; transfer->callback = callback; } int libusb_alloc_streams(libusb_device_handle *dev, uint32_t num_streams, unsigned char *endpoints, int num_endpoints) { if (num_streams > 1) return (LIBUSB_ERROR_INVALID_PARAM); return (0); } int libusb_free_streams(libusb_device_handle *dev, unsigned char *endpoints, int num_endpoints) { return (0); } void libusb_transfer_set_stream_id(struct libusb_transfer *transfer, uint32_t stream_id) { struct libusb_super_transfer *sxfer; if (transfer == NULL) return; sxfer = (struct libusb_super_transfer *)( ((uint8_t *)transfer) - sizeof(*sxfer)); /* set stream ID */ sxfer->stream_id = stream_id; } uint32_t libusb_transfer_get_stream_id(struct libusb_transfer *transfer) { struct libusb_super_transfer *sxfer; if (transfer == NULL) return (0); sxfer = (struct libusb_super_transfer *)( ((uint8_t *)transfer) - sizeof(*sxfer)); /* get stream ID */ return (sxfer->stream_id); } Index: projects/clang700-import/sbin/init/rc.d/ldconfig =================================================================== --- projects/clang700-import/sbin/init/rc.d/ldconfig (revision 339014) +++ projects/clang700-import/sbin/init/rc.d/ldconfig (revision 339015) @@ -1,106 +1,106 @@ #!/bin/sh # # $FreeBSD$ # # PROVIDE: ldconfig # REQUIRE: FILESYSTEMS # BEFORE: DAEMON . /etc/rc.subr name="ldconfig" desc="Configure the shared library cache" ldconfig_command="/sbin/ldconfig" start_cmd="ldconfig_start" stop_cmd=":" ldconfig_start() { local _files _ins _ins= ldconfig=${ldconfig_command} checkyesno ldconfig_insecure && _ins="-i" if [ -x "${ldconfig_command}" ]; then _LDC="/lib /usr/lib" for i in ${ldconfig_local_dirs}; do if [ -d "${i}" ]; then _files=`find ${i} -type f` if [ -n "${_files}" ]; then ldconfig_paths="${ldconfig_paths} `cat ${_files} | sort -u`" fi fi done for i in ${ldconfig_paths} /etc/ld-elf.so.conf; do if [ -r "${i}" ]; then _LDC="${_LDC} ${i}" fi done check_startmsgs && echo 'ELF ldconfig path:' ${_LDC} ${ldconfig} -elf ${_ins} ${_LDC} case `sysctl -n hw.machine_arch` in - amd64|powerpc64) + amd64|mips64|powerpc64) for i in ${ldconfig_local32_dirs}; do if [ -d "${i}" ]; then _files=`find ${i} -type f` if [ -n "${_files}" ]; then ldconfig32_paths="${ldconfig32_paths} `cat ${_files} | sort -u`" fi fi done _LDC="" for i in ${ldconfig32_paths}; do if [ -r "${i}" ]; then _LDC="${_LDC} ${i}" fi done check_startmsgs && echo '32-bit compatibility ldconfig path:' ${_LDC} ${ldconfig} -32 ${_ins} ${_LDC} ;; esac case `sysctl -n hw.machine_arch` in armv[67]) for i in ${ldconfig_localsoft_dirs}; do if [ -d "${i}" ]; then _files=`find ${i} -type f` if [ -n "${_files}" ]; then ldconfigsoft_paths="${ldconfigsoft_paths} `cat ${_files} | sort -u`" fi fi done _LDC="" for i in ${ldconfigsoft_paths}; do if [ -r "${i}" ]; then _LDC="${_LDC} ${i}" fi done check_startmsgs && echo 'Soft Float compatibility ldconfig path:' ${_LDC} ${ldconfig} -soft ${_ins} ${_LDC} ;; esac # Legacy aout support for i386 only case `sysctl -n hw.machine_arch` in i386) # Default the a.out ldconfig path. : ${ldconfig_paths_aout=${ldconfig_paths}} _LDC="" for i in /usr/lib/aout ${ldconfig_paths_aout} /etc/ld.so.conf; do if [ -r "${i}" ]; then _LDC="${_LDC} ${i}" fi done check_startmsgs && echo 'a.out ldconfig path:' ${_LDC} ${ldconfig} -aout ${_ins} ${_LDC} ;; esac fi } load_rc_config $name run_rc_command "$1" Index: projects/clang700-import/sys/amd64/amd64/pmap.c =================================================================== --- projects/clang700-import/sys/amd64/amd64/pmap.c (revision 339014) +++ projects/clang700-import/sys/amd64/amd64/pmap.c (revision 339015) @@ -1,8446 +1,8446 @@ /*- * SPDX-License-Identifier: BSD-4-Clause * * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * Copyright (c) 2014-2018 The FreeBSD Foundation * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Portions of this software were developed by * Konstantin Belousov under sponsorship from * the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #define AMD64_NPT_AWARE #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include "opt_pmap.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif #include static __inline boolean_t pmap_type_guest(pmap_t pmap) { return ((pmap->pm_type == PT_EPT) || (pmap->pm_type == PT_RVI)); } static __inline boolean_t pmap_emulate_ad_bits(pmap_t pmap) { return ((pmap->pm_flags & PMAP_EMULATE_AD_BITS) != 0); } static __inline pt_entry_t pmap_valid_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = X86_PG_V; break; case PT_EPT: if (pmap_emulate_ad_bits(pmap)) mask = EPT_PG_EMUL_V; else mask = EPT_PG_READ; break; default: panic("pmap_valid_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline pt_entry_t pmap_rw_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = X86_PG_RW; break; case PT_EPT: if (pmap_emulate_ad_bits(pmap)) mask = EPT_PG_EMUL_RW; else mask = EPT_PG_WRITE; break; default: panic("pmap_rw_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } static pt_entry_t pg_g; static __inline pt_entry_t pmap_global_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: mask = pg_g; break; case PT_RVI: case PT_EPT: mask = 0; break; default: panic("pmap_global_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline pt_entry_t pmap_accessed_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = X86_PG_A; break; case PT_EPT: if (pmap_emulate_ad_bits(pmap)) mask = EPT_PG_READ; else mask = EPT_PG_A; break; default: panic("pmap_accessed_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } static __inline pt_entry_t pmap_modified_bit(pmap_t pmap) { pt_entry_t mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = X86_PG_M; break; case PT_EPT: if (pmap_emulate_ad_bits(pmap)) mask = EPT_PG_WRITE; else mask = EPT_PG_M; break; default: panic("pmap_modified_bit: invalid pm_type %d", pmap->pm_type); } return (mask); } #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pa_index(pa) ((pa) >> PDRSHIFT) #define pa_to_pvh(pa) (&pv_table[pa_index(pa)]) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ struct rwlock *_new_lock; \ \ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL) \ rw_wunlock(*_lockp); \ *_lockp = _new_lock; \ rw_wlock(*_lockp); \ } \ } while (0) #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) #define RELEASE_PV_LIST_LOCK(lockp) do { \ struct rwlock **_lockp = (lockp); \ \ if (*_lockp != NULL) { \ rw_wunlock(*_lockp); \ *_lockp = NULL; \ } \ } while (0) #define VM_PAGE_TO_PV_LIST_LOCK(m) \ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ int nkpt; SYSCTL_INT(_machdep, OID_AUTO, nkpt, CTLFLAG_RD, &nkpt, 0, "Number of kernel page table pages allocated on bootup"); static int ndmpdp; vm_paddr_t dmaplimit; vm_offset_t kernel_vm_end = VM_MIN_KERNEL_ADDRESS; pt_entry_t pg_nx; static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); static int pat_works = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pat_works, CTLFLAG_RD, &pat_works, 1, "Is page attribute table fully functional?"); static int pg_ps_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pg_ps_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pg_ps_enabled, 0, "Are large page mappings enabled?"); #define PAT_INDEX_SIZE 8 static int pat_index[PAT_INDEX_SIZE]; /* cache mode to PAT index conversion */ static u_int64_t KPTphys; /* phys addr of kernel level 1 */ static u_int64_t KPDphys; /* phys addr of kernel level 2 */ u_int64_t KPDPphys; /* phys addr of kernel level 3 */ u_int64_t KPML4phys; /* phys addr of kernel level 4 */ static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */ static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */ static int ndmpdpphys; /* number of DMPDPphys pages */ static vm_paddr_t KERNend; /* phys addr of end of bootstrap data */ /* * pmap_mapdev support pre initialization (i.e. console) */ #define PMAP_PREINIT_MAPPING_COUNT 8 static struct pmap_preinit_mapping { vm_paddr_t pa; vm_offset_t va; vm_size_t sz; int mode; } pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT]; static int pmap_initialized; /* * Data for the pv entry allocation mechanism. * Updates to pv_invl_gen are protected by the pv_list_locks[] * elements, but reads are not. */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx __exclusive_cache_line pv_chunks_mutex; static struct rwlock __exclusive_cache_line pv_list_locks[NPV_LIST_LOCKS]; static u_long pv_invl_gen[NPV_LIST_LOCKS]; static struct md_page *pv_table; static struct md_page pv_dummy; /* * All those kernel PT submaps that BSD is so fond of */ pt_entry_t *CMAP1 = NULL; caddr_t CADDR1 = 0; static vm_offset_t qframe = 0; static struct mtx qframe_mtx; static int pmap_flags = PMAP_PDE_SUPERPAGE; /* flags for x86 pmaps */ int pmap_pcid_enabled = 1; SYSCTL_INT(_vm_pmap, OID_AUTO, pcid_enabled, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pmap_pcid_enabled, 0, "Is TLB Context ID enabled ?"); int invpcid_works = 0; SYSCTL_INT(_vm_pmap, OID_AUTO, invpcid_works, CTLFLAG_RD, &invpcid_works, 0, "Is the invpcid instruction available ?"); int __read_frequently pti = 0; SYSCTL_INT(_vm_pmap, OID_AUTO, pti, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pti, 0, "Page Table Isolation enabled"); static vm_object_t pti_obj; static pml4_entry_t *pti_pml4; static vm_pindex_t pti_pg_idx; static bool pti_finalized; static int pmap_pcid_save_cnt_proc(SYSCTL_HANDLER_ARGS) { int i; uint64_t res; res = 0; CPU_FOREACH(i) { res += cpuid_to_pcpu[i]->pc_pm_save_cnt; } return (sysctl_handle_64(oidp, &res, 0, req)); } SYSCTL_PROC(_vm_pmap, OID_AUTO, pcid_save_cnt, CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, pmap_pcid_save_cnt_proc, "QU", "Count of saved TLB context on switch"); static LIST_HEAD(, pmap_invl_gen) pmap_invl_gen_tracker = LIST_HEAD_INITIALIZER(&pmap_invl_gen_tracker); static struct mtx invl_gen_mtx; static u_long pmap_invl_gen = 0; /* Fake lock object to satisfy turnstiles interface. */ static struct lock_object invl_gen_ts = { .lo_name = "invlts", }; static bool pmap_not_in_di(void) { return (curthread->td_md.md_invl_gen.gen == 0); } #define PMAP_ASSERT_NOT_IN_DI() \ KASSERT(pmap_not_in_di(), ("DI already started")) /* * Start a new Delayed Invalidation (DI) block of code, executed by * the current thread. Within a DI block, the current thread may * destroy both the page table and PV list entries for a mapping and * then release the corresponding PV list lock before ensuring that * the mapping is flushed from the TLBs of any processors with the * pmap active. */ static void pmap_delayed_invl_started(void) { struct pmap_invl_gen *invl_gen; u_long currgen; invl_gen = &curthread->td_md.md_invl_gen; PMAP_ASSERT_NOT_IN_DI(); mtx_lock(&invl_gen_mtx); if (LIST_EMPTY(&pmap_invl_gen_tracker)) currgen = pmap_invl_gen; else currgen = LIST_FIRST(&pmap_invl_gen_tracker)->gen; invl_gen->gen = currgen + 1; LIST_INSERT_HEAD(&pmap_invl_gen_tracker, invl_gen, link); mtx_unlock(&invl_gen_mtx); } /* * Finish the DI block, previously started by the current thread. All * required TLB flushes for the pages marked by * pmap_delayed_invl_page() must be finished before this function is * called. * * This function works by bumping the global DI generation number to * the generation number of the current thread's DI, unless there is a * pending DI that started earlier. In the latter case, bumping the * global DI generation number would incorrectly signal that the * earlier DI had finished. Instead, this function bumps the earlier * DI's generation number to match the generation number of the * current thread's DI. */ static void pmap_delayed_invl_finished(void) { struct pmap_invl_gen *invl_gen, *next; struct turnstile *ts; invl_gen = &curthread->td_md.md_invl_gen; KASSERT(invl_gen->gen != 0, ("missed invl_started")); mtx_lock(&invl_gen_mtx); next = LIST_NEXT(invl_gen, link); if (next == NULL) { turnstile_chain_lock(&invl_gen_ts); ts = turnstile_lookup(&invl_gen_ts); pmap_invl_gen = invl_gen->gen; if (ts != NULL) { turnstile_broadcast(ts, TS_SHARED_QUEUE); turnstile_unpend(ts); } turnstile_chain_unlock(&invl_gen_ts); } else { next->gen = invl_gen->gen; } LIST_REMOVE(invl_gen, link); mtx_unlock(&invl_gen_mtx); invl_gen->gen = 0; } #ifdef PV_STATS static long invl_wait; SYSCTL_LONG(_vm_pmap, OID_AUTO, invl_wait, CTLFLAG_RD, &invl_wait, 0, "Number of times DI invalidation blocked pmap_remove_all/write"); #endif static u_long * pmap_delayed_invl_genp(vm_page_t m) { return (&pv_invl_gen[pa_index(VM_PAGE_TO_PHYS(m)) % NPV_LIST_LOCKS]); } /* * Ensure that all currently executing DI blocks, that need to flush * TLB for the given page m, actually flushed the TLB at the time the * function returned. If the page m has an empty PV list and we call * pmap_delayed_invl_wait(), upon its return we know that no CPU has a * valid mapping for the page m in either its page table or TLB. * * This function works by blocking until the global DI generation * number catches up with the generation number associated with the * given page m and its PV list. Since this function's callers * typically own an object lock and sometimes own a page lock, it * cannot sleep. Instead, it blocks on a turnstile to relinquish the * processor. */ static void pmap_delayed_invl_wait(vm_page_t m) { struct turnstile *ts; u_long *m_gen; #ifdef PV_STATS bool accounted = false; #endif m_gen = pmap_delayed_invl_genp(m); while (*m_gen > pmap_invl_gen) { #ifdef PV_STATS if (!accounted) { atomic_add_long(&invl_wait, 1); accounted = true; } #endif ts = turnstile_trywait(&invl_gen_ts); if (*m_gen > pmap_invl_gen) turnstile_wait(ts, NULL, TS_SHARED_QUEUE); else turnstile_cancel(ts); } } /* * Mark the page m's PV list as participating in the current thread's * DI block. Any threads concurrently using m's PV list to remove or * restrict all mappings to m will wait for the current thread's DI * block to complete before proceeding. * * The function works by setting the DI generation number for m's PV * list to at least the DI generation number of the current thread. * This forces a caller of pmap_delayed_invl_wait() to block until * current thread calls pmap_delayed_invl_finished(). */ static void pmap_delayed_invl_page(vm_page_t m) { u_long gen, *m_gen; rw_assert(VM_PAGE_TO_PV_LIST_LOCK(m), RA_WLOCKED); gen = curthread->td_md.md_invl_gen.gen; if (gen == 0) return; m_gen = pmap_delayed_invl_genp(m); if (*m_gen < gen) *m_gen = gen; } /* * Crashdump maps. */ static caddr_t crashdumpmap; /* * Internal flags for pmap_enter()'s helper functions. */ #define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */ #define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */ static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); static int popcnt_pc_map_pq(uint64_t *map); static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp); static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp); static bool pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, u_int flags, struct rwlock **lockp); #if VM_NRESERVLEVEL > 0 static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp); #endif static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode); static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va); static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp); static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va); static bool pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp); static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, vm_page_t m, struct rwlock **lockp); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte); static int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte); static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva); static void pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva); static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde); static void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode); static void pmap_pde_attr(pd_entry_t *pde, int cache_bits, int mask); #if VM_NRESERVLEVEL > 0 static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp); #endif static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot); static void pmap_pte_attr(pt_entry_t *pte, int cache_bits, int mask); static void pmap_pti_add_kva_locked(vm_offset_t sva, vm_offset_t eva, bool exec); static pdp_entry_t *pmap_pti_pdpe(vm_offset_t va); static pd_entry_t *pmap_pti_pde(vm_offset_t va); static void pmap_pti_wire_pte(void *pte); static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free, struct rwlock **lockp); static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, struct spglist *free); static bool pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pd_entry_t *pde, struct spglist *free, struct rwlock **lockp); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde); static void pmap_update_pde_invalidate(pmap_t, vm_offset_t va, pd_entry_t pde); static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, struct rwlock **lockp); static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp); static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); /********************/ /* Inline functions */ /********************/ /* Return a non-clipped PD index for a given VA */ static __inline vm_pindex_t pmap_pde_pindex(vm_offset_t va) { return (va >> PDRSHIFT); } /* Return a pointer to the PML4 slot that corresponds to a VA */ static __inline pml4_entry_t * pmap_pml4e(pmap_t pmap, vm_offset_t va) { return (&pmap->pm_pml4[pmap_pml4e_index(va)]); } /* Return a pointer to the PDP slot that corresponds to a VA */ static __inline pdp_entry_t * pmap_pml4e_to_pdpe(pml4_entry_t *pml4e, vm_offset_t va) { pdp_entry_t *pdpe; pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME); return (&pdpe[pmap_pdpe_index(va)]); } /* Return a pointer to the PDP slot that corresponds to a VA */ static __inline pdp_entry_t * pmap_pdpe(pmap_t pmap, vm_offset_t va) { pml4_entry_t *pml4e; pt_entry_t PG_V; PG_V = pmap_valid_bit(pmap); pml4e = pmap_pml4e(pmap, va); if ((*pml4e & PG_V) == 0) return (NULL); return (pmap_pml4e_to_pdpe(pml4e, va)); } /* Return a pointer to the PD slot that corresponds to a VA */ static __inline pd_entry_t * pmap_pdpe_to_pde(pdp_entry_t *pdpe, vm_offset_t va) { pd_entry_t *pde; pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME); return (&pde[pmap_pde_index(va)]); } /* Return a pointer to the PD slot that corresponds to a VA */ static __inline pd_entry_t * pmap_pde(pmap_t pmap, vm_offset_t va) { pdp_entry_t *pdpe; pt_entry_t PG_V; PG_V = pmap_valid_bit(pmap); pdpe = pmap_pdpe(pmap, va); if (pdpe == NULL || (*pdpe & PG_V) == 0) return (NULL); return (pmap_pdpe_to_pde(pdpe, va)); } /* Return a pointer to the PT slot that corresponds to a VA */ static __inline pt_entry_t * pmap_pde_to_pte(pd_entry_t *pde, vm_offset_t va) { pt_entry_t *pte; pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); return (&pte[pmap_pte_index(va)]); } /* Return a pointer to the PT slot that corresponds to a VA */ static __inline pt_entry_t * pmap_pte(pmap_t pmap, vm_offset_t va) { pd_entry_t *pde; pt_entry_t PG_V; PG_V = pmap_valid_bit(pmap); pde = pmap_pde(pmap, va); if (pde == NULL || (*pde & PG_V) == 0) return (NULL); if ((*pde & PG_PS) != 0) /* compat with i386 pmap_pte() */ return ((pt_entry_t *)pde); return (pmap_pde_to_pte(pde, va)); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(pmap->pm_stats.resident_count >= count, ("pmap %p resident count underflow %ld %d", pmap, pmap->pm_stats.resident_count, count)); pmap->pm_stats.resident_count -= count; } PMAP_INLINE pt_entry_t * vtopte(vm_offset_t va) { u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopte on a uva/gpa 0x%0lx", va)); return (PTmap + ((va >> PAGE_SHIFT) & mask)); } static __inline pd_entry_t * vtopde(vm_offset_t va) { u_int64_t mask = ((1ul << (NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1); KASSERT(va >= VM_MAXUSER_ADDRESS, ("vtopde on a uva/gpa 0x%0lx", va)); return (PDmap + ((va >> PDRSHIFT) & mask)); } static u_int64_t allocpages(vm_paddr_t *firstaddr, int n) { u_int64_t ret; ret = *firstaddr; bzero((void *)ret, n * PAGE_SIZE); *firstaddr += n * PAGE_SIZE; return (ret); } CTASSERT(powerof2(NDMPML4E)); /* number of kernel PDP slots */ #define NKPDPE(ptpgs) howmany(ptpgs, NPDEPG) static void nkpt_init(vm_paddr_t addr) { int pt_pages; #ifdef NKPT pt_pages = NKPT; #else pt_pages = howmany(addr, 1 << PDRSHIFT); pt_pages += NKPDPE(pt_pages); /* * Add some slop beyond the bare minimum required for bootstrapping * the kernel. * * This is quite important when allocating KVA for kernel modules. * The modules are required to be linked in the negative 2GB of * the address space. If we run out of KVA in this region then * pmap_growkernel() will need to allocate page table pages to map * the entire 512GB of KVA space which is an unnecessary tax on * physical memory. * * Secondly, device memory mapped as part of setting up the low- * level console(s) is taken from KVA, starting at virtual_avail. * This is because cninit() is called after pmap_bootstrap() but * before vm_init() and pmap_init(). 20MB for a frame buffer is * not uncommon. */ pt_pages += 32; /* 64MB additional slop. */ #endif nkpt = pt_pages; } /* * Returns the proper write/execute permission for a physical page that is * part of the initial boot allocations. * * If the page has kernel text, it is marked as read-only. If the page has * kernel read-only data, it is marked as read-only/not-executable. If the * page has only read-write data, it is marked as read-write/not-executable. * If the page is below/above the kernel range, it is marked as read-write. * * This function operates on 2M pages, since we map the kernel space that * way. * * Note that this doesn't currently provide any protection for modules. */ static inline pt_entry_t bootaddr_rwx(vm_paddr_t pa) { /* * Everything in the same 2M page as the start of the kernel * should be static. On the other hand, things in the same 2M * page as the end of the kernel could be read-write/executable, * as the kernel image is not guaranteed to end on a 2M boundary. */ if (pa < trunc_2mpage(btext - KERNBASE) || pa >= trunc_2mpage(_end - KERNBASE)) return (X86_PG_RW); /* * The linker should ensure that the read-only and read-write * portions don't share the same 2M page, so this shouldn't * impact read-only data. However, in any case, any page with * read-write data needs to be read-write. */ if (pa >= trunc_2mpage(brwsection - KERNBASE)) return (X86_PG_RW | pg_nx); /* * Mark any 2M page containing kernel text as read-only. Mark * other pages with read-only data as read-only and not executable. * (It is likely a small portion of the read-only data section will * be marked as read-only, but executable. This should be acceptable * since the read-only protection will keep the data from changing.) * Note that fixups to the .text section will still work until we * set CR0.WP. */ if (pa < round_2mpage(etext - KERNBASE)) return (0); return (pg_nx); } static void create_pagetables(vm_paddr_t *firstaddr) { int i, j, ndm1g, nkpdpe, nkdmpde; pt_entry_t *pt_p; pd_entry_t *pd_p; pdp_entry_t *pdp_p; pml4_entry_t *p4_p; uint64_t DMPDkernphys; /* Allocate page table pages for the direct map */ ndmpdp = howmany(ptoa(Maxmem), NBPDP); if (ndmpdp < 4) /* Minimum 4GB of dirmap */ ndmpdp = 4; ndmpdpphys = howmany(ndmpdp, NPDPEPG); if (ndmpdpphys > NDMPML4E) { /* * Each NDMPML4E allows 512 GB, so limit to that, * and then readjust ndmpdp and ndmpdpphys. */ printf("NDMPML4E limits system to %d GB\n", NDMPML4E * 512); Maxmem = atop(NDMPML4E * NBPML4); ndmpdpphys = NDMPML4E; ndmpdp = NDMPML4E * NPDEPG; } DMPDPphys = allocpages(firstaddr, ndmpdpphys); ndm1g = 0; if ((amd_feature & AMDID_PAGE1GB) != 0) { /* * Calculate the number of 1G pages that will fully fit in * Maxmem. */ ndm1g = ptoa(Maxmem) >> PDPSHIFT; /* * Allocate 2M pages for the kernel. These will be used in * place of the first one or more 1G pages from ndm1g. */ nkdmpde = howmany((vm_offset_t)(brwsection - KERNBASE), NBPDP); DMPDkernphys = allocpages(firstaddr, nkdmpde); } if (ndm1g < ndmpdp) DMPDphys = allocpages(firstaddr, ndmpdp - ndm1g); dmaplimit = (vm_paddr_t)ndmpdp << PDPSHIFT; /* Allocate pages */ KPML4phys = allocpages(firstaddr, 1); KPDPphys = allocpages(firstaddr, NKPML4E); /* * Allocate the initial number of kernel page table pages required to * bootstrap. We defer this until after all memory-size dependent * allocations are done (e.g. direct map), so that we don't have to * build in too much slop in our estimate. * * Note that when NKPML4E > 1, we have an empty page underneath * all but the KPML4I'th one, so we need NKPML4E-1 extra (zeroed) * pages. (pmap_enter requires a PD page to exist for each KPML4E.) */ nkpt_init(*firstaddr); nkpdpe = NKPDPE(nkpt); KPTphys = allocpages(firstaddr, nkpt); KPDphys = allocpages(firstaddr, nkpdpe); /* Fill in the underlying page table pages */ /* XXX not fully used, underneath 2M pages */ pt_p = (pt_entry_t *)KPTphys; for (i = 0; ptoa(i) < *firstaddr; i++) pt_p[i] = ptoa(i) | X86_PG_V | pg_g | bootaddr_rwx(ptoa(i)); /* Now map the page tables at their location within PTmap */ pd_p = (pd_entry_t *)KPDphys; for (i = 0; i < nkpt; i++) pd_p[i] = (KPTphys + ptoa(i)) | X86_PG_RW | X86_PG_V; /* Map from zero to end of allocations under 2M pages */ /* This replaces some of the KPTphys entries above */ for (i = 0; (i << PDRSHIFT) < *firstaddr; i++) /* Preset PG_M and PG_A because demotion expects it. */ pd_p[i] = (i << PDRSHIFT) | X86_PG_V | PG_PS | pg_g | X86_PG_M | X86_PG_A | bootaddr_rwx(i << PDRSHIFT); /* * Because we map the physical blocks in 2M pages, adjust firstaddr * to record the physical blocks we've actually mapped into kernel * virtual address space. */ *firstaddr = round_2mpage(*firstaddr); /* And connect up the PD to the PDP (leaving room for L4 pages) */ pdp_p = (pdp_entry_t *)(KPDPphys + ptoa(KPML4I - KPML4BASE)); for (i = 0; i < nkpdpe; i++) pdp_p[i + KPDPI] = (KPDphys + ptoa(i)) | X86_PG_RW | X86_PG_V; /* * Now, set up the direct map region using 2MB and/or 1GB pages. If * the end of physical memory is not aligned to a 1GB page boundary, * then the residual physical memory is mapped with 2MB pages. Later, * if pmap_mapdev{_attr}() uses the direct map for non-write-back * memory, pmap_change_attr() will demote any 2MB or 1GB page mappings * that are partially used. */ pd_p = (pd_entry_t *)DMPDphys; for (i = NPDEPG * ndm1g, j = 0; i < NPDEPG * ndmpdp; i++, j++) { pd_p[j] = (vm_paddr_t)i << PDRSHIFT; /* Preset PG_M and PG_A because demotion expects it. */ pd_p[j] |= X86_PG_RW | X86_PG_V | PG_PS | pg_g | X86_PG_M | X86_PG_A | pg_nx; } pdp_p = (pdp_entry_t *)DMPDPphys; for (i = 0; i < ndm1g; i++) { pdp_p[i] = (vm_paddr_t)i << PDPSHIFT; /* Preset PG_M and PG_A because demotion expects it. */ pdp_p[i] |= X86_PG_RW | X86_PG_V | PG_PS | pg_g | X86_PG_M | X86_PG_A | pg_nx; } for (j = 0; i < ndmpdp; i++, j++) { pdp_p[i] = DMPDphys + ptoa(j); pdp_p[i] |= X86_PG_RW | X86_PG_V; } /* * Instead of using a 1G page for the memory containing the kernel, * use 2M pages with appropriate permissions. (If using 1G pages, * this will partially overwrite the PDPEs above.) */ if (ndm1g) { pd_p = (pd_entry_t *)DMPDkernphys; for (i = 0; i < (NPDEPG * nkdmpde); i++) pd_p[i] = (i << PDRSHIFT) | X86_PG_V | PG_PS | pg_g | X86_PG_M | X86_PG_A | pg_nx | bootaddr_rwx(i << PDRSHIFT); for (i = 0; i < nkdmpde; i++) pdp_p[i] = (DMPDkernphys + ptoa(i)) | X86_PG_RW | X86_PG_V; } /* And recursively map PML4 to itself in order to get PTmap */ p4_p = (pml4_entry_t *)KPML4phys; p4_p[PML4PML4I] = KPML4phys; p4_p[PML4PML4I] |= X86_PG_RW | X86_PG_V | pg_nx; /* Connect the Direct Map slot(s) up to the PML4. */ for (i = 0; i < ndmpdpphys; i++) { p4_p[DMPML4I + i] = DMPDPphys + ptoa(i); p4_p[DMPML4I + i] |= X86_PG_RW | X86_PG_V; } /* Connect the KVA slots up to the PML4 */ for (i = 0; i < NKPML4E; i++) { p4_p[KPML4BASE + i] = KPDPphys + ptoa(i); p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V; } } /* * Bootstrap the system enough to run with virtual memory. * * On amd64 this is called after mapping has already been enabled * and just syncs the pmap module with what has already been done. * [We can't call it easily with mapping off since the kernel is not * mapped with PA == VA, hence we would have to relocate every address * from the linked base (virtual) address "KERNBASE" to the actual * (physical) address starting relative to 0] */ void pmap_bootstrap(vm_paddr_t *firstaddr) { vm_offset_t va; pt_entry_t *pte; uint64_t cr4; int i; KERNend = *firstaddr; if (!pti) pg_g = X86_PG_G; /* * Create an initial set of page tables to run the kernel in. */ create_pagetables(firstaddr); /* * Add a physical memory segment (vm_phys_seg) corresponding to the * preallocated kernel page table pages so that vm_page structures * representing these pages will be created. The vm_page structures * are required for promotion of the corresponding kernel virtual * addresses to superpage mappings. */ vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt)); virtual_avail = (vm_offset_t) KERNBASE + *firstaddr; virtual_end = VM_MAX_KERNEL_ADDRESS; /* * Enable PG_G global pages, then switch to the kernel page * table from the bootstrap page table. After the switch, it * is possible to enable SMEP and SMAP since PG_U bits are * correct now. */ cr4 = rcr4(); cr4 |= CR4_PGE; load_cr4(cr4); load_cr3(KPML4phys); if (cpu_stdext_feature & CPUID_STDEXT_SMEP) cr4 |= CR4_SMEP; if (cpu_stdext_feature & CPUID_STDEXT_SMAP) cr4 |= CR4_SMAP; load_cr4(cr4); /* * Initialize the kernel pmap (which is statically allocated). */ PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_cr3 = KPML4phys; kernel_pmap->pm_ucr3 = PMAP_NO_CR3; CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); kernel_pmap->pm_flags = pmap_flags; /* * Initialize the TLB invalidations generation number lock. */ mtx_init(&invl_gen_mtx, "invlgn", NULL, MTX_DEF); /* * Reserve some special page table entries/VA space for temporary * mapping of pages. */ #define SYSMAP(c, p, v, n) \ v = (c)va; va += ((n)*PAGE_SIZE); p = pte; pte += (n); va = virtual_avail; pte = vtopte(va); /* * Crashdump maps. The first page is reused as CMAP1 for the * memory test. */ SYSMAP(caddr_t, CMAP1, crashdumpmap, MAXDUMPPGS) CADDR1 = crashdumpmap; virtual_avail = va; /* * Initialize the PAT MSR. * pmap_init_pat() clears and sets CR4_PGE, which, as a * side-effect, invalidates stale PG_G TLB entries that might * have been created in our pre-boot environment. */ pmap_init_pat(); /* Initialize TLB Context Id. */ if (pmap_pcid_enabled) { for (i = 0; i < MAXCPU; i++) { kernel_pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN; kernel_pmap->pm_pcids[i].pm_gen = 1; } /* * PMAP_PCID_KERN + 1 is used for initialization of * proc0 pmap. The pmap' pcid state might be used by * EFIRT entry before first context switch, so it * needs to be valid. */ PCPU_SET(pcid_next, PMAP_PCID_KERN + 2); PCPU_SET(pcid_gen, 1); /* * pcpu area for APs is zeroed during AP startup. * pc_pcid_next and pc_pcid_gen are initialized by AP * during pcpu setup. */ load_cr4(rcr4() | CR4_PCIDE); } } /* * Setup the PAT MSR. */ void pmap_init_pat(void) { int pat_table[PAT_INDEX_SIZE]; uint64_t pat_msr; u_long cr0, cr4; int i; /* Bail if this CPU doesn't implement PAT. */ if ((cpu_feature & CPUID_PAT) == 0) panic("no PAT??"); /* Set default PAT index table. */ for (i = 0; i < PAT_INDEX_SIZE; i++) pat_table[i] = -1; pat_table[PAT_WRITE_BACK] = 0; pat_table[PAT_WRITE_THROUGH] = 1; pat_table[PAT_UNCACHEABLE] = 3; pat_table[PAT_WRITE_COMBINING] = 3; pat_table[PAT_WRITE_PROTECTED] = 3; pat_table[PAT_UNCACHED] = 3; /* Initialize default PAT entries. */ pat_msr = PAT_VALUE(0, PAT_WRITE_BACK) | PAT_VALUE(1, PAT_WRITE_THROUGH) | PAT_VALUE(2, PAT_UNCACHED) | PAT_VALUE(3, PAT_UNCACHEABLE) | PAT_VALUE(4, PAT_WRITE_BACK) | PAT_VALUE(5, PAT_WRITE_THROUGH) | PAT_VALUE(6, PAT_UNCACHED) | PAT_VALUE(7, PAT_UNCACHEABLE); if (pat_works) { /* * Leave the indices 0-3 at the default of WB, WT, UC-, and UC. * Program 5 and 6 as WP and WC. * Leave 4 and 7 as WB and UC. */ pat_msr &= ~(PAT_MASK(5) | PAT_MASK(6)); pat_msr |= PAT_VALUE(5, PAT_WRITE_PROTECTED) | PAT_VALUE(6, PAT_WRITE_COMBINING); pat_table[PAT_UNCACHED] = 2; pat_table[PAT_WRITE_PROTECTED] = 5; pat_table[PAT_WRITE_COMBINING] = 6; } else { /* * Just replace PAT Index 2 with WC instead of UC-. */ pat_msr &= ~PAT_MASK(2); pat_msr |= PAT_VALUE(2, PAT_WRITE_COMBINING); pat_table[PAT_WRITE_COMBINING] = 2; } /* Disable PGE. */ cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* Disable caches (CD = 1, NW = 0). */ cr0 = rcr0(); load_cr0((cr0 & ~CR0_NW) | CR0_CD); /* Flushes caches and TLBs. */ wbinvd(); invltlb(); /* Update PAT and index table. */ wrmsr(MSR_PAT, pat_msr); for (i = 0; i < PAT_INDEX_SIZE; i++) pat_index[i] = pat_table[i]; /* Flush caches and TLBs again. */ wbinvd(); invltlb(); /* Restore caches and PGE. */ load_cr0(cr0); load_cr4(cr4); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pat_mode = PAT_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { struct pmap_preinit_mapping *ppim; vm_page_t mpte; vm_size_t s; int error, i, pv_npg, ret, skz63; /* L1TF, reserve page @0 unconditionally */ vm_page_blacklist_add(0, bootverbose); /* Detect bare-metal Skylake Server and Skylake-X. */ if (vm_guest == VM_GUEST_NO && cpu_vendor_id == CPU_VENDOR_INTEL && CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) == 0x55) { /* * Skylake-X errata SKZ63. Processor May Hang When * Executing Code In an HLE Transaction Region between * 40000000H and 403FFFFFH. * * Mark the pages in the range as preallocated. It * seems to be impossible to distinguish between * Skylake Server and Skylake X. */ skz63 = 1; TUNABLE_INT_FETCH("hw.skz63_enable", &skz63); if (skz63 != 0) { if (bootverbose) printf("SKZ63: skipping 4M RAM starting " "at physical 1G\n"); for (i = 0; i < atop(0x400000); i++) { ret = vm_page_blacklist_add(0x40000000 + ptoa(i), FALSE); if (!ret && bootverbose) printf("page at %#lx already used\n", 0x40000000 + ptoa(i)); } } } /* * Initialize the vm page array entries for the kernel pmap's * page table pages. */ PMAP_LOCK(kernel_pmap); for (i = 0; i < nkpt; i++) { mpte = PHYS_TO_VM_PAGE(KPTphys + (i << PAGE_SHIFT)); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_init: page table page is out of range")); mpte->pindex = pmap_pde_pindex(KERNBASE) + i; mpte->phys_addr = KPTphys + (i << PAGE_SHIFT); mpte->wire_count = 1; if (i << PDRSHIFT < KERNend && pmap_insert_pt_page(kernel_pmap, mpte)) panic("pmap_init: pmap_insert_pt_page failed"); } PMAP_UNLOCK(kernel_pmap); vm_wire_add(nkpt); /* * If the kernel is running on a virtual machine, then it must assume * that MCA is enabled by the hypervisor. Moreover, the kernel must * be prepared for the hypervisor changing the vendor and family that * are reported by CPUID. Consequently, the workaround for AMD Family * 10h Erratum 383 is enabled if the processor's feature set does not * include at least one feature that is only supported by older Intel * or newer AMD processors. */ if (vm_guest != VM_GUEST_NO && (cpu_feature & CPUID_SS) == 0 && (cpu_feature2 & (CPUID2_SSSE3 | CPUID2_SSE41 | CPUID2_AESNI | CPUID2_AVX | CPUID2_XSAVE)) == 0 && (amd_feature2 & (AMDID2_XOP | AMDID2_FMA4)) == 0) workaround_erratum383 = 1; /* * Are large page mappings enabled? */ TUNABLE_INT_FETCH("vm.pmap.pg_ps_enabled", &pg_ps_enabled); if (pg_ps_enabled) { KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, ("pmap_init: can't assign to pagesizes[1]")); pagesizes[1] = NBPDR; } /* * Initialize the pv chunk list mutex. */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); /* * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); /* * Calculate the size of the pv head table for superpages. */ pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, NBPDR); /* * Allocate memory for the pv head table for superpages. */ s = (vm_size_t)(pv_npg * sizeof(struct md_page)); s = round_page(s); pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO); for (i = 0; i < pv_npg; i++) TAILQ_INIT(&pv_table[i].pv_list); TAILQ_INIT(&pv_dummy.pv_list); pmap_initialized = 1; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) continue; /* Make the direct map consistent */ if (ppim->pa < dmaplimit && ppim->pa + ppim->sz <= dmaplimit) { (void)pmap_change_attr(PHYS_TO_DMAP(ppim->pa), ppim->sz, ppim->mode); } if (!bootverbose) continue; printf("PPIM %u: PA=%#lx, VA=%#lx, size=%#lx, mode=%#x\n", i, ppim->pa, ppim->va, ppim->sz, ppim->mode); } mtx_init(&qframe_mtx, "qfrmlk", NULL, MTX_SPIN); error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, (vmem_addr_t *)&qframe); if (error != 0) panic("qframe allocation failed"); } static SYSCTL_NODE(_vm_pmap, OID_AUTO, pde, CTLFLAG_RD, 0, "2MB page mapping counters"); static u_long pmap_pde_demotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pde_demotions, 0, "2MB page demotions"); static u_long pmap_pde_mappings; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, mappings, CTLFLAG_RD, &pmap_pde_mappings, 0, "2MB page mappings"); static u_long pmap_pde_p_failures; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, p_failures, CTLFLAG_RD, &pmap_pde_p_failures, 0, "2MB page promotion failures"); static u_long pmap_pde_promotions; SYSCTL_ULONG(_vm_pmap_pde, OID_AUTO, promotions, CTLFLAG_RD, &pmap_pde_promotions, 0, "2MB page promotions"); static SYSCTL_NODE(_vm_pmap, OID_AUTO, pdpe, CTLFLAG_RD, 0, "1GB page mapping counters"); static u_long pmap_pdpe_demotions; SYSCTL_ULONG(_vm_pmap_pdpe, OID_AUTO, demotions, CTLFLAG_RD, &pmap_pdpe_demotions, 0, "1GB page demotions"); /*************************************************** * Low level helper routines..... ***************************************************/ static pt_entry_t pmap_swap_pat(pmap_t pmap, pt_entry_t entry) { int x86_pat_bits = X86_PG_PTE_PAT | X86_PG_PDE_PAT; switch (pmap->pm_type) { case PT_X86: case PT_RVI: /* Verify that both PAT bits are not set at the same time */ KASSERT((entry & x86_pat_bits) != x86_pat_bits, ("Invalid PAT bits in entry %#lx", entry)); /* Swap the PAT bits if one of them is set */ if ((entry & x86_pat_bits) != 0) entry ^= x86_pat_bits; break; case PT_EPT: /* * Nothing to do - the memory attributes are represented * the same way for regular pages and superpages. */ break; default: panic("pmap_switch_pat_bits: bad pm_type %d", pmap->pm_type); } return (entry); } boolean_t pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode) { return (mode >= 0 && mode < PAT_INDEX_SIZE && pat_index[(int)mode] >= 0); } /* * Determine the appropriate bits to set in a PTE or PDE for a specified * caching mode. */ int pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde) { int cache_bits, pat_flag, pat_idx; if (!pmap_is_valid_memattr(pmap, mode)) panic("Unknown caching mode %d\n", mode); switch (pmap->pm_type) { case PT_X86: case PT_RVI: /* The PAT bit is different for PTE's and PDE's. */ pat_flag = is_pde ? X86_PG_PDE_PAT : X86_PG_PTE_PAT; /* Map the caching mode to a PAT index. */ pat_idx = pat_index[mode]; /* Map the 3-bit index value into the PAT, PCD, and PWT bits. */ cache_bits = 0; if (pat_idx & 0x4) cache_bits |= pat_flag; if (pat_idx & 0x2) cache_bits |= PG_NC_PCD; if (pat_idx & 0x1) cache_bits |= PG_NC_PWT; break; case PT_EPT: cache_bits = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(mode); break; default: panic("unsupported pmap type %d", pmap->pm_type); } return (cache_bits); } static int pmap_cache_mask(pmap_t pmap, boolean_t is_pde) { int mask; switch (pmap->pm_type) { case PT_X86: case PT_RVI: mask = is_pde ? X86_PG_PDE_CACHE : X86_PG_PTE_CACHE; break; case PT_EPT: mask = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(0x7); break; default: panic("pmap_cache_mask: invalid pm_type %d", pmap->pm_type); } return (mask); } bool pmap_ps_enabled(pmap_t pmap) { return (pg_ps_enabled && (pmap->pm_flags & PMAP_PDE_SUPERPAGE) != 0); } static void pmap_update_pde_store(pmap_t pmap, pd_entry_t *pde, pd_entry_t newpde) { switch (pmap->pm_type) { case PT_X86: break; case PT_RVI: case PT_EPT: /* * XXX * This is a little bogus since the generation number is * supposed to be bumped up when a region of the address * space is invalidated in the page tables. * * In this case the old PDE entry is valid but yet we want * to make sure that any mappings using the old entry are * invalidated in the TLB. * * The reason this works as expected is because we rendezvous * "all" host cpus and force any vcpu context to exit as a * side-effect. */ atomic_add_acq_long(&pmap->pm_eptgen, 1); break; default: panic("pmap_update_pde_store: bad pm_type %d", pmap->pm_type); } pde_store(pde, newpde); } /* * After changing the page size for the specified virtual address in the page * table, flush the corresponding entries from the processor's TLB. Only the * calling processor's TLB is affected. * * The calling thread must be pinned to a processor. */ static void pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde) { pt_entry_t PG_G; if (pmap_type_guest(pmap)) return; KASSERT(pmap->pm_type == PT_X86, ("pmap_update_pde_invalidate: invalid type %d", pmap->pm_type)); PG_G = pmap_global_bit(pmap); if ((newpde & PG_PS) == 0) /* Demotion: flush a specific 2MB page mapping. */ invlpg(va); else if ((newpde & PG_G) == 0) /* * Promotion: flush every 4KB page mapping from the TLB * because there are too many to flush individually. */ invltlb(); else { /* * Promotion: flush every 4KB page mapping from the TLB, * including any global (PG_G) mappings. */ invltlb_glob(); } } #ifdef SMP /* * For SMP, these functions have to use the IPI mechanism for coherence. * * N.B.: Before calling any of the following TLB invalidation functions, * the calling processor must ensure that all stores updating a non- * kernel page table are globally performed. Otherwise, another * processor could cache an old, pre-update entry without being * invalidated. This can happen one of two ways: (1) The pmap becomes * active on another processor after its pm_active field is checked by * one of the following functions but before a store updating the page * table is globally performed. (2) The pmap becomes active on another * processor before its pm_active field is checked but due to * speculative loads one of the following functions stills reads the * pmap as inactive on the other processor. * * The kernel page table is exempt because its pm_active field is * immutable. The kernel page table is always active on every * processor. */ /* * Interrupt the cpus that are executing in the guest context. * This will force the vcpu to exit and the cached EPT mappings * will be invalidated by the host before the next vmresume. */ static __inline void pmap_invalidate_ept(pmap_t pmap) { int ipinum; sched_pin(); KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active), ("pmap_invalidate_ept: absurd pm_active")); /* * The TLB mappings associated with a vcpu context are not * flushed each time a different vcpu is chosen to execute. * * This is in contrast with a process's vtop mappings that * are flushed from the TLB on each context switch. * * Therefore we need to do more than just a TLB shootdown on * the active cpus in 'pmap->pm_active'. To do this we keep * track of the number of invalidations performed on this pmap. * * Each vcpu keeps a cache of this counter and compares it * just before a vmresume. If the counter is out-of-date an * invept will be done to flush stale mappings from the TLB. */ atomic_add_acq_long(&pmap->pm_eptgen, 1); /* * Force the vcpu to exit and trap back into the hypervisor. */ ipinum = pmap->pm_flags & PMAP_NESTED_IPIMASK; ipi_selected(pmap->pm_active, ipinum); sched_unpin(); } static cpuset_t pmap_invalidate_cpu_mask(pmap_t pmap) { return (pmap == kernel_pmap ? all_cpus : pmap->pm_active); } static inline void pmap_invalidate_page_pcid(pmap_t pmap, vm_offset_t va, const bool invpcid_works1) { struct invpcid_descr d; uint64_t kcr3, ucr3; uint32_t pcid; u_int cpuid, i; cpuid = PCPU_GET(cpuid); if (pmap == PCPU_GET(curpmap)) { if (pmap->pm_ucr3 != PMAP_NO_CR3) { /* * Because pm_pcid is recalculated on a * context switch, we must disable switching. * Otherwise, we might use a stale value * below. */ critical_enter(); pcid = pmap->pm_pcids[cpuid].pm_pcid; if (invpcid_works1) { d.pcid = pcid | PMAP_PCID_USER_PT; d.pad = 0; d.addr = va; invpcid(&d, INVPCID_ADDR); } else { kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE; ucr3 = pmap->pm_ucr3 | pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; pmap_pti_pcid_invlpg(ucr3, kcr3, va); } critical_exit(); } } else pmap->pm_pcids[cpuid].pm_gen = 0; CPU_FOREACH(i) { if (cpuid != i) pmap->pm_pcids[i].pm_gen = 0; } /* * The fence is between stores to pm_gen and the read of the * pm_active mask. We need to ensure that it is impossible * for us to miss the bit update in pm_active and * simultaneously observe a non-zero pm_gen in * pmap_activate_sw(), otherwise TLB update is missed. * Without the fence, IA32 allows such an outcome. Note that * pm_active is updated by a locked operation, which provides * the reciprocal fence. */ atomic_thread_fence_seq_cst(); } static void pmap_invalidate_page_pcid_invpcid(pmap_t pmap, vm_offset_t va) { pmap_invalidate_page_pcid(pmap, va, true); } static void pmap_invalidate_page_pcid_noinvpcid(pmap_t pmap, vm_offset_t va) { pmap_invalidate_page_pcid(pmap, va, false); } static void pmap_invalidate_page_nopcid(pmap_t pmap, vm_offset_t va) { } DEFINE_IFUNC(static, void, pmap_invalidate_page_mode, (pmap_t, vm_offset_t), static) { if (pmap_pcid_enabled) return (invpcid_works ? pmap_invalidate_page_pcid_invpcid : pmap_invalidate_page_pcid_noinvpcid); return (pmap_invalidate_page_nopcid); } void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_page: invalid type %d", pmap->pm_type)); sched_pin(); - smp_masked_invlpg(pmap_invalidate_cpu_mask(pmap), va, pmap); if (pmap == kernel_pmap) { invlpg(va); } else { if (pmap == PCPU_GET(curpmap)) invlpg(va); pmap_invalidate_page_mode(pmap, va); } + smp_masked_invlpg(pmap_invalidate_cpu_mask(pmap), va, pmap); sched_unpin(); } /* 4k PTEs -- Chosen to exceed the total size of Broadwell L2 TLB */ #define PMAP_INVLPG_THRESHOLD (4 * 1024 * PAGE_SIZE) static void pmap_invalidate_range_pcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, const bool invpcid_works1) { struct invpcid_descr d; uint64_t kcr3, ucr3; uint32_t pcid; u_int cpuid, i; cpuid = PCPU_GET(cpuid); if (pmap == PCPU_GET(curpmap)) { if (pmap->pm_ucr3 != PMAP_NO_CR3) { critical_enter(); pcid = pmap->pm_pcids[cpuid].pm_pcid; if (invpcid_works1) { d.pcid = pcid | PMAP_PCID_USER_PT; d.pad = 0; d.addr = sva; for (; d.addr < eva; d.addr += PAGE_SIZE) invpcid(&d, INVPCID_ADDR); } else { kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE; ucr3 = pmap->pm_ucr3 | pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; pmap_pti_pcid_invlrng(ucr3, kcr3, sva, eva); } critical_exit(); } } else pmap->pm_pcids[cpuid].pm_gen = 0; CPU_FOREACH(i) { if (cpuid != i) pmap->pm_pcids[i].pm_gen = 0; } /* See the comment in pmap_invalidate_page_pcid(). */ atomic_thread_fence_seq_cst(); } static void pmap_invalidate_range_pcid_invpcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { pmap_invalidate_range_pcid(pmap, sva, eva, true); } static void pmap_invalidate_range_pcid_noinvpcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { pmap_invalidate_range_pcid(pmap, sva, eva, false); } static void pmap_invalidate_range_nopcid(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { } DEFINE_IFUNC(static, void, pmap_invalidate_range_mode, (pmap_t, vm_offset_t, vm_offset_t), static) { if (pmap_pcid_enabled) return (invpcid_works ? pmap_invalidate_range_pcid_invpcid : pmap_invalidate_range_pcid_noinvpcid); return (pmap_invalidate_range_nopcid); } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; if (eva - sva >= PMAP_INVLPG_THRESHOLD) { pmap_invalidate_all(pmap); return; } if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_range: invalid type %d", pmap->pm_type)); sched_pin(); - smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap); if (pmap == kernel_pmap) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } else { if (pmap == PCPU_GET(curpmap)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } pmap_invalidate_range_mode(pmap, sva, eva); } + smp_masked_invlpg_range(pmap_invalidate_cpu_mask(pmap), sva, eva, pmap); sched_unpin(); } static inline void pmap_invalidate_all_pcid(pmap_t pmap, bool invpcid_works1) { struct invpcid_descr d; uint64_t kcr3, ucr3; uint32_t pcid; u_int cpuid, i; if (pmap == kernel_pmap) { if (invpcid_works1) { bzero(&d, sizeof(d)); invpcid(&d, INVPCID_CTXGLOB); } else { invltlb_glob(); } } else { cpuid = PCPU_GET(cpuid); if (pmap == PCPU_GET(curpmap)) { critical_enter(); pcid = pmap->pm_pcids[cpuid].pm_pcid; if (invpcid_works1) { d.pcid = pcid; d.pad = 0; d.addr = 0; invpcid(&d, INVPCID_CTX); if (pmap->pm_ucr3 != PMAP_NO_CR3) { d.pcid |= PMAP_PCID_USER_PT; invpcid(&d, INVPCID_CTX); } } else { kcr3 = pmap->pm_cr3 | pcid; ucr3 = pmap->pm_ucr3; if (ucr3 != PMAP_NO_CR3) { ucr3 |= pcid | PMAP_PCID_USER_PT; pmap_pti_pcid_invalidate(ucr3, kcr3); } else { load_cr3(kcr3); } } critical_exit(); } else pmap->pm_pcids[cpuid].pm_gen = 0; CPU_FOREACH(i) { if (cpuid != i) pmap->pm_pcids[i].pm_gen = 0; } } /* See the comment in pmap_invalidate_page_pcid(). */ atomic_thread_fence_seq_cst(); } static void pmap_invalidate_all_pcid_invpcid(pmap_t pmap) { pmap_invalidate_all_pcid(pmap, true); } static void pmap_invalidate_all_pcid_noinvpcid(pmap_t pmap) { pmap_invalidate_all_pcid(pmap, false); } static void pmap_invalidate_all_nopcid(pmap_t pmap) { if (pmap == kernel_pmap) invltlb_glob(); else if (pmap == PCPU_GET(curpmap)) invltlb(); } DEFINE_IFUNC(static, void, pmap_invalidate_all_mode, (pmap_t), static) { if (pmap_pcid_enabled) return (invpcid_works ? pmap_invalidate_all_pcid_invpcid : pmap_invalidate_all_pcid_noinvpcid); return (pmap_invalidate_all_nopcid); } void pmap_invalidate_all(pmap_t pmap) { if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_all: invalid type %d", pmap->pm_type)); sched_pin(); - smp_masked_invltlb(pmap_invalidate_cpu_mask(pmap), pmap); pmap_invalidate_all_mode(pmap); + smp_masked_invltlb(pmap_invalidate_cpu_mask(pmap), pmap); sched_unpin(); } void pmap_invalidate_cache(void) { sched_pin(); wbinvd(); smp_cache_flush(); sched_unpin(); } struct pde_action { cpuset_t invalidate; /* processors that invalidate their TLB */ pmap_t pmap; vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; u_int store; /* processor that updates the PDE */ }; static void pmap_update_pde_action(void *arg) { struct pde_action *act = arg; if (act->store == PCPU_GET(cpuid)) pmap_update_pde_store(act->pmap, act->pde, act->newpde); } static void pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; if (CPU_ISSET(PCPU_GET(cpuid), &act->invalidate)) pmap_update_pde_invalidate(act->pmap, act->va, act->newpde); } /* * Change the page size for the specified virtual address in a way that * prevents any possibility of the TLB ever having two entries that map the * same virtual address using different page sizes. This is the recommended * workaround for Erratum 383 on AMD Family 10h processors. It prevents a * machine check exception for a TLB state that is improperly diagnosed as a * hardware error. */ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; cpuset_t active, other_cpus; u_int cpuid; sched_pin(); cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); if (pmap == kernel_pmap || pmap_type_guest(pmap)) active = all_cpus; else { active = pmap->pm_active; } if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpuid; act.invalidate = active; act.va = va; act.pmap = pmap; act.pde = pde; act.newpde = newpde; CPU_SET(cpuid, &active); smp_rendezvous_cpus(active, smp_no_rendezvous_barrier, pmap_update_pde_action, pmap_update_pde_teardown, &act); } else { pmap_update_pde_store(pmap, pde, newpde); if (CPU_ISSET(cpuid, &active)) pmap_update_pde_invalidate(pmap, va, newpde); } sched_unpin(); } #else /* !SMP */ /* * Normal, non-SMP, invalidation functions. */ void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { struct invpcid_descr d; uint64_t kcr3, ucr3; uint32_t pcid; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_range: unknown type %d", pmap->pm_type)); if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) { invlpg(va); if (pmap == PCPU_GET(curpmap) && pmap_pcid_enabled && pmap->pm_ucr3 != PMAP_NO_CR3) { critical_enter(); pcid = pmap->pm_pcids[0].pm_pcid; if (invpcid_works) { d.pcid = pcid | PMAP_PCID_USER_PT; d.pad = 0; d.addr = va; invpcid(&d, INVPCID_ADDR); } else { kcr3 = pmap->pm_cr3 | pcid | CR3_PCID_SAVE; ucr3 = pmap->pm_ucr3 | pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; pmap_pti_pcid_invlpg(ucr3, kcr3, va); } critical_exit(); } } else if (pmap_pcid_enabled) pmap->pm_pcids[0].pm_gen = 0; } void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct invpcid_descr d; vm_offset_t addr; uint64_t kcr3, ucr3; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_range: unknown type %d", pmap->pm_type)); if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); if (pmap == PCPU_GET(curpmap) && pmap_pcid_enabled && pmap->pm_ucr3 != PMAP_NO_CR3) { critical_enter(); if (invpcid_works) { d.pcid = pmap->pm_pcids[0].pm_pcid | PMAP_PCID_USER_PT; d.pad = 0; d.addr = sva; for (; d.addr < eva; d.addr += PAGE_SIZE) invpcid(&d, INVPCID_ADDR); } else { kcr3 = pmap->pm_cr3 | pmap->pm_pcids[0]. pm_pcid | CR3_PCID_SAVE; ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[0]. pm_pcid | PMAP_PCID_USER_PT | CR3_PCID_SAVE; pmap_pti_pcid_invlrng(ucr3, kcr3, sva, eva); } critical_exit(); } } else if (pmap_pcid_enabled) { pmap->pm_pcids[0].pm_gen = 0; } } void pmap_invalidate_all(pmap_t pmap) { struct invpcid_descr d; uint64_t kcr3, ucr3; if (pmap->pm_type == PT_RVI || pmap->pm_type == PT_EPT) { pmap->pm_eptgen++; return; } KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_all: unknown type %d", pmap->pm_type)); if (pmap == kernel_pmap) { if (pmap_pcid_enabled && invpcid_works) { bzero(&d, sizeof(d)); invpcid(&d, INVPCID_CTXGLOB); } else { invltlb_glob(); } } else if (pmap == PCPU_GET(curpmap)) { if (pmap_pcid_enabled) { critical_enter(); if (invpcid_works) { d.pcid = pmap->pm_pcids[0].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, INVPCID_CTX); if (pmap->pm_ucr3 != PMAP_NO_CR3) { d.pcid |= PMAP_PCID_USER_PT; invpcid(&d, INVPCID_CTX); } } else { kcr3 = pmap->pm_cr3 | pmap->pm_pcids[0].pm_pcid; if (pmap->pm_ucr3 != PMAP_NO_CR3) { ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[ 0].pm_pcid | PMAP_PCID_USER_PT; pmap_pti_pcid_invalidate(ucr3, kcr3); } else load_cr3(kcr3); } critical_exit(); } else { invltlb(); } } else if (pmap_pcid_enabled) { pmap->pm_pcids[0].pm_gen = 0; } } PMAP_INLINE void pmap_invalidate_cache(void) { wbinvd(); } static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { pmap_update_pde_store(pmap, pde, newpde); if (pmap == kernel_pmap || pmap == PCPU_GET(curpmap)) pmap_update_pde_invalidate(pmap, va, newpde); else pmap->pm_pcids[0].pm_gen = 0; } #endif /* !SMP */ static void pmap_invalidate_pde_page(pmap_t pmap, vm_offset_t va, pd_entry_t pde) { /* * When the PDE has PG_PROMOTED set, the 2MB page mapping was created * by a promotion that did not invalidate the 512 4KB page mappings * that might exist in the TLB. Consequently, at this point, the TLB * may hold both 4KB and 2MB page mappings for the address range [va, * va + NBPDR). Therefore, the entire range must be invalidated here. * In contrast, when PG_PROMOTED is clear, the TLB will not hold any * 4KB page mappings for the address range [va, va + NBPDR), and so a * single INVLPG suffices to invalidate the 2MB page mapping from the * TLB. */ if ((pde & PG_PROMOTED) != 0) pmap_invalidate_range(pmap, va, va + NBPDR - 1); else pmap_invalidate_page(pmap, va); } DEFINE_IFUNC(, void, pmap_invalidate_cache_range, (vm_offset_t sva, vm_offset_t eva), static) { if ((cpu_feature & CPUID_SS) != 0) return (pmap_invalidate_cache_range_selfsnoop); if ((cpu_feature & CPUID_CLFSH) != 0) return (pmap_force_invalidate_cache_range); return (pmap_invalidate_cache_range_all); } #define PMAP_CLFLUSH_THRESHOLD (2 * 1024 * 1024) static void pmap_invalidate_cache_range_check_align(vm_offset_t sva, vm_offset_t eva) { KASSERT((sva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: sva not page-aligned")); KASSERT((eva & PAGE_MASK) == 0, ("pmap_invalidate_cache_range: eva not page-aligned")); } static void pmap_invalidate_cache_range_selfsnoop(vm_offset_t sva, vm_offset_t eva) { pmap_invalidate_cache_range_check_align(sva, eva); } void pmap_force_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) { sva &= ~(vm_offset_t)(cpu_clflush_line_size - 1); if (eva - sva >= PMAP_CLFLUSH_THRESHOLD) { /* * The supplied range is bigger than 2MB. * Globally invalidate cache. */ pmap_invalidate_cache(); return; } /* * XXX: Some CPUs fault, hang, or trash the local APIC * registers if we use CLFLUSH on the local APIC range. The * local APIC is always uncached, so we don't need to flush * for that range anyway. */ if (pmap_kextract(sva) == lapic_paddr) return; if ((cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0) { /* * Do per-cache line flush. Use the sfence * instruction to insure that previous stores are * included in the write-back. The processor * propagates flush to other processors in the cache * coherence domain. */ sfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflushopt(sva); sfence(); } else { /* * Writes are ordered by CLFLUSH on Intel CPUs. */ if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (; sva < eva; sva += cpu_clflush_line_size) clflush(sva); if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); } } static void pmap_invalidate_cache_range_all(vm_offset_t sva, vm_offset_t eva) { pmap_invalidate_cache_range_check_align(sva, eva); pmap_invalidate_cache(); } /* * Remove the specified set of pages from the data and instruction caches. * * In contrast to pmap_invalidate_cache_range(), this function does not * rely on the CPU's self-snoop feature, because it is intended for use * when moving pages into a different cache domain. */ void pmap_invalidate_cache_pages(vm_page_t *pages, int count) { vm_offset_t daddr, eva; int i; bool useclflushopt; useclflushopt = (cpu_stdext_feature & CPUID_STDEXT_CLFLUSHOPT) != 0; if (count >= PMAP_CLFLUSH_THRESHOLD / PAGE_SIZE || ((cpu_feature & CPUID_CLFSH) == 0 && !useclflushopt)) pmap_invalidate_cache(); else { if (useclflushopt) sfence(); else if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); for (i = 0; i < count; i++) { daddr = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pages[i])); eva = daddr + PAGE_SIZE; for (; daddr < eva; daddr += cpu_clflush_line_size) { if (useclflushopt) clflushopt(daddr); else clflush(daddr); } } if (useclflushopt) sfence(); else if (cpu_vendor_id != CPU_VENDOR_INTEL) mfence(); } } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte, PG_V; vm_paddr_t pa; pa = 0; PG_V = pmap_valid_bit(pmap); PMAP_LOCK(pmap); pdpe = pmap_pdpe(pmap, va); if (pdpe != NULL && (*pdpe & PG_V) != 0) { if ((*pdpe & PG_PS) != 0) pa = (*pdpe & PG_PS_FRAME) | (va & PDPMASK); else { pde = pmap_pdpe_to_pde(pdpe, va); if ((*pde & PG_V) != 0) { if ((*pde & PG_PS) != 0) { pa = (*pde & PG_PS_FRAME) | (va & PDRMASK); } else { pte = pmap_pde_to_pte(pde, va); pa = (*pte & PG_FRAME) | (va & PAGE_MASK); } } } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pd_entry_t pde, *pdep; pt_entry_t pte, PG_RW, PG_V; vm_paddr_t pa; vm_page_t m; pa = 0; m = NULL; PG_RW = pmap_rw_bit(pmap); PG_V = pmap_valid_bit(pmap); PMAP_LOCK(pmap); retry: pdep = pmap_pde(pmap, va); if (pdep != NULL && (pde = *pdep)) { if (pde & PG_PS) { if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | (va & PDRMASK), &pa)) goto retry; m = PHYS_TO_VM_PAGE(pa); } } else { pte = *pmap_pde_to_pte(pdep, va); if ((pte & PG_V) && ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) goto retry; m = PHYS_TO_VM_PAGE(pa); } } if (m != NULL) vm_page_hold(m); } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { pd_entry_t pde; vm_paddr_t pa; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { pde = *vtopde(va); if (pde & PG_PS) { pa = (pde & PG_PS_FRAME) | (va & PDRMASK); } else { /* * Beware of a concurrent promotion that changes the * PDE at this point! For example, vtopte() must not * be used to access the PTE because it would use the * new PDE. It is, however, safe to use the old PDE * because the page table page is preserved by the * promotion. */ pa = *pmap_pde_to_pte(&pde, va); pa = (pa & PG_FRAME) | (va & PAGE_MASK); } } return (pa); } /*************************************************** * Low level mapping routines..... ***************************************************/ /* * Add a wired page to the kva. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kenter(vm_offset_t va, vm_paddr_t pa) { pt_entry_t *pte; pte = vtopte(va); pte_store(pte, pa | X86_PG_RW | X86_PG_V | pg_g); } static __inline void pmap_kenter_attr(vm_offset_t va, vm_paddr_t pa, int mode) { pt_entry_t *pte; int cache_bits; pte = vtopte(va); cache_bits = pmap_cache_bits(kernel_pmap, mode, 0); pte_store(pte, pa | X86_PG_RW | X86_PG_V | pg_g | cache_bits); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *pte; pte = vtopte(va); pte_clear(pte); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *endpte, oldpte, pa, *pte; vm_page_t m; int cache_bits; oldpte = 0; pte = vtopte(sva); endpte = pte + count; while (pte < endpte) { m = *ma++; cache_bits = pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0); pa = VM_PAGE_TO_PHYS(m) | cache_bits; if ((*pte & (PG_FRAME | X86_PG_PTE_CACHE)) != pa) { oldpte |= *pte; pte_store(pte, pa | pg_g | pg_nx | X86_PG_RW | X86_PG_V); } pte++; } if (__predict_false((oldpte & X86_PG_V) != 0)) pmap_invalidate_range(kernel_pmap, sva, sva + count * PAGE_SIZE); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { vm_offset_t va; va = sva; while (count-- > 0) { KASSERT(va >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", va)); pmap_kremove(va); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Inserts the specified page table page into the specified pmap's collection * of idle page table pages. Each of a pmap's page table pages is responsible * for mapping a distinct range of virtual addresses. The pmap's collection is * ordered by this virtual address range. */ static __inline int pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_insert(&pmap->pm_root, mpte)); } /* * Removes the page table page mapping the specified virtual address from the * specified pmap's collection of idle page table pages, and returns it. * Otherwise, returns NULL if there is no page table page corresponding to the * specified virtual address. */ static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); return (vm_radix_remove(&pmap->pm_root, pmap_pde_pindex(va))); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_ptp(pmap, va, m, free); return (TRUE); } else return (FALSE); } static void _pmap_unwire_ptp(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ if (m->pindex >= (NUPDE + NUPDPE)) { /* PDP page */ pml4_entry_t *pml4; pml4 = pmap_pml4e(pmap, va); *pml4 = 0; if (pmap->pm_pml4u != NULL && va <= VM_MAXUSER_ADDRESS) { pml4 = &pmap->pm_pml4u[pmap_pml4e_index(va)]; *pml4 = 0; } } else if (m->pindex >= NUPDE) { /* PD page */ pdp_entry_t *pdp; pdp = pmap_pdpe(pmap, va); *pdp = 0; } else { /* PTE page */ pd_entry_t *pd; pd = pmap_pde(pmap, va); *pd = 0; } pmap_resident_count_dec(pmap, 1); if (m->pindex < NUPDE) { /* We just released a PT, unhold the matching PD */ vm_page_t pdpg; pdpg = PHYS_TO_VM_PAGE(*pmap_pdpe(pmap, va) & PG_FRAME); pmap_unwire_ptp(pmap, va, pdpg, free); } if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) { /* We just released a PD, unhold the matching PDP */ vm_page_t pdppg; pdppg = PHYS_TO_VM_PAGE(*pmap_pml4e(pmap, va) & PG_FRAME); pmap_unwire_ptp(pmap, va, pdppg, free); } /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing a page table entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); mpte = PHYS_TO_VM_PAGE(ptepde & PG_FRAME); return (pmap_unwire_ptp(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { int i; PMAP_LOCK_INIT(pmap); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); pmap->pm_pml4u = NULL; pmap->pm_cr3 = KPML4phys; /* hack to keep pmap_pti_pcid_invalidate() alive */ pmap->pm_ucr3 = PMAP_NO_CR3; pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); pmap->pm_flags = pmap_flags; CPU_FOREACH(i) { pmap->pm_pcids[i].pm_pcid = PMAP_PCID_KERN + 1; pmap->pm_pcids[i].pm_gen = 1; } pmap_activate_boot(pmap); } void pmap_pinit_pml4(vm_page_t pml4pg) { pml4_entry_t *pm_pml4; int i; pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); /* Wire in kernel global address entries. */ for (i = 0; i < NKPML4E; i++) { pm_pml4[KPML4BASE + i] = (KPDPphys + ptoa(i)) | X86_PG_RW | X86_PG_V; } for (i = 0; i < ndmpdpphys; i++) { pm_pml4[DMPML4I + i] = (DMPDPphys + ptoa(i)) | X86_PG_RW | X86_PG_V; } /* install self-referential address mapping entry(s) */ pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; } static void pmap_pinit_pml4_pti(vm_page_t pml4pg) { pml4_entry_t *pm_pml4; int i; pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg)); for (i = 0; i < NPML4EPG; i++) pm_pml4[i] = pti_pml4[i]; } /* * Initialize a preallocated and zeroed pmap structure, * such as one in a vmspace structure. */ int pmap_pinit_type(pmap_t pmap, enum pmap_type pm_type, int flags) { vm_page_t pml4pg, pml4pgu; vm_paddr_t pml4phys; int i; /* * allocate the page directory page */ pml4pg = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO | VM_ALLOC_WAITOK); pml4phys = VM_PAGE_TO_PHYS(pml4pg); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(pml4phys); CPU_FOREACH(i) { pmap->pm_pcids[i].pm_pcid = PMAP_PCID_NONE; pmap->pm_pcids[i].pm_gen = 0; } pmap->pm_cr3 = PMAP_NO_CR3; /* initialize to an invalid value */ pmap->pm_ucr3 = PMAP_NO_CR3; pmap->pm_pml4u = NULL; pmap->pm_type = pm_type; if ((pml4pg->flags & PG_ZERO) == 0) pagezero(pmap->pm_pml4); /* * Do not install the host kernel mappings in the nested page * tables. These mappings are meaningless in the guest physical * address space. * Install minimal kernel mappings in PTI case. */ if (pm_type == PT_X86) { pmap->pm_cr3 = pml4phys; pmap_pinit_pml4(pml4pg); if (pti) { pml4pgu = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_WAITOK); pmap->pm_pml4u = (pml4_entry_t *)PHYS_TO_DMAP( VM_PAGE_TO_PHYS(pml4pgu)); pmap_pinit_pml4_pti(pml4pgu); pmap->pm_ucr3 = VM_PAGE_TO_PHYS(pml4pgu); } } pmap->pm_root.rt_root = 0; CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); pmap->pm_flags = flags; pmap->pm_eptgen = 0; return (1); } int pmap_pinit(pmap_t pmap) { return (pmap_pinit_type(pmap, PT_X86, pmap_flags)); } /* * This routine is called if the desired page table page does not exist. * * If page table page allocation fails, this routine may sleep before * returning NULL. It sleeps only if a lock pointer was given. * * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { vm_page_t m, pdppg, pdpg; pt_entry_t PG_A, PG_M, PG_RW, PG_V; PMAP_LOCK_ASSERT(pmap, MA_OWNED); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); PMAP_ASSERT_NOT_IN_DI(); vm_wait(NULL); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ if (ptepindex >= (NUPDE + NUPDPE)) { pml4_entry_t *pml4, *pml4u; vm_pindex_t pml4index; /* Wire up a new PDPE page */ pml4index = ptepindex - (NUPDE + NUPDPE); pml4 = &pmap->pm_pml4[pml4index]; *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; if (pmap->pm_pml4u != NULL && pml4index < NUPML4E) { /* * PTI: Make all user-space mappings in the * kernel-mode page table no-execute so that * we detect any programming errors that leave * the kernel-mode page table active on return * to user space. */ if (pmap->pm_ucr3 != PMAP_NO_CR3) *pml4 |= pg_nx; pml4u = &pmap->pm_pml4u[pml4index]; *pml4u = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } } else if (ptepindex >= NUPDE) { vm_pindex_t pml4index; vm_pindex_t pdpindex; pml4_entry_t *pml4; pdp_entry_t *pdp; /* Wire up a new PDE page */ pdpindex = ptepindex - NUPDE; pml4index = pdpindex >> NPML4EPGSHIFT; pml4 = &pmap->pm_pml4[pml4index]; if ((*pml4 & PG_V) == 0) { /* Have to allocate a new pdp, recurse */ if (_pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index, lockp) == NULL) { vm_page_unwire_noq(m); vm_page_free_zero(m); return (NULL); } } else { /* Add reference to pdp page */ pdppg = PHYS_TO_VM_PAGE(*pml4 & PG_FRAME); pdppg->wire_count++; } pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); /* Now find the pdp page */ pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; *pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } else { vm_pindex_t pml4index; vm_pindex_t pdpindex; pml4_entry_t *pml4; pdp_entry_t *pdp; pd_entry_t *pd; /* Wire up a new PTE page */ pdpindex = ptepindex >> NPDPEPGSHIFT; pml4index = pdpindex >> NPML4EPGSHIFT; /* First, find the pdp and check that its valid. */ pml4 = &pmap->pm_pml4[pml4index]; if ((*pml4 & PG_V) == 0) { /* Have to allocate a new pd, recurse */ if (_pmap_allocpte(pmap, NUPDE + pdpindex, lockp) == NULL) { vm_page_unwire_noq(m); vm_page_free_zero(m); return (NULL); } pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; } else { pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME); pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)]; if ((*pdp & PG_V) == 0) { /* Have to allocate a new pd, recurse */ if (_pmap_allocpte(pmap, NUPDE + pdpindex, lockp) == NULL) { vm_page_unwire_noq(m); vm_page_free_zero(m); return (NULL); } } else { /* Add reference to the pd page */ pdpg = PHYS_TO_VM_PAGE(*pdp & PG_FRAME); pdpg->wire_count++; } } pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME); /* Now we know where the page directory page is */ pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)]; *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M; } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_allocpde(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t pdpindex, ptepindex; pdp_entry_t *pdpe, PG_V; vm_page_t pdpg; PG_V = pmap_valid_bit(pmap); retry: pdpe = pmap_pdpe(pmap, va); if (pdpe != NULL && (*pdpe & PG_V) != 0) { /* Add a reference to the pd page. */ pdpg = PHYS_TO_VM_PAGE(*pdpe & PG_FRAME); pdpg->wire_count++; } else { /* Allocate a pd page. */ ptepindex = pmap_pde_pindex(va); pdpindex = ptepindex >> NPDPEPGSHIFT; pdpg = _pmap_allocpte(pmap, NUPDE + pdpindex, lockp); if (pdpg == NULL && lockp != NULL) goto retry; } return (pdpg); } static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *pd, PG_V; vm_page_t m; PG_V = pmap_valid_bit(pmap); /* * Calculate pagetable page index */ ptepindex = pmap_pde_pindex(va); retry: /* * Get the page directory entry */ pd = pmap_pde(pmap, va); /* * This supports switching from a 2MB page to a * normal 4K page. */ if (pd != NULL && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) { if (!pmap_demote_pde_locked(pmap, pd, va, lockp)) { /* * Invalidation of the 2MB page mapping may have caused * the deallocation of the underlying PD page. */ pd = NULL; } } /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (pd != NULL && (*pd & PG_V) != 0) { m = PHYS_TO_VM_PAGE(*pd & PG_FRAME); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ m = _pmap_allocpte(pmap, ptepindex, lockp); if (m == NULL && lockp != NULL) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { vm_page_t m; int i; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); KASSERT(vm_radix_is_empty(&pmap->pm_root), ("pmap_release: pmap has reserved page table page(s)")); KASSERT(CPU_EMPTY(&pmap->pm_active), ("releasing active pmap %p", pmap)); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4)); for (i = 0; i < NKPML4E; i++) /* KVA */ pmap->pm_pml4[KPML4BASE + i] = 0; for (i = 0; i < ndmpdpphys; i++)/* Direct Map */ pmap->pm_pml4[DMPML4I + i] = 0; pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */ vm_page_unwire_noq(m); vm_page_free_zero(m); if (pmap->pm_pml4u != NULL) { m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4u)); vm_page_unwire_noq(m); vm_page_free(m); } } static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; pd_entry_t *pde, newpdir; pdp_entry_t *pdpe; mtx_assert(&kernel_map->system_mtx, MA_OWNED); /* * Return if "addr" is within the range of kernel page table pages * that were preallocated during pmap bootstrap. Moreover, leave * "kernel_vm_end" and the kernel page table as they were. * * The correctness of this action is based on the following * argument: vm_map_insert() allocates contiguous ranges of the * kernel virtual address space. It calls this function if a range * ends after "kernel_vm_end". If the kernel is mapped between * "kernel_vm_end" and "addr", then the range cannot begin at * "kernel_vm_end". In fact, its beginning address cannot be less * than the kernel. Thus, there is no immediate need to allocate * any new kernel page table pages between "kernel_vm_end" and * "KERNBASE". */ if (KERNBASE < addr && addr <= KERNBASE + nkpt * NBPDR) return; addr = roundup2(addr, NBPDR); if (addr - 1 >= vm_map_max(kernel_map)) addr = vm_map_max(kernel_map); while (kernel_vm_end < addr) { pdpe = pmap_pdpe(kernel_pmap, kernel_vm_end); if ((*pdpe & X86_PG_V) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> PDPSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); *pdpe = (pdp_entry_t)(paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M); continue; /* try again */ } pde = pmap_pdpe_to_pde(pdpe, kernel_vm_end); if ((*pde & X86_PG_V) != 0) { kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { kernel_vm_end = vm_map_max(kernel_map); break; } continue; } nkpg = vm_page_alloc(NULL, pmap_pde_pindex(kernel_vm_end), VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); newpdir = paddr | X86_PG_V | X86_PG_RW | X86_PG_A | X86_PG_M; pde_store(pde, newpdir); kernel_vm_end = (kernel_vm_end + NBPDR) & ~PDRMASK; if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { kernel_vm_end = vm_map_max(kernel_map); break; } } } /*************************************************** * page management routines. ***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs, pv_entry_count; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif static void reclaim_pv_chunk_leave_pmap(pmap_t pmap, pmap_t locked_pmap, bool start_di) { if (pmap == NULL) return; pmap_invalidate_all(pmap); if (pmap != locked_pmap) PMAP_UNLOCK(pmap); if (start_di) pmap_delayed_invl_finished(); } /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. * * Returns NULL if PV entries were reclaimed from the specified pmap. * * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) { struct pv_chunk *pc, *pc_marker, *pc_marker_end; struct pv_chunk_header pc_marker_b, pc_marker_end_b; struct md_page *pvh; pd_entry_t *pde; pmap_t next_pmap, pmap; pt_entry_t *pte, tpte; pt_entry_t PG_G, PG_A, PG_M, PG_RW; pv_entry_t pv; vm_offset_t va; vm_page_t m, m_pc; struct spglist free; uint64_t inuse; int bit, field, freed; bool start_di; static int active_reclaims = 0; PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL")); pmap = NULL; m_pc = NULL; PG_G = PG_A = PG_M = PG_RW = 0; SLIST_INIT(&free); bzero(&pc_marker_b, sizeof(pc_marker_b)); bzero(&pc_marker_end_b, sizeof(pc_marker_end_b)); pc_marker = (struct pv_chunk *)&pc_marker_b; pc_marker_end = (struct pv_chunk *)&pc_marker_end_b; /* * A delayed invalidation block should already be active if * pmap_advise() or pmap_remove() called this function by way * of pmap_demote_pde_locked(). */ start_di = pmap_not_in_di(); mtx_lock(&pv_chunks_mutex); active_reclaims++; TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru); TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru); while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end && SLIST_EMPTY(&free)) { next_pmap = pc->pc_pmap; if (next_pmap == NULL) { /* * The next chunk is a marker. However, it is * not our marker, so active_reclaims must be * > 1. Consequently, the next_chunk code * will not rotate the pv_chunks list. */ goto next_chunk; } mtx_unlock(&pv_chunks_mutex); /* * A pv_chunk can only be removed from the pc_lru list * when both pc_chunks_mutex is owned and the * corresponding pmap is locked. */ if (pmap != next_pmap) { reclaim_pv_chunk_leave_pmap(pmap, locked_pmap, start_di); pmap = next_pmap; /* Avoid deadlock and lock recursion. */ if (pmap > locked_pmap) { RELEASE_PV_LIST_LOCK(lockp); PMAP_LOCK(pmap); if (start_di) pmap_delayed_invl_started(); mtx_lock(&pv_chunks_mutex); continue; } else if (pmap != locked_pmap) { if (PMAP_TRYLOCK(pmap)) { if (start_di) pmap_delayed_invl_started(); mtx_lock(&pv_chunks_mutex); continue; } else { pmap = NULL; /* pmap is not locked */ mtx_lock(&pv_chunks_mutex); pc = TAILQ_NEXT(pc_marker, pc_lru); if (pc == NULL || pc->pc_pmap != next_pmap) continue; goto next_chunk; } } else if (start_di) pmap_delayed_invl_started(); PG_G = pmap_global_bit(pmap); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); } /* * Destroy every non-wired, 4 KB page mapping in the chunk. */ freed = 0; for (field = 0; field < _NPCM; field++) { for (inuse = ~pc->pc_map[field] & pc_freemask[field]; inuse != 0; inuse &= ~(1UL << bit)) { bit = bsfq(inuse); pv = &pc->pc_pventry[field * 64 + bit]; va = pv->pv_va; pde = pmap_pde(pmap, va); if ((*pde & PG_PS) != 0) continue; pte = pmap_pde_to_pte(pde, va); if ((*pte & PG_W) != 0) continue; tpte = pte_load_clear(pte); if ((tpte & PG_G) != 0) pmap_invalidate_page(pmap, va); m = PHYS_TO_VM_PAGE(tpte & PG_FRAME); if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if ((tpte & PG_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) { vm_page_aflag_clear(m, PGA_WRITEABLE); } } pmap_delayed_invl_page(m); pc->pc_map[field] |= 1UL << bit; pmap_unuse_pt(pmap, va, *pde, &free); freed++; } } if (freed == 0) { mtx_lock(&pv_chunks_mutex); goto next_chunk; } /* Every freed mapping is for a 4 KB page. */ pmap_resident_count_dec(pmap, freed); PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 && pc->pc_map[2] == PC_FREE2) { PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* Entire chunk is free; return it. */ m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m_pc->phys_addr); mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); break; } TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); mtx_lock(&pv_chunks_mutex); /* One freed pv entry in locked_pmap is sufficient. */ if (pmap == locked_pmap) break; next_chunk: TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru); TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru); if (active_reclaims == 1 && pmap != NULL) { /* * Rotate the pv chunks list so that we do not * scan the same pv chunks that could not be * freed (because they contained a wired * and/or superpage mapping) on every * invocation of reclaim_pv_chunk(). */ while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) { MPASS(pc->pc_pmap != NULL); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); } } } TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru); TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru); active_reclaims--; mtx_unlock(&pv_chunks_mutex); reclaim_pv_chunk_leave_pmap(pmap, locked_pmap, start_di); if (m_pc == NULL && !SLIST_EMPTY(&free)) { m_pc = SLIST_FIRST(&free); SLIST_REMOVE_HEAD(&free, plinks.s.ss); /* Recycle a freed page table page. */ m_pc->wire_count = 1; } vm_page_free_pages_toq(&free, true); return (m_pc); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { /* 98% of the time, pc is already at the head of the list. */ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); dump_drop_page(m->phys_addr); vm_page_unwire(m, PQ_NONE); vm_page_free(m); } /* * Returns a new PV entry, allocating a new PV chunk from the system when * needed. If this PV chunk allocation fails and a PV list lock pointer was * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is * returned. * * The given PV list lock may be released. */ static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = bsfq(pc->pc_map[field]); break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (lockp == NULL) { PV_STAT(pc_chunk_tryfail++); return (NULL); } m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); dump_add_page(m->phys_addr); pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; mtx_lock(&pv_chunks_mutex); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); return (pv); } /* * Returns the number of one bits within the given PV chunk map. * * The erratas for Intel processors state that "POPCNT Instruction May * Take Longer to Execute Than Expected". It is believed that the * issue is the spurious dependency on the destination register. * Provide a hint to the register rename logic that the destination * value is overwritten, by clearing it, as suggested in the * optimization manual. It should be cheap for unaffected processors * as well. * * Reference numbers for erratas are * 4th Gen Core: HSD146 * 5th Gen Core: BDM85 * 6th Gen Core: SKL029 */ static int popcnt_pc_map_pq(uint64_t *map) { u_long result, tmp; __asm __volatile("xorl %k0,%k0;popcntq %2,%0;" "xorl %k1,%k1;popcntq %3,%1;addl %k1,%k0;" "xorl %k1,%k1;popcntq %4,%1;addl %k1,%k0" : "=&r" (result), "=&r" (tmp) : "m" (map[0]), "m" (map[1]), "m" (map[2])); return (result); } /* * Ensure that the number of spare PV entries in the specified pmap meets or * exceeds the given count, "needed". * * The given PV list lock may be released. */ static void reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) { struct pch new_tail; struct pv_chunk *pc; vm_page_t m; int avail, free; bool reclaimed; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); /* * Newly allocated PV chunks must be stored in a private list until * the required number of PV chunks have been allocated. Otherwise, * reclaim_pv_chunk() could recycle one of these chunks. In * contrast, these chunks must be added to the pmap upon allocation. */ TAILQ_INIT(&new_tail); retry: avail = 0; TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { #ifndef __POPCNT__ if ((cpu_feature2 & CPUID2_POPCNT) == 0) bit_count((bitstr_t *)pc->pc_map, 0, sizeof(pc->pc_map) * NBBY, &free); else #endif free = popcnt_pc_map_pq(pc->pc_map); if (free == 0) break; avail += free; if (avail >= needed) break; } for (reclaimed = false; avail < needed; avail += _NPCPV) { m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; reclaimed = true; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); dump_add_page(m->phys_addr); pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0; pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); /* * The reclaim might have freed a chunk from the current pmap. * If that chunk contained available entries, we need to * re-count the number of available entries. */ if (reclaimed) goto retry; } if (!TAILQ_EMPTY(&new_tail)) { mtx_lock(&pv_chunks_mutex); TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); mtx_unlock(&pv_chunks_mutex); } } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; break; } } return (pv); } /* * After demotion from a 2MB page mapping to 512 4KB page mappings, * destroy the pv entry for the 2MB page mapping and reinstantiate the pv * entries for each of the 4KB page mappings. */ static void pmap_pv_demote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp) { struct md_page *pvh; struct pv_chunk *pc; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; int bit, field; PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((pa & PDRMASK) == 0, ("pmap_pv_demote_pde: pa is not 2mpage aligned")); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); /* * Transfer the 2mpage's pv entry for this mapping to the first * page's pv list. Once this transfer begins, the pv list lock * must not be released until the last pv entry is reinstantiated. */ pvh = pa_to_pvh(pa); va = trunc_2mpage(va); pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pv_demote_pde: pv not found")); m = PHYS_TO_VM_PAGE(pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; /* Instantiate the remaining NPTEPG - 1 pv entries. */ PV_STAT(atomic_add_long(&pv_entry_allocs, NPTEPG - 1)); va_last = va + NBPDR - PAGE_SIZE; for (;;) { pc = TAILQ_FIRST(&pmap->pm_pvchunk); KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || pc->pc_map[2] != 0, ("pmap_pv_demote_pde: missing spare")); for (field = 0; field < _NPCM; field++) { while (pc->pc_map[field]) { bit = bsfq(pc->pc_map[field]); pc->pc_map[field] &= ~(1ul << bit); pv = &pc->pc_pventry[field * 64 + bit]; va += PAGE_SIZE; pv->pv_va = va; m++; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_pv_demote_pde: page %p is not managed", m)); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (va == va_last) goto out; } } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } out: if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, NPTEPG - 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, NPTEPG - 1)); } #if VM_NRESERVLEVEL > 0 /* * After promotion from 512 4KB page mappings to a single 2MB page mapping, * replace the many pv entries for the 4KB page mappings by a single pv entry * for the 2MB page mapping. */ static void pmap_pv_promote_pde(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, struct rwlock **lockp) { struct md_page *pvh; pv_entry_t pv; vm_offset_t va_last; vm_page_t m; KASSERT((pa & PDRMASK) == 0, ("pmap_pv_promote_pde: pa is not 2mpage aligned")); CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); /* * Transfer the first page's pv entry for this mapping to the 2mpage's * pv list. Aside from avoiding the cost of a call to get_pv_entry(), * a transfer avoids the possibility that get_pv_entry() calls * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the * mappings that is being promoted. */ m = PHYS_TO_VM_PAGE(pa); va = trunc_2mpage(va); pv = pmap_pvh_remove(&m->md, pmap, va); KASSERT(pv != NULL, ("pmap_pv_promote_pde: pv not found")); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; /* Free the remaining NPTEPG - 1 pv entries. */ va_last = va + NBPDR - PAGE_SIZE; do { m++; va += PAGE_SIZE; pmap_pvh_free(&m->md, pmap, va); } while (va < va_last); } #endif /* VM_NRESERVLEVEL > 0 */ /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. */ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } /* * Conditionally create the PV entry for a 4KB page mapping if the required * memory can be allocated without resorting to reclamation. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp) { pv_entry_t pv; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; return (TRUE); } else return (FALSE); } /* * Create the PV entry for a 2MB page mapping. Always returns true unless the * flag PMAP_ENTER_NORECLAIM is specified. If that flag is specified, returns * false if the PV entry cannot be allocated without resorting to reclamation. */ static bool pmap_pv_insert_pde(pmap_t pmap, vm_offset_t va, pd_entry_t pde, u_int flags, struct rwlock **lockp) { struct md_page *pvh; pv_entry_t pv; vm_paddr_t pa; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ? NULL : lockp)) == NULL) return (false); pv->pv_va = va; pa = pde & PG_PS_FRAME; CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); pvh = pa_to_pvh(pa); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; return (true); } /* * Fills a page table page with mappings to consecutive physical pages. */ static void pmap_fill_ptp(pt_entry_t *firstpte, pt_entry_t newpte) { pt_entry_t *pte; for (pte = firstpte; pte < firstpte + NPTEPG; pte++) { *pte = newpte; newpte += PAGE_SIZE; } } /* * Tries to demote a 2MB page mapping. If demotion fails, the 2MB page * mapping is invalidated. */ static boolean_t pmap_demote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { struct rwlock *lock; boolean_t rv; lock = NULL; rv = pmap_demote_pde_locked(pmap, pde, va, &lock); if (lock != NULL) rw_wunlock(lock); return (rv); } static boolean_t pmap_demote_pde_locked(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp) { pd_entry_t newpde, oldpde; pt_entry_t *firstpte, newpte; pt_entry_t PG_A, PG_G, PG_M, PG_RW, PG_V; vm_paddr_t mptepa; vm_page_t mpte; struct spglist free; vm_offset_t sva; int PG_PTE_CACHE; PG_G = pmap_global_bit(pmap); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_PTE_CACHE = pmap_cache_mask(pmap, 0); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpde = *pde; KASSERT((oldpde & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pde: oldpde is missing PG_PS and/or PG_V")); if ((oldpde & PG_A) == 0 || (mpte = pmap_remove_pt_page(pmap, va)) == NULL) { KASSERT((oldpde & PG_W) == 0, ("pmap_demote_pde: page table page for a wired mapping" " is missing")); /* * Invalidate the 2MB page mapping and return "failure" if the * mapping was never accessed or the allocation of the new * page table page fails. If the 2MB page mapping belongs to * the direct map region of the kernel's address space, then * the page allocation request specifies the highest possible * priority (VM_ALLOC_INTERRUPT). Otherwise, the priority is * normal. Page table pages are preallocated for every other * part of the kernel address space, so the direct map region * is the only part of the kernel address space that must be * handled here. */ if ((oldpde & PG_A) == 0 || (mpte = vm_page_alloc(NULL, pmap_pde_pindex(va), (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { SLIST_INIT(&free); sva = trunc_2mpage(va); pmap_remove_pde(pmap, pde, sva, &free, lockp); if ((oldpde & PG_G) == 0) pmap_invalidate_pde_page(pmap, sva, oldpde); vm_page_free_pages_toq(&free, true); CTR2(KTR_PMAP, "pmap_demote_pde: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } if (va < VM_MAXUSER_ADDRESS) pmap_resident_count_inc(pmap, 1); } mptepa = VM_PAGE_TO_PHYS(mpte); firstpte = (pt_entry_t *)PHYS_TO_DMAP(mptepa); newpde = mptepa | PG_M | PG_A | (oldpde & PG_U) | PG_RW | PG_V; KASSERT((oldpde & PG_A) != 0, ("pmap_demote_pde: oldpde is missing PG_A")); KASSERT((oldpde & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pde: oldpde is missing PG_M")); newpte = oldpde & ~PG_PS; newpte = pmap_swap_pat(pmap, newpte); /* * If the page table page is new, initialize it. */ if (mpte->wire_count == 1) { mpte->wire_count = NPTEPG; pmap_fill_ptp(firstpte, newpte); } KASSERT((*firstpte & PG_FRAME) == (newpte & PG_FRAME), ("pmap_demote_pde: firstpte and newpte map different physical" " addresses")); /* * If the mapping has changed attributes, update the page table * entries. */ if ((*firstpte & PG_PTE_PROMOTE) != (newpte & PG_PTE_PROMOTE)) pmap_fill_ptp(firstpte, newpte); /* * The spare PV entries must be reserved prior to demoting the * mapping, that is, prior to changing the PDE. Otherwise, the state * of the PDE and the PV lists will be inconsistent, which can result * in reclaim_pv_chunk() attempting to remove a PV entry from the * wrong PV list and pmap_pv_demote_pde() failing to find the expected * PV entry for the 2MB page mapping that is being demoted. */ if ((oldpde & PG_MANAGED) != 0) reserve_pv_entries(pmap, NPTEPG - 1, lockp); /* * Demote the mapping. This pmap is locked. The old PDE has * PG_A set. If the old PDE has PG_RW set, it also has PG_M * set. Thus, there is no danger of a race with another * processor changing the setting of PG_A and/or PG_M between * the read above and the store below. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else pde_store(pde, newpde); /* * Invalidate a stale recursive mapping of the page table page. */ if (va >= VM_MAXUSER_ADDRESS) pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); /* * Demote the PV entry. */ if ((oldpde & PG_MANAGED) != 0) pmap_pv_demote_pde(pmap, va, oldpde & PG_PS_FRAME, lockp); atomic_add_long(&pmap_pde_demotions, 1); CTR2(KTR_PMAP, "pmap_demote_pde: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * pmap_remove_kernel_pde: Remove a kernel superpage mapping. */ static void pmap_remove_kernel_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va) { pd_entry_t newpde; vm_paddr_t mptepa; vm_page_t mpte; KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); PMAP_LOCK_ASSERT(pmap, MA_OWNED); mpte = pmap_remove_pt_page(pmap, va); if (mpte == NULL) panic("pmap_remove_kernel_pde: Missing pt page."); mptepa = VM_PAGE_TO_PHYS(mpte); newpde = mptepa | X86_PG_M | X86_PG_A | X86_PG_RW | X86_PG_V; /* * Initialize the page table page. */ pagezero((void *)PHYS_TO_DMAP(mptepa)); /* * Demote the mapping. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, newpde); else pde_store(pde, newpde); /* * Invalidate a stale recursive mapping of the page table page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopte(va)); } /* * pmap_remove_pde: do the things to unmap a superpage in a process */ static int pmap_remove_pde(pmap_t pmap, pd_entry_t *pdq, vm_offset_t sva, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; pd_entry_t oldpde; vm_offset_t eva, va; vm_page_t m, mpte; pt_entry_t PG_G, PG_A, PG_M, PG_RW; PG_G = pmap_global_bit(pmap); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_remove_pde: sva is not 2mpage aligned")); oldpde = pte_load_clear(pdq); if (oldpde & PG_W) pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; if ((oldpde & PG_G) != 0) pmap_invalidate_pde_page(kernel_pmap, sva, oldpde); pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); if (oldpde & PG_MANAGED) { CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, oldpde & PG_PS_FRAME); pvh = pa_to_pvh(oldpde & PG_PS_FRAME); pmap_pvh_free(pvh, pmap, sva); eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) { if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpde & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); if (TAILQ_EMPTY(&m->md.pv_list) && TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); pmap_delayed_invl_page(m); } } if (pmap == kernel_pmap) { pmap_remove_kernel_pde(pmap, pdq, sva); } else { mpte = pmap_remove_pt_page(pmap, sva); if (mpte != NULL) { pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pde: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, free, FALSE); } } return (pmap_unuse_pt(pmap, sva, *pmap_pdpe(pmap, sva), free)); } /* * pmap_remove_pte: do the things to unmap a page in a process */ static int pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp) { struct md_page *pvh; pt_entry_t oldpte, PG_A, PG_M, PG_RW; vm_page_t m; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpte = pte_load_clear(ptq); if (oldpte & PG_W) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (oldpte & PG_MANAGED) { m = PHYS_TO_VM_PAGE(oldpte & PG_FRAME); if ((oldpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if (oldpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); if (TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } pmap_delayed_invl_page(m); } return (pmap_unuse_pt(pmap, va, ptepde, free)); } /* * Remove a single page from a process address space */ static void pmap_remove_page(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, struct spglist *free) { struct rwlock *lock; pt_entry_t *pte, PG_V; PG_V = pmap_valid_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((*pde & PG_V) == 0) return; pte = pmap_pde_to_pte(pde, va); if ((*pte & PG_V) == 0) return; lock = NULL; pmap_remove_pte(pmap, pte, va, *pde, free, &lock); if (lock != NULL) rw_wunlock(lock); pmap_invalidate_page(pmap, va); } /* * Removes the specified range of addresses from the page table page. */ static bool pmap_remove_ptes(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, pd_entry_t *pde, struct spglist *free, struct rwlock **lockp) { pt_entry_t PG_G, *pte; vm_offset_t va; bool anyvalid; PMAP_LOCK_ASSERT(pmap, MA_OWNED); PG_G = pmap_global_bit(pmap); anyvalid = false; va = eva; for (pte = pmap_pde_to_pte(pde, sva); sva != eva; pte++, sva += PAGE_SIZE) { if (*pte == 0) { if (va != eva) { pmap_invalidate_range(pmap, va, sva); va = eva; } continue; } if ((*pte & PG_G) == 0) anyvalid = true; else if (va == eva) va = sva; if (pmap_remove_pte(pmap, pte, sva, *pde, free, lockp)) { sva += PAGE_SIZE; break; } } if (va != eva) pmap_invalidate_range(pmap, va, sva); return (anyvalid); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct rwlock *lock; vm_offset_t va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t ptpaddr, *pde; pt_entry_t PG_G, PG_V; struct spglist free; int anyvalid; PG_G = pmap_global_bit(pmap); PG_V = pmap_valid_bit(pmap); /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); pmap_delayed_invl_started(); PMAP_LOCK(pmap); /* * special handling of removing one page. a very * common operation and easy to short circuit some * code. */ if (sva + PAGE_SIZE == eva) { pde = pmap_pde(pmap, sva); if (pde && (*pde & PG_PS) == 0) { pmap_remove_page(pmap, sva, pde, &free); goto out; } } lock = NULL; for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); ptpaddr = *pde; /* * Weed out invalid mappings. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we removing the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == va_next && eva >= va_next) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_remove_pde(). */ if ((ptpaddr & PG_G) == 0) anyvalid = 1; pmap_remove_pde(pmap, pde, sva, &free, &lock); continue; } else if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) { /* The large page mapping was destroyed. */ continue; } else ptpaddr = *pde; } /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (va_next > eva) va_next = eva; if (pmap_remove_ptes(pmap, sva, va_next, pde, &free, &lock)) anyvalid = 1; } if (lock != NULL) rw_wunlock(lock); out: if (anyvalid) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); pmap_delayed_invl_finished(); vm_page_free_pages_toq(&free, true); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { struct md_page *pvh; pv_entry_t pv; pmap_t pmap; struct rwlock *lock; pt_entry_t *pte, tpte, PG_A, PG_M, PG_RW; pd_entry_t *pde; vm_offset_t va; struct spglist free; int pvh_gen, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry: rw_wlock(lock); while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { rw_wunlock(lock); PMAP_UNLOCK(pmap); goto retry; } } va = pv->pv_va; pde = pmap_pde(pmap, va); (void)pmap_demote_pde_locked(pmap, pde, va, &lock); PMAP_UNLOCK(pmap); } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { rw_wunlock(lock); PMAP_UNLOCK(pmap); goto retry; } } PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); pmap_resident_count_dec(pmap, 1); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_all: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); tpte = pte_load_clear(pte); if (tpte & PG_W) pmap->pm_stats.wired_count--; if (tpte & PG_A) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); pmap_unuse_pt(pmap, pv->pv_va, *pde, &free); pmap_invalidate_page(pmap, pv->pv_va); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(lock); pmap_delayed_invl_wait(m); vm_page_free_pages_toq(&free, true); } /* * pmap_protect_pde: do the things to protect a 2mpage in a process */ static boolean_t pmap_protect_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t sva, vm_prot_t prot) { pd_entry_t newpde, oldpde; vm_offset_t eva, va; vm_page_t m; boolean_t anychanged; pt_entry_t PG_G, PG_M, PG_RW; PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT((sva & PDRMASK) == 0, ("pmap_protect_pde: sva is not 2mpage aligned")); anychanged = FALSE; retry: oldpde = newpde = *pde; if ((oldpde & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { eva = sva + NBPDR; for (va = sva, m = PHYS_TO_VM_PAGE(oldpde & PG_PS_FRAME); va < eva; va += PAGE_SIZE, m++) vm_page_dirty(m); } if ((prot & VM_PROT_WRITE) == 0) newpde &= ~(PG_RW | PG_M); if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; if (newpde != oldpde) { /* * As an optimization to future operations on this PDE, clear * PG_PROMOTED. The impending invalidation will remove any * lingering 4KB page mappings from the TLB. */ if (!atomic_cmpset_long(pde, oldpde, newpde & ~PG_PROMOTED)) goto retry; if ((oldpde & PG_G) != 0) pmap_invalidate_pde_page(kernel_pmap, sva, oldpde); else anychanged = TRUE; } return (anychanged); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t ptpaddr, *pde; pt_entry_t *pte, PG_G, PG_M, PG_RW, PG_V; boolean_t anychanged; KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); if (prot == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & (VM_PROT_WRITE|VM_PROT_EXECUTE)) == (VM_PROT_WRITE|VM_PROT_EXECUTE)) return; PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); anychanged = FALSE; /* * Although this function delays and batches the invalidation * of stale TLB entries, it does not need to call * pmap_delayed_invl_started() and * pmap_delayed_invl_finished(), because it does not * ordinarily destroy mappings. Stale TLB entries from * protection-only changes need only be invalidated before the * pmap lock is released, because protection-only changes do * not destroy PV entries. Even operations that iterate over * a physical page's PV list of mappings, like * pmap_remove_write(), acquire the pmap lock for each * mapping. Consequently, for protection-only changes, the * pmap lock suffices to synchronize both page table and TLB * updates. * * This function only destroys a mapping if pmap_demote_pde() * fails. In that case, stale TLB entries are immediately * invalidated. */ PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); ptpaddr = *pde; /* * Weed out invalid mappings. */ if (ptpaddr == 0) continue; /* * Check for large page. */ if ((ptpaddr & PG_PS) != 0) { /* * Are we protecting the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == va_next && eva >= va_next) { /* * The TLB entry for a PG_G mapping is * invalidated by pmap_protect_pde(). */ if (pmap_protect_pde(pmap, pde, sva, prot)) anychanged = TRUE; continue; } else if (!pmap_demote_pde(pmap, pde, sva)) { /* * The large page mapping was destroyed. */ continue; } } if (va_next > eva) va_next = eva; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { pt_entry_t obits, pbits; vm_page_t m; retry: obits = pbits = *pte; if ((pbits & PG_V) == 0) continue; if ((prot & VM_PROT_WRITE) == 0) { if ((pbits & (PG_MANAGED | PG_M | PG_RW)) == (PG_MANAGED | PG_M | PG_RW)) { m = PHYS_TO_VM_PAGE(pbits & PG_FRAME); vm_page_dirty(m); } pbits &= ~(PG_RW | PG_M); } if ((prot & VM_PROT_EXECUTE) == 0) pbits |= pg_nx; if (pbits != obits) { if (!atomic_cmpset_long(pte, obits, pbits)) goto retry; if (obits & PG_G) pmap_invalidate_page(pmap, sva); else anychanged = TRUE; } } } if (anychanged) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); } #if VM_NRESERVLEVEL > 0 /* * Tries to promote the 512, contiguous 4KB page mappings that are within a * single page table page (PTP) to a single 2MB page mapping. For promotion * to occur, two conditions must be met: (1) the 4KB page mappings must map * aligned, contiguous physical memory and (2) the 4KB page mappings must have * identical characteristics. */ static void pmap_promote_pde(pmap_t pmap, pd_entry_t *pde, vm_offset_t va, struct rwlock **lockp) { pd_entry_t newpde; pt_entry_t *firstpte, oldpte, pa, *pte; pt_entry_t PG_G, PG_A, PG_M, PG_RW, PG_V; vm_page_t mpte; int PG_PTE_CACHE; PG_A = pmap_accessed_bit(pmap); PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); PG_PTE_CACHE = pmap_cache_mask(pmap, 0); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Examine the first PTE in the specified PTP. Abort if this PTE is * either invalid, unused, or does not map the first 4KB physical page * within a 2MB page. */ firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME); setpde: newpde = *firstpte; if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } if ((newpde & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared without * a TLB invalidation. */ if (!atomic_cmpset_long(firstpte, newpde, newpde & ~PG_RW)) goto setpde; newpde &= ~PG_RW; } /* * Examine each of the other PTEs in the specified PTP. Abort if this * PTE maps an unexpected 4KB physical page or does not have identical * characteristics to the first PTE. */ pa = (newpde & (PG_PS_FRAME | PG_A | PG_V)) + NBPDR - PAGE_SIZE; for (pte = firstpte + NPTEPG - 1; pte > firstpte; pte--) { setpte: oldpte = *pte; if ((oldpte & (PG_FRAME | PG_A | PG_V)) != pa) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } if ((oldpte & (PG_M | PG_RW)) == PG_RW) { /* * When PG_M is already clear, PG_RW can be cleared * without a TLB invalidation. */ if (!atomic_cmpset_long(pte, oldpte, oldpte & ~PG_RW)) goto setpte; oldpte &= ~PG_RW; CTR2(KTR_PMAP, "pmap_promote_pde: protect for va %#lx" " in pmap %p", (oldpte & PG_FRAME & PDRMASK) | (va & ~PDRMASK), pmap); } if ((oldpte & PG_PTE_PROMOTE) != (newpde & PG_PTE_PROMOTE)) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx" " in pmap %p", va, pmap); return; } pa -= PAGE_SIZE; } /* * Save the page table page in its current state until the PDE * mapping the superpage is demoted by pmap_demote_pde() or * destroyed by pmap_remove_pde(). */ mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); KASSERT(mpte >= vm_page_array && mpte < &vm_page_array[vm_page_array_size], ("pmap_promote_pde: page table page is out of range")); KASSERT(mpte->pindex == pmap_pde_pindex(va), ("pmap_promote_pde: page table page's pindex is wrong")); if (pmap_insert_pt_page(pmap, mpte)) { atomic_add_long(&pmap_pde_p_failures, 1); CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx in pmap %p", va, pmap); return; } /* * Promote the pv entries. */ if ((newpde & PG_MANAGED) != 0) pmap_pv_promote_pde(pmap, va, newpde & PG_PS_FRAME, lockp); /* * Propagate the PAT index to its proper position. */ newpde = pmap_swap_pat(pmap, newpde); /* * Map the superpage. */ if (workaround_erratum383) pmap_update_pde(pmap, va, pde, PG_PS | newpde); else pde_store(pde, PG_PROMOTED | PG_PS | newpde); atomic_add_long(&pmap_pde_promotions, 1); CTR2(KTR_PMAP, "pmap_promote_pde: success for va %#lx" " in pmap %p", va, pmap); } #endif /* VM_NRESERVLEVEL > 0 */ /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. * * When destroying both a page table and PV entry, this function * performs the TLB invalidation before releasing the PV list * lock, so we do not need pmap_delayed_invl_page() calls here. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind) { struct rwlock *lock; pd_entry_t *pde; pt_entry_t *pte, PG_G, PG_A, PG_M, PG_RW, PG_V; pt_entry_t newpte, origpte; pv_entry_t pv; vm_paddr_t opa, pa; vm_page_t mpte, om; int rv; boolean_t nosleep; PG_A = pmap_accessed_bit(pmap); PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); va = trunc_page(va); KASSERT(va <= VM_MAX_KERNEL_ADDRESS, ("pmap_enter: toobig")); KASSERT(va < UPT_MIN_ADDRESS || va >= UPT_MAX_ADDRESS, ("pmap_enter: invalid to pmap_enter page table pages (va: 0x%lx)", va)); KASSERT((m->oflags & VPO_UNMANAGED) != 0 || va < kmi.clean_sva || va >= kmi.clean_eva, ("pmap_enter: managed mapping within the clean submap")); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); KASSERT((flags & PMAP_ENTER_RESERVED) == 0, ("pmap_enter: flags %u has reserved bits set", flags)); pa = VM_PAGE_TO_PHYS(m); newpte = (pt_entry_t)(pa | PG_A | PG_V); if ((flags & VM_PROT_WRITE) != 0) newpte |= PG_M; if ((prot & VM_PROT_WRITE) != 0) newpte |= PG_RW; KASSERT((newpte & (PG_M | PG_RW)) != PG_M, ("pmap_enter: flags includes VM_PROT_WRITE but prot doesn't")); if ((prot & VM_PROT_EXECUTE) == 0) newpte |= pg_nx; if ((flags & PMAP_ENTER_WIRED) != 0) newpte |= PG_W; if (va < VM_MAXUSER_ADDRESS) newpte |= PG_U; if (pmap == kernel_pmap) newpte |= PG_G; newpte |= pmap_cache_bits(pmap, m->md.pat_mode, psind > 0); /* * Set modified bit gratuitously for writeable mappings if * the page is unmanaged. We do not want to take a fault * to do the dirty bit accounting for these mappings. */ if ((m->oflags & VPO_UNMANAGED) != 0) { if ((newpte & PG_RW) != 0) newpte |= PG_M; } else newpte |= PG_MANAGED; lock = NULL; PMAP_LOCK(pmap); if (psind == 1) { /* Assert the required virtual and physical alignment. */ KASSERT((va & PDRMASK) == 0, ("pmap_enter: va unaligned")); KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); rv = pmap_enter_pde(pmap, va, newpte | PG_PS, flags, m, &lock); goto out; } mpte = NULL; /* * In the case that a page table page is not * resident, we are creating it here. */ retry: pde = pmap_pde(pmap, va); if (pde != NULL && (*pde & PG_V) != 0 && ((*pde & PG_PS) == 0 || pmap_demote_pde_locked(pmap, pde, va, &lock))) { pte = pmap_pde_to_pte(pde, va); if (va < VM_MAXUSER_ADDRESS && mpte == NULL) { mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); mpte->wire_count++; } } else if (va < VM_MAXUSER_ADDRESS) { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = _pmap_allocpte(pmap, pmap_pde_pindex(va), nosleep ? NULL : &lock); if (mpte == NULL && nosleep) { rv = KERN_RESOURCE_SHORTAGE; goto out; } goto retry; } else panic("pmap_enter: invalid page directory va=%#lx", va); origpte = *pte; pv = NULL; /* * Is the specified virtual address already mapped? */ if ((origpte & PG_V) != 0) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((newpte & PG_W) != 0 && (origpte & PG_W) == 0) pmap->pm_stats.wired_count++; else if ((newpte & PG_W) == 0 && (origpte & PG_W) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } /* * Has the physical page changed? */ opa = origpte & PG_FRAME; if (opa == pa) { /* * No, might be a protection or wiring change. */ if ((origpte & PG_MANAGED) != 0 && (newpte & PG_RW) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); if (((origpte ^ newpte) & ~(PG_M | PG_A)) == 0) goto unchanged; goto validate; } /* * The physical page has changed. Temporarily invalidate * the mapping. This ensures that all threads sharing the * pmap keep a consistent view of the mapping, which is * necessary for the correct handling of COW faults. It * also permits reuse of the old mapping's PV entry, * avoiding an allocation. * * For consistency, handle unmanaged mappings the same way. */ origpte = pte_load_clear(pte); KASSERT((origpte & PG_FRAME) == opa, ("pmap_enter: unexpected pa update for %#lx", va)); if ((origpte & PG_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); /* * The pmap lock is sufficient to synchronize with * concurrent calls to pmap_page_test_mappings() and * pmap_ts_referenced(). */ if ((origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(om); if ((origpte & PG_A) != 0) vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pv = pmap_pvh_remove(&om->md, pmap, va); if ((newpte & PG_MANAGED) == 0) free_pv_entry(pmap, pv); if ((om->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&om->md.pv_list) && ((om->flags & PG_FICTITIOUS) != 0 || TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) vm_page_aflag_clear(om, PGA_WRITEABLE); } if ((origpte & PG_A) != 0) pmap_invalidate_page(pmap, va); origpte = 0; } else { /* * Increment the counters. */ if ((newpte & PG_W) != 0) pmap->pm_stats.wired_count++; pmap_resident_count_inc(pmap, 1); } /* * Enter on the PV list if part of our managed memory. */ if ((newpte & PG_MANAGED) != 0) { if (pv == NULL) { pv = get_pv_entry(pmap, &lock); pv->pv_va = va; } CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((newpte & PG_RW) != 0) vm_page_aflag_set(m, PGA_WRITEABLE); } /* * Update the PTE. */ if ((origpte & PG_V) != 0) { validate: origpte = pte_load_store(pte, newpte); KASSERT((origpte & PG_FRAME) == pa, ("pmap_enter: unexpected pa update for %#lx", va)); if ((newpte & PG_M) == 0 && (origpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if ((origpte & PG_MANAGED) != 0) vm_page_dirty(m); /* * Although the PTE may still have PG_RW set, TLB * invalidation may nonetheless be required because * the PTE no longer has PG_M set. */ } else if ((origpte & PG_NX) != 0 || (newpte & PG_NX) == 0) { /* * This PTE change does not require TLB invalidation. */ goto unchanged; } if ((origpte & PG_A) != 0) pmap_invalidate_page(pmap, va); } else pte_store(pte, newpte); unchanged: #if VM_NRESERVLEVEL > 0 /* * If both the page table page and the reservation are fully * populated, then attempt promotion. */ if ((mpte == NULL || mpte->wire_count == NPTEPG) && pmap_ps_enabled(pmap) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) pmap_promote_pde(pmap, pde, va, &lock); #endif rv = KERN_SUCCESS; out: if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); return (rv); } /* * Tries to create a read- and/or execute-only 2MB page mapping. Returns true * if successful. Returns false if (1) a page table page cannot be allocated * without sleeping, (2) a mapping already exists at the specified virtual * address, or (3) a PV entry cannot be allocated without reclaiming another * PV entry. */ static bool pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, struct rwlock **lockp) { pd_entry_t newpde; pt_entry_t PG_V; PMAP_LOCK_ASSERT(pmap, MA_OWNED); PG_V = pmap_valid_bit(pmap); newpde = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 1) | PG_PS | PG_V; if ((m->oflags & VPO_UNMANAGED) == 0) newpde |= PG_MANAGED; if ((prot & VM_PROT_EXECUTE) == 0) newpde |= pg_nx; if (va < VM_MAXUSER_ADDRESS) newpde |= PG_U; return (pmap_enter_pde(pmap, va, newpde, PMAP_ENTER_NOSLEEP | PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) == KERN_SUCCESS); } /* * Tries to create the specified 2MB page mapping. Returns KERN_SUCCESS if * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE * otherwise. Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and * a mapping already exists at the specified virtual address. Returns * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table * page allocation failed. Returns KERN_RESOURCE_SHORTAGE if * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed. * * The parameter "m" is only used when creating a managed, writeable mapping. */ static int pmap_enter_pde(pmap_t pmap, vm_offset_t va, pd_entry_t newpde, u_int flags, vm_page_t m, struct rwlock **lockp) { struct spglist free; pd_entry_t oldpde, *pde; pt_entry_t PG_G, PG_RW, PG_V; vm_page_t mt, pdpg; PG_G = pmap_global_bit(pmap); PG_RW = pmap_rw_bit(pmap); KASSERT((newpde & (pmap_modified_bit(pmap) | PG_RW)) != PG_RW, ("pmap_enter_pde: newpde is missing PG_M")); PG_V = pmap_valid_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); if ((pdpg = pmap_allocpde(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ? NULL : lockp)) == NULL) { CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (KERN_RESOURCE_SHORTAGE); } pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg)); pde = &pde[pmap_pde_index(va)]; oldpde = *pde; if ((oldpde & PG_V) != 0) { KASSERT(pdpg->wire_count > 1, ("pmap_enter_pde: pdpg's wire count is too low")); if ((flags & PMAP_ENTER_NOREPLACE) != 0) { pdpg->wire_count--; CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (KERN_FAILURE); } /* Break the existing mapping(s). */ SLIST_INIT(&free); if ((oldpde & PG_PS) != 0) { /* * The reference to the PD page that was acquired by * pmap_allocpde() ensures that it won't be freed. * However, if the PDE resulted from a promotion, then * a reserved PT page could be freed. */ (void)pmap_remove_pde(pmap, pde, va, &free, lockp); if ((oldpde & PG_G) == 0) pmap_invalidate_pde_page(pmap, va, oldpde); } else { pmap_delayed_invl_started(); if (pmap_remove_ptes(pmap, va, va + NBPDR, pde, &free, lockp)) pmap_invalidate_all(pmap); pmap_delayed_invl_finished(); } vm_page_free_pages_toq(&free, true); if (va >= VM_MAXUSER_ADDRESS) { mt = PHYS_TO_VM_PAGE(*pde & PG_FRAME); if (pmap_insert_pt_page(pmap, mt)) { /* * XXX Currently, this can't happen because * we do not perform pmap_enter(psind == 1) * on the kernel pmap. */ panic("pmap_enter_pde: trie insert failed"); } } else KASSERT(*pde == 0, ("pmap_enter_pde: non-zero pde %p", pde)); } if ((newpde & PG_MANAGED) != 0) { /* * Abort this mapping if its PV entry could not be created. */ if (!pmap_pv_insert_pde(pmap, va, newpde, flags, lockp)) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, va, pdpg, &free)) { /* * Although "va" is not mapped, paging- * structure caches could nonetheless have * entries that refer to the freed page table * pages. Invalidate those entries. */ pmap_invalidate_page(pmap, va); vm_page_free_pages_toq(&free, true); } CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx" " in pmap %p", va, pmap); return (KERN_RESOURCE_SHORTAGE); } if ((newpde & PG_RW) != 0) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) vm_page_aflag_set(mt, PGA_WRITEABLE); } } /* * Increment counters. */ if ((newpde & PG_W) != 0) pmap->pm_stats.wired_count += NBPDR / PAGE_SIZE; pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); /* * Map the superpage. (This is not a promoted mapping; there will not * be any lingering 4KB page mappings in the TLB.) */ pde_store(pde, newpde); atomic_add_long(&pmap_pde_mappings, 1); CTR2(KTR_PMAP, "pmap_enter_pde: success for va %#lx" " in pmap %p", va, pmap); return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; lock = NULL; PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); if ((va & PDRMASK) == 0 && va + NBPDR <= end && m->psind == 1 && pmap_ps_enabled(pmap) && pmap_enter_2mpage(pmap, va, m, prot, &lock)) m = &m[NBPDR / PAGE_SIZE - 1]; else mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { struct rwlock *lock; lock = NULL; PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; pt_entry_t *pte, PG_V; vm_paddr_t pa; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); PG_V = pmap_valid_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t ptepindex; pd_entry_t *ptepa; /* * Calculate pagetable page index */ ptepindex = pmap_pde_pindex(va); if (mpte && (mpte->pindex == ptepindex)) { mpte->wire_count++; } else { /* * Get the page directory entry */ ptepa = pmap_pde(pmap, va); /* * If the page table page is mapped, we just increment * the hold count, and activate it. Otherwise, we * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ if (ptepa && (*ptepa & PG_V) != 0) { if (*ptepa & PG_PS) return (NULL); mpte = PHYS_TO_VM_PAGE(*ptepa & PG_FRAME); mpte->wire_count++; } else { /* * Pass NULL instead of the PV list lock * pointer, because we don't intend to sleep. */ mpte = _pmap_allocpte(pmap, ptepindex, NULL); if (mpte == NULL) return (mpte); } } pte = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); pte = &pte[pmap_pte_index(va)]; } else { mpte = NULL; pte = vtopte(va); } if (*pte) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_ptp(pmap, va, mpte, &free)) { /* * Although "va" is not mapped, paging- * structure caches could nonetheless have * entries that refer to the freed page table * pages. Invalidate those entries. */ pmap_invalidate_page(pmap, va); vm_page_free_pages_toq(&free, true); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m) | pmap_cache_bits(pmap, m->md.pat_mode, 0); if ((prot & VM_PROT_EXECUTE) == 0) pa |= pg_nx; /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) != 0) pte_store(pte, pa | PG_V | PG_U); else pte_store(pte, pa | PG_V | PG_U | PG_MANAGED); return (mpte); } /* * Make a temporary mapping for a physical address. This is only intended * to be used for panic dumps. */ void * pmap_kenter_temporary(vm_paddr_t pa, int i) { vm_offset_t va; va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE); pmap_kenter(va, pa); invlpg(va); return ((void *)crashdumpmap); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { pd_entry_t *pde; pt_entry_t PG_A, PG_M, PG_RW, PG_V; vm_paddr_t pa, ptepa; vm_page_t p, pdpg; int pat_mode; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); if ((addr & (NBPDR - 1)) == 0 && (size & (NBPDR - 1)) == 0) { if (!pmap_ps_enabled(pmap)) return; if (!vm_object_populate(object, pindex, pindex + atop(size))) return; p = vm_page_lookup(object, pindex); KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); pat_mode = p->md.pat_mode; /* * Abort the mapping if the first page is not physically * aligned to a 2MB page boundary. */ ptepa = VM_PAGE_TO_PHYS(p); if (ptepa & (NBPDR - 1)) return; /* * Skip the first page. Abort the mapping if the rest of * the pages are not physically contiguous or have differing * memory attributes. */ p = TAILQ_NEXT(p, listq); for (pa = ptepa + PAGE_SIZE; pa < ptepa + size; pa += PAGE_SIZE) { KASSERT(p->valid == VM_PAGE_BITS_ALL, ("pmap_object_init_pt: invalid page %p", p)); if (pa != VM_PAGE_TO_PHYS(p) || pat_mode != p->md.pat_mode) return; p = TAILQ_NEXT(p, listq); } /* * Map using 2MB pages. Since "ptepa" is 2M aligned and * "size" is a multiple of 2M, adding the PAT setting to "pa" * will not affect the termination of this loop. */ PMAP_LOCK(pmap); for (pa = ptepa | pmap_cache_bits(pmap, pat_mode, 1); pa < ptepa + size; pa += NBPDR) { pdpg = pmap_allocpde(pmap, addr, NULL); if (pdpg == NULL) { /* * The creation of mappings below is only an * optimization. If a page directory page * cannot be allocated without blocking, * continue on to the next mapping rather than * blocking. */ addr += NBPDR; continue; } pde = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pdpg)); pde = &pde[pmap_pde_index(addr)]; if ((*pde & PG_V) == 0) { pde_store(pde, pa | PG_PS | PG_M | PG_A | PG_U | PG_RW | PG_V); pmap_resident_count_inc(pmap, NBPDR / PAGE_SIZE); atomic_add_long(&pmap_pde_mappings, 1); } else { /* Continue on if the PDE is already valid. */ pdpg->wire_count--; KASSERT(pdpg->wire_count > 0, ("pmap_object_init_pt: missing reference " "to page directory page, va: 0x%lx", addr)); } addr += NBPDR; } PMAP_UNLOCK(pmap); } } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware * feature, so there is no need to invalidate any TLB entries. * Since pmap_demote_pde() for the wired entry must never fail, * pmap_delayed_invl_started()/finished() calls around the * function are not needed. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte, PG_V; PG_V = pmap_valid_bit(pmap); PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); if ((*pde & PG_V) == 0) continue; if ((*pde & PG_PS) != 0) { if ((*pde & PG_W) == 0) panic("pmap_unwire: pde %#jx is missing PG_W", (uintmax_t)*pde); /* * Are we unwiring the entire large page? If not, * demote the mapping and fall through. */ if (sva + NBPDR == va_next && eva >= va_next) { atomic_clear_long(pde, PG_W); pmap->pm_stats.wired_count -= NBPDR / PAGE_SIZE; continue; } else if (!pmap_demote_pde(pmap, pde, sva)) panic("pmap_unwire: demotion failed"); } if (va_next > eva) va_next = eva; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { if ((*pte & PG_V) == 0) continue; if ((*pte & PG_W) == 0) panic("pmap_unwire: pte %#jx is missing PG_W", (uintmax_t)*pte); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ atomic_clear_long(pte, PG_W); pmap->pm_stats.wired_count--; } } PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { struct rwlock *lock; struct spglist free; vm_offset_t addr; vm_offset_t end_addr = src_addr + len; vm_offset_t va_next; vm_page_t dst_pdpg, dstmpte, srcmpte; pt_entry_t PG_A, PG_M, PG_V; if (dst_addr != src_addr) return; if (dst_pmap->pm_type != src_pmap->pm_type) return; /* * EPT page table entries that require emulation of A/D bits are * sensitive to clearing the PG_A bit (aka EPT_PG_READ). Although * we clear PG_M (aka EPT_PG_WRITE) concomitantly, the PG_U bit * (aka EPT_PG_EXECUTE) could still be set. Since some EPT * implementations flag an EPT misconfiguration for exec-only * mappings we skip this function entirely for emulated pmaps. */ if (pmap_emulate_ad_bits(dst_pmap)) return; lock = NULL; if (dst_pmap < src_pmap) { PMAP_LOCK(dst_pmap); PMAP_LOCK(src_pmap); } else { PMAP_LOCK(src_pmap); PMAP_LOCK(dst_pmap); } PG_A = pmap_accessed_bit(dst_pmap); PG_M = pmap_modified_bit(dst_pmap); PG_V = pmap_valid_bit(dst_pmap); for (addr = src_addr; addr < end_addr; addr = va_next) { pt_entry_t *src_pte, *dst_pte; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t srcptepaddr, *pde; KASSERT(addr < UPT_MIN_ADDRESS, ("pmap_copy: invalid to pmap_copy page tables")); pml4e = pmap_pml4e(src_pmap, addr); if ((*pml4e & PG_V) == 0) { va_next = (addr + NBPML4) & ~PML4MASK; if (va_next < addr) va_next = end_addr; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, addr); if ((*pdpe & PG_V) == 0) { va_next = (addr + NBPDP) & ~PDPMASK; if (va_next < addr) va_next = end_addr; continue; } va_next = (addr + NBPDR) & ~PDRMASK; if (va_next < addr) va_next = end_addr; pde = pmap_pdpe_to_pde(pdpe, addr); srcptepaddr = *pde; if (srcptepaddr == 0) continue; if (srcptepaddr & PG_PS) { if ((addr & PDRMASK) != 0 || addr + NBPDR > end_addr) continue; dst_pdpg = pmap_allocpde(dst_pmap, addr, NULL); if (dst_pdpg == NULL) break; pde = (pd_entry_t *) PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dst_pdpg)); pde = &pde[pmap_pde_index(addr)]; if (*pde == 0 && ((srcptepaddr & PG_MANAGED) == 0 || pmap_pv_insert_pde(dst_pmap, addr, srcptepaddr, PMAP_ENTER_NORECLAIM, &lock))) { *pde = srcptepaddr & ~PG_W; pmap_resident_count_inc(dst_pmap, NBPDR / PAGE_SIZE); atomic_add_long(&pmap_pde_mappings, 1); } else dst_pdpg->wire_count--; continue; } srcptepaddr &= PG_FRAME; srcmpte = PHYS_TO_VM_PAGE(srcptepaddr); KASSERT(srcmpte->wire_count > 0, ("pmap_copy: source page table page is unused")); if (va_next > end_addr) va_next = end_addr; src_pte = (pt_entry_t *)PHYS_TO_DMAP(srcptepaddr); src_pte = &src_pte[pmap_pte_index(addr)]; dstmpte = NULL; while (addr < va_next) { pt_entry_t ptetemp; ptetemp = *src_pte; /* * we only virtual copy managed pages */ if ((ptetemp & PG_MANAGED) != 0) { if (dstmpte != NULL && dstmpte->pindex == pmap_pde_pindex(addr)) dstmpte->wire_count++; else if ((dstmpte = pmap_allocpte(dst_pmap, addr, NULL)) == NULL) goto out; dst_pte = (pt_entry_t *) PHYS_TO_DMAP(VM_PAGE_TO_PHYS(dstmpte)); dst_pte = &dst_pte[pmap_pte_index(addr)]; if (*dst_pte == 0 && pmap_try_insert_pv_entry(dst_pmap, addr, PHYS_TO_VM_PAGE(ptetemp & PG_FRAME), &lock)) { /* * Clear the wired, modified, and * accessed (referenced) bits * during the copy. */ *dst_pte = ptetemp & ~(PG_W | PG_M | PG_A); pmap_resident_count_inc(dst_pmap, 1); } else { SLIST_INIT(&free); if (pmap_unwire_ptp(dst_pmap, addr, dstmpte, &free)) { /* * Although "addr" is not * mapped, paging-structure * caches could nonetheless * have entries that refer to * the freed page table pages. * Invalidate those entries. */ pmap_invalidate_page(dst_pmap, addr); vm_page_free_pages_toq(&free, true); } goto out; } if (dstmpte->wire_count >= srcmpte->wire_count) break; } addr += PAGE_SIZE; src_pte++; } } out: if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(src_pmap); PMAP_UNLOCK(dst_pmap); } /* * Zero the specified hardware page. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * Zero an an area within a single hardware page. off and size must not * cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* * Copy 1 specified hardware page to another. */ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_page_t pages[2]; vm_offset_t vaddr[2], a_pg_offset, b_pg_offset; int cnt; boolean_t mapped; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; pages[0] = ma[a_offset >> PAGE_SHIFT]; b_pg_offset = b_offset & PAGE_MASK; pages[1] = mb[b_offset >> PAGE_SHIFT]; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); mapped = pmap_map_io_transient(pages, vaddr, 2, FALSE); a_cp = (char *)vaddr[0] + a_pg_offset; b_cp = (char *)vaddr[1] + b_pg_offset; bcopy(a_cp, b_cp, cnt); if (__predict_false(mapped)) pmap_unmap_io_transient(pages, vaddr, 2, FALSE); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct md_page *pvh; struct rwlock *lock; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } } rw_runlock(lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { struct rwlock *lock; struct md_page *pvh; pmap_t pmap; pt_entry_t *pte; pv_entry_t pv; int count, md_gen, pvh_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pte(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } if ((m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen || pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pde(pmap, pv->pv_va); if ((*pte & PG_W) != 0) count++; PMAP_UNLOCK(pmap); } } rw_runlock(lock); return (count); } /* * Returns TRUE if the given page is mapped individually or as part of * a 2mpage. Otherwise, returns FALSE. */ boolean_t pmap_page_is_mapped(vm_page_t m) { struct rwlock *lock; boolean_t rv; if ((m->oflags & VPO_UNMANAGED) != 0) return (FALSE); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); rv = !TAILQ_EMPTY(&m->md.pv_list) || ((m->flags & PG_FICTITIOUS) == 0 && !TAILQ_EMPTY(&pa_to_pvh(VM_PAGE_TO_PHYS(m))->pv_list)); rw_runlock(lock); return (rv); } /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the * caller. * * This function cannot be applied to the kernel pmap. Moreover, it * is not intended for general use. It is only to be used during * process termination. Consequently, it can be implemented in ways * that make it faster than pmap_remove(). First, it can more quickly * destroy mappings by iterating over the pmap's collection of PV * entries, rather than searching the page table. Second, it doesn't * have to test and clear the page table entries atomically, because * no processor is currently accessing the user address space. In * particular, a page table entry's dirty bit won't change state once * this function starts. * * Although this function destroys all of the pmap's managed, * non-wired mappings, it can delay and batch the invalidation of TLB * entries without calling pmap_delayed_invl_started() and * pmap_delayed_invl_finished(). Because the pmap is not active on * any other processor, none of these TLB entries will ever be used * before their eventual invalidation. Consequently, there is no need * for either pmap_remove_all() or pmap_remove_write() to wait for * that eventual TLB invalidation. */ void pmap_remove_pages(pmap_t pmap) { pd_entry_t ptepde; pt_entry_t *pte, tpte; pt_entry_t PG_M, PG_RW, PG_V; struct spglist free; vm_page_t m, mpte, mt; pv_entry_t pv; struct md_page *pvh; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx; boolean_t superpage; vm_paddr_t pa; /* * Assert that the given pmap is only active on the current * CPU. Unfortunately, we cannot block another CPU from * activating the pmap while this function is executing. */ KASSERT(pmap == PCPU_GET(curpmap), ("non-current pmap %p", pmap)); #ifdef INVARIANTS { cpuset_t other_cpus; other_cpus = all_cpus; critical_enter(); CPU_CLR(PCPU_GET(cpuid), &other_cpus); CPU_AND(&other_cpus, &pmap->pm_active); critical_exit(); KASSERT(CPU_EMPTY(&other_cpus), ("pmap active %p", pmap)); } #endif lock = NULL; PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); SLIST_INIT(&free); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; freed = 0; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = bsfq(inuse); bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; pte = pmap_pdpe(pmap, pv->pv_va); ptepde = *pte; pte = pmap_pdpe_to_pde(pte, pv->pv_va); tpte = *pte; if ((tpte & (PG_PS | PG_V)) == PG_V) { superpage = FALSE; ptepde = tpte; pte = (pt_entry_t *)PHYS_TO_DMAP(tpte & PG_FRAME); pte = &pte[pmap_pte_index(pv->pv_va)]; tpte = *pte; } else { /* * Keep track whether 'tpte' is a * superpage explicitly instead of * relying on PG_PS being set. * * This is because PG_PS is numerically * identical to PG_PTE_PAT and thus a * regular page could be mistaken for * a superpage. */ superpage = TRUE; } if ((tpte & PG_V) == 0) { panic("bad pte va %lx pte %lx", pv->pv_va, tpte); } /* * We cannot remove wired pages from a process' mapping at this time */ if (tpte & PG_W) { allfree = 0; continue; } if (superpage) pa = tpte & PG_PS_FRAME; else pa = tpte & PG_FRAME; m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tpte)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad tpte %#jx", (uintmax_t)tpte)); pte_clear(pte); /* * Update the vm_page_t clean/reference bits. */ if ((tpte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if (superpage) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) vm_page_dirty(mt); } else vm_page_dirty(m); } CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; if (superpage) { pmap_resident_count_dec(pmap, NBPDR / PAGE_SIZE); pvh = pa_to_pvh(tpte & PG_PS_FRAME); TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; if (TAILQ_EMPTY(&pvh->pv_list)) { for (mt = m; mt < &m[NBPDR / PAGE_SIZE]; mt++) if ((mt->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&mt->md.pv_list)) vm_page_aflag_clear(mt, PGA_WRITEABLE); } mpte = pmap_remove_pt_page(pmap, pv->pv_va); if (mpte != NULL) { pmap_resident_count_dec(pmap, 1); KASSERT(mpte->wire_count == NPTEPG, ("pmap_remove_pages: pte page wire count error")); mpte->wire_count = 0; pmap_add_delayed_free_list(mpte, &free, FALSE); } } else { pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if ((m->aflags & PGA_WRITEABLE) != 0 && TAILQ_EMPTY(&m->md.pv_list) && (m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); if (TAILQ_EMPTY(&pvh->pv_list)) vm_page_aflag_clear(m, PGA_WRITEABLE); } } pmap_unuse_pt(pmap, pv->pv_va, ptepde, &free); freed++; } } PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } if (lock != NULL) rw_wunlock(lock); pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); vm_page_free_pages_toq(&free, true); } static boolean_t pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct rwlock *lock; pv_entry_t pv; struct md_page *pvh; pt_entry_t *pte, mask; pt_entry_t PG_A, PG_M, PG_RW, PG_V; pmap_t pmap; int md_gen, pvh_gen; boolean_t rv; rv = FALSE; lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pte(pmap, pv->pv_va); mask = 0; if (modified) { PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); mask |= PG_RW | PG_M; } if (accessed) { PG_A = pmap_accessed_bit(pmap); PG_V = pmap_valid_bit(pmap); mask |= PG_V | PG_A; } rv = (*pte & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; } if ((m->flags & PG_FICTITIOUS) == 0) { pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen || pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } pte = pmap_pde(pmap, pv->pv_va); mask = 0; if (modified) { PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); mask |= PG_RW | PG_M; } if (accessed) { PG_A = pmap_accessed_bit(pmap); PG_V = pmap_valid_bit(pmap); mask |= PG_V | PG_A; } rv = (*pte & mask) == mask; PMAP_UNLOCK(pmap); if (rv) goto out; } } out: rw_runlock(lock); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pd_entry_t *pde; pt_entry_t *pte, PG_V; boolean_t rv; PG_V = pmap_valid_bit(pmap); rv = FALSE; PMAP_LOCK(pmap); pde = pmap_pde(pmap, addr); if (pde != NULL && (*pde & (PG_PS | PG_V)) == PG_V) { pte = pmap_pde_to_pte(pde, addr); rv = (*pte & PG_V) == 0; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return (pmap_page_test_mappings(m, TRUE, FALSE)); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { struct md_page *pvh; pmap_t pmap; struct rwlock *lock; pv_entry_t next_pv, pv; pd_entry_t *pde; pt_entry_t oldpte, *pte, PG_M, PG_RW; vm_offset_t va; int pvh_gen, md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; lock = VM_PAGE_TO_PV_LIST_LOCK(m); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); retry_pv_loop: rw_wlock(lock); TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } PG_RW = pmap_rw_bit(pmap); va = pv->pv_va; pde = pmap_pde(pmap, va); if ((*pde & PG_RW) != 0) (void)pmap_demote_pde_locked(pmap, pde, va, &lock); KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_remove_write: found a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); retry: oldpte = *pte; if (oldpte & PG_RW) { if (!atomic_cmpset_long(pte, oldpte, oldpte & ~(PG_RW | PG_M))) goto retry; if ((oldpte & PG_M) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); pmap_delayed_invl_wait(m); } static __inline boolean_t safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) { if (!pmap_emulate_ad_bits(pmap)) return (TRUE); KASSERT(pmap->pm_type == PT_EPT, ("invalid pm_type %d", pmap->pm_type)); /* * XWR = 010 or 110 will cause an unconditional EPT misconfiguration * so we don't let the referenced (aka EPT_PG_READ) bit to be cleared * if the EPT_PG_WRITE bit is set. */ if ((pte & EPT_PG_WRITE) != 0) return (FALSE); /* * XWR = 100 is allowed only if the PMAP_SUPPORTS_EXEC_ONLY is set. */ if ((pte & EPT_PG_EXECUTE) == 0 || ((pmap->pm_flags & PMAP_SUPPORTS_EXEC_ONLY) != 0)) return (TRUE); else return (FALSE); } /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * As an optimization, update the page's dirty field if a modified bit is * found while counting reference bits. This opportunistic update can be * performed at low cost and can eliminate the need for some future calls * to pmap_is_modified(). However, since this function stops after * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some * dirty pages. Those dirty pages will only be detected by a future call * to pmap_is_modified(). * * A DI block is not needed within this function, because * invalidations are performed before the PV list lock is * released. */ int pmap_ts_referenced(vm_page_t m) { struct md_page *pvh; pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; pd_entry_t oldpde, *pde; pt_entry_t *pte, PG_A, PG_M, PG_RW; vm_offset_t va; vm_paddr_t pa; int cleared, md_gen, not_cleared, pvh_gen; struct spglist free; boolean_t demoted; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa); rw_wlock(lock); retry: not_cleared = 0; if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) goto small_mappings; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); va = pv->pv_va; pde = pmap_pde(pmap, pv->pv_va); oldpde = *pde; if ((oldpde & (PG_M | PG_RW)) == (PG_M | PG_RW)) { /* * Although "oldpde" is mapping a 2MB page, because * this function is called at a 4KB page granularity, * we only update the 4KB page under test. */ vm_page_dirty(m); } if ((oldpde & PG_A) != 0) { /* * Since this reference bit is shared by 512 4KB * pages, it should not be cleared every time it is * tested. Apply a simple "hash" function on the * physical page number, the virtual superpage number, * and the pmap address to select one 4KB page out of * the 512 on which testing the reference bit will * result in clearing that reference bit. This * function is designed to avoid the selection of the * same 4KB page for every 2MB page mapping. * * On demotion, a mapping that hasn't been referenced * is simply destroyed. To avoid the possibility of a * subsequent page fault on a demoted wired mapping, * always leave its reference bit set. Moreover, * since the superpage is wired, the current state of * its reference bit won't affect page replacement. */ if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> PDRSHIFT) ^ (uintptr_t)pmap) & (NPTEPG - 1)) == 0 && (oldpde & PG_W) == 0) { if (safe_to_clear_referenced(pmap, oldpde)) { atomic_clear_long(pde, PG_A); pmap_invalidate_page(pmap, pv->pv_va); demoted = FALSE; } else if (pmap_demote_pde_locked(pmap, pde, pv->pv_va, &lock)) { /* * Remove the mapping to a single page * so that a subsequent access may * repromote. Since the underlying * page table page is fully populated, * this removal never frees a page * table page. */ demoted = TRUE; va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pde_to_pte(pde, va); pmap_remove_pte(pmap, pte, va, *pde, NULL, &lock); pmap_invalidate_page(pmap, va); } else demoted = TRUE; if (demoted) { /* * The superpage mapping was removed * entirely and therefore 'pv' is no * longer valid. */ if (pvf == pv) pvf = NULL; pv = NULL; } cleared++; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; } if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX) goto out; } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); small_mappings: if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_ts_referenced: found a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) vm_page_dirty(m); if ((*pte & PG_A) != 0) { if (safe_to_clear_referenced(pmap, *pte)) { atomic_clear_long(pte, PG_A); pmap_invalidate_page(pmap, pv->pv_va); cleared++; } else if ((*pte & PG_W) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. */ pmap_remove_pte(pmap, pte, pv->pv_va, *pde, &free, &lock); pmap_invalidate_page(pmap, pv->pv_va); cleared++; if (pvf == pv) pvf = NULL; pv = NULL; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); vm_page_free_pages_toq(&free, true); return (cleared + not_cleared); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { struct rwlock *lock; pml4_entry_t *pml4e; pdp_entry_t *pdpe; pd_entry_t oldpde, *pde; pt_entry_t *pte, PG_A, PG_G, PG_M, PG_RW, PG_V; vm_offset_t va, va_next; vm_page_t m; boolean_t anychanged; if (advice != MADV_DONTNEED && advice != MADV_FREE) return; /* * A/D bit emulation requires an alternate code path when clearing * the modified and accessed bits below. Since this function is * advisory in nature we skip it entirely for pmaps that require * A/D bit emulation. */ if (pmap_emulate_ad_bits(pmap)) return; PG_A = pmap_accessed_bit(pmap); PG_G = pmap_global_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); anychanged = FALSE; pmap_delayed_invl_started(); PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { pml4e = pmap_pml4e(pmap, sva); if ((*pml4e & PG_V) == 0) { va_next = (sva + NBPML4) & ~PML4MASK; if (va_next < sva) va_next = eva; continue; } pdpe = pmap_pml4e_to_pdpe(pml4e, sva); if ((*pdpe & PG_V) == 0) { va_next = (sva + NBPDP) & ~PDPMASK; if (va_next < sva) va_next = eva; continue; } va_next = (sva + NBPDR) & ~PDRMASK; if (va_next < sva) va_next = eva; pde = pmap_pdpe_to_pde(pdpe, sva); oldpde = *pde; if ((oldpde & PG_V) == 0) continue; else if ((oldpde & PG_PS) != 0) { if ((oldpde & PG_MANAGED) == 0) continue; lock = NULL; if (!pmap_demote_pde_locked(pmap, pde, sva, &lock)) { if (lock != NULL) rw_wunlock(lock); /* * The large page mapping was destroyed. */ continue; } /* * Unless the page mappings are wired, remove the * mapping to a single page so that a subsequent * access may repromote. Since the underlying page * table page is fully populated, this removal never * frees a page table page. */ if ((oldpde & PG_W) == 0) { pte = pmap_pde_to_pte(pde, sva); KASSERT((*pte & PG_V) != 0, ("pmap_advise: invalid PTE")); pmap_remove_pte(pmap, pte, sva, *pde, NULL, &lock); anychanged = TRUE; } if (lock != NULL) rw_wunlock(lock); } if (va_next > eva) va_next = eva; va = va_next; for (pte = pmap_pde_to_pte(pde, sva); sva != va_next; pte++, sva += PAGE_SIZE) { if ((*pte & (PG_MANAGED | PG_V)) != (PG_MANAGED | PG_V)) goto maybe_invlrng; else if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { if (advice == MADV_DONTNEED) { /* * Future calls to pmap_is_modified() * can be avoided by making the page * dirty now. */ m = PHYS_TO_VM_PAGE(*pte & PG_FRAME); vm_page_dirty(m); } atomic_clear_long(pte, PG_M | PG_A); } else if ((*pte & PG_A) != 0) atomic_clear_long(pte, PG_A); else goto maybe_invlrng; if ((*pte & PG_G) != 0) { if (va == va_next) va = sva; } else anychanged = TRUE; continue; maybe_invlrng: if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } if (anychanged) pmap_invalidate_all(pmap); PMAP_UNLOCK(pmap); pmap_delayed_invl_finished(); } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { struct md_page *pvh; pmap_t pmap; pv_entry_t next_pv, pv; pd_entry_t oldpde, *pde; pt_entry_t oldpte, *pte, PG_M, PG_RW, PG_V; struct rwlock *lock; vm_offset_t va; int md_gen, pvh_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(VM_PAGE_TO_PHYS(m)); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_wlock(lock); restart: TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); va = pv->pv_va; pde = pmap_pde(pmap, va); oldpde = *pde; if ((oldpde & PG_RW) != 0) { if (pmap_demote_pde_locked(pmap, pde, va, &lock)) { if ((oldpde & PG_W) == 0) { /* * Write protect the mapping to a * single page so that a subsequent * write access may repromote. */ va += VM_PAGE_TO_PHYS(m) - (oldpde & PG_PS_FRAME); pte = pmap_pde_to_pte(pde, va); oldpte = *pte; if ((oldpte & PG_V) != 0) { while (!atomic_cmpset_long(pte, oldpte, oldpte & ~(PG_M | PG_RW))) oldpte = *pte; vm_page_dirty(m); pmap_invalidate_page(pmap, va); } } } } PMAP_UNLOCK(pmap); } TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; pvh_gen = pvh->pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } PG_M = pmap_modified_bit(pmap); PG_RW = pmap_rw_bit(pmap); pde = pmap_pde(pmap, pv->pv_va); KASSERT((*pde & PG_PS) == 0, ("pmap_clear_modify: found" " a 2mpage in page %p's pv list", m)); pte = pmap_pde_to_pte(pde, pv->pv_va); if ((*pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) { atomic_clear_long(pte, PG_M); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); } /* * Miscellaneous support routines follow */ /* Adjust the cache mode for a 4KB page mapped via a PTE. */ static __inline void pmap_pte_attr(pt_entry_t *pte, int cache_bits, int mask) { u_int opte, npte; /* * The cache mode bits are all in the low 32-bits of the * PTE, so we can just spin on updating the low 32-bits. */ do { opte = *(u_int *)pte; npte = opte & ~mask; npte |= cache_bits; } while (npte != opte && !atomic_cmpset_int((u_int *)pte, opte, npte)); } /* Adjust the cache mode for a 2MB page mapped via a PDE. */ static __inline void pmap_pde_attr(pd_entry_t *pde, int cache_bits, int mask) { u_int opde, npde; /* * The cache mode bits are all in the low 32-bits of the * PDE, so we can just spin on updating the low 32-bits. */ do { opde = *(u_int *)pde; npde = opde & ~mask; npde |= cache_bits; } while (npde != opde && !atomic_cmpset_int((u_int *)pde, opde, npde)); } /* * Map a set of physical memory pages into the kernel virtual * address space. Return a pointer to where it is mapped. This * routine is intended to be used for mapping device memory, * NOT real memory. */ void * pmap_mapdev_attr(vm_paddr_t pa, vm_size_t size, int mode) { struct pmap_preinit_mapping *ppim; vm_offset_t va, offset; vm_size_t tmpsize; int i; offset = pa & PAGE_MASK; size = round_page(offset + size); pa = trunc_page(pa); if (!pmap_initialized) { va = 0; for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == 0) { ppim->pa = pa; ppim->sz = size; ppim->mode = mode; ppim->va = virtual_avail; virtual_avail += size; va = ppim->va; break; } } if (va == 0) panic("%s: too many preinit mappings", __func__); } else { /* * If we have a preinit mapping, re-use it. */ for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->pa == pa && ppim->sz == size && ppim->mode == mode) return ((void *)(ppim->va + offset)); } /* * If the specified range of physical addresses fits within * the direct map window, use the direct map. */ if (pa < dmaplimit && pa + size <= dmaplimit) { va = PHYS_TO_DMAP(pa); if (!pmap_change_attr(va, size, mode)) return ((void *)(va + offset)); } va = kva_alloc(size); if (va == 0) panic("%s: Couldn't allocate KVA", __func__); } for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) pmap_kenter_attr(va + tmpsize, pa + tmpsize, mode); pmap_invalidate_range(kernel_pmap, va, va + tmpsize); pmap_invalidate_cache_range(va, va + tmpsize); return ((void *)(va + offset)); } void * pmap_mapdev(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_UNCACHEABLE)); } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return (pmap_mapdev_attr(pa, size, PAT_WRITE_BACK)); } void pmap_unmapdev(vm_offset_t va, vm_size_t size) { struct pmap_preinit_mapping *ppim; vm_offset_t offset; int i; /* If we gave a direct map region in pmap_mapdev, do nothing */ if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) return; offset = va & PAGE_MASK; size = round_page(offset + size); va = trunc_page(va); for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { ppim = pmap_preinit_mapping + i; if (ppim->va == va && ppim->sz == size) { if (pmap_initialized) return; ppim->pa = 0; ppim->va = 0; ppim->sz = 0; ppim->mode = 0; if (va + size == virtual_avail) virtual_avail = va; return; } } if (pmap_initialized) kva_free(va, size); } /* * Tries to demote a 1GB page mapping. */ static boolean_t pmap_demote_pdpe(pmap_t pmap, pdp_entry_t *pdpe, vm_offset_t va) { pdp_entry_t newpdpe, oldpdpe; pd_entry_t *firstpde, newpde, *pde; pt_entry_t PG_A, PG_M, PG_RW, PG_V; vm_paddr_t pdpgpa; vm_page_t pdpg; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK_ASSERT(pmap, MA_OWNED); oldpdpe = *pdpe; KASSERT((oldpdpe & (PG_PS | PG_V)) == (PG_PS | PG_V), ("pmap_demote_pdpe: oldpdpe is missing PG_PS and/or PG_V")); if ((pdpg = vm_page_alloc(NULL, va >> PDPSHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { CTR2(KTR_PMAP, "pmap_demote_pdpe: failure for va %#lx" " in pmap %p", va, pmap); return (FALSE); } pdpgpa = VM_PAGE_TO_PHYS(pdpg); firstpde = (pd_entry_t *)PHYS_TO_DMAP(pdpgpa); newpdpe = pdpgpa | PG_M | PG_A | (oldpdpe & PG_U) | PG_RW | PG_V; KASSERT((oldpdpe & PG_A) != 0, ("pmap_demote_pdpe: oldpdpe is missing PG_A")); KASSERT((oldpdpe & (PG_M | PG_RW)) != PG_RW, ("pmap_demote_pdpe: oldpdpe is missing PG_M")); newpde = oldpdpe; /* * Initialize the page directory page. */ for (pde = firstpde; pde < firstpde + NPDEPG; pde++) { *pde = newpde; newpde += NBPDR; } /* * Demote the mapping. */ *pdpe = newpdpe; /* * Invalidate a stale recursive mapping of the page directory page. */ pmap_invalidate_page(pmap, (vm_offset_t)vtopde(va)); pmap_pdpe_demotions++; CTR2(KTR_PMAP, "pmap_demote_pdpe: success for va %#lx" " in pmap %p", va, pmap); return (TRUE); } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pat_mode = ma; /* * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. */ if ((m->flags & PG_FICTITIOUS) == 0 && pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE, m->md.pat_mode)) panic("memory attribute change on the direct map failed"); } /* * Changes the specified virtual address range's memory type to that given by * the parameter "mode". The specified virtual address range must be * completely contained within either the direct map or the kernel map. If * the virtual address range is contained within the kernel map, then the * memory type for each of the corresponding ranges of the direct map is also * changed. (The corresponding ranges of the direct map are those ranges that * map the same physical pages as the specified virtual address range.) These * changes to the direct map are necessary because Intel describes the * behavior of their processors as "undefined" if two or more mappings to the * same physical page have different memory types. * * Returns zero if the change completed successfully, and either EINVAL or * ENOMEM if the change failed. Specifically, EINVAL is returned if some part * of the virtual address range was not mapped, and ENOMEM is returned if * there was insufficient memory available to complete the change. In the * latter case, the memory type may have been changed on some part of the * virtual address range or the direct map. */ int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) { int error; PMAP_LOCK(kernel_pmap); error = pmap_change_attr_locked(va, size, mode); PMAP_UNLOCK(kernel_pmap); return (error); } static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode) { vm_offset_t base, offset, tmpva; vm_paddr_t pa_start, pa_end, pa_end1; pdp_entry_t *pdpe; pd_entry_t *pde; pt_entry_t *pte; int cache_bits_pte, cache_bits_pde, error; boolean_t changed; PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED); base = trunc_page(va); offset = va & PAGE_MASK; size = round_page(offset + size); /* * Only supported on kernel virtual addresses, including the direct * map but excluding the recursive map. */ if (base < DMAP_MIN_ADDRESS) return (EINVAL); cache_bits_pde = pmap_cache_bits(kernel_pmap, mode, 1); cache_bits_pte = pmap_cache_bits(kernel_pmap, mode, 0); changed = FALSE; /* * Pages that aren't mapped aren't supported. Also break down 2MB pages * into 4KB pages if required. */ for (tmpva = base; tmpva < base + size; ) { pdpe = pmap_pdpe(kernel_pmap, tmpva); if (pdpe == NULL || *pdpe == 0) return (EINVAL); if (*pdpe & PG_PS) { /* * If the current 1GB page already has the required * memory type, then we need not demote this page. Just * increment tmpva to the next 1GB page frame. */ if ((*pdpe & X86_PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_1gpage(tmpva) + NBPDP; continue; } /* * If the current offset aligns with a 1GB page frame * and there is at least 1GB left within the range, then * we need not break down this page into 2MB pages. */ if ((tmpva & PDPMASK) == 0 && tmpva + PDPMASK < base + size) { tmpva += NBPDP; continue; } if (!pmap_demote_pdpe(kernel_pmap, pdpe, tmpva)) return (ENOMEM); } pde = pmap_pdpe_to_pde(pdpe, tmpva); if (*pde == 0) return (EINVAL); if (*pde & PG_PS) { /* * If the current 2MB page already has the required * memory type, then we need not demote this page. Just * increment tmpva to the next 2MB page frame. */ if ((*pde & X86_PG_PDE_CACHE) == cache_bits_pde) { tmpva = trunc_2mpage(tmpva) + NBPDR; continue; } /* * If the current offset aligns with a 2MB page frame * and there is at least 2MB left within the range, then * we need not break down this page into 4KB pages. */ if ((tmpva & PDRMASK) == 0 && tmpva + PDRMASK < base + size) { tmpva += NBPDR; continue; } if (!pmap_demote_pde(kernel_pmap, pde, tmpva)) return (ENOMEM); } pte = pmap_pde_to_pte(pde, tmpva); if (*pte == 0) return (EINVAL); tmpva += PAGE_SIZE; } error = 0; /* * Ok, all the pages exist, so run through them updating their * cache mode if required. */ pa_start = pa_end = 0; for (tmpva = base; tmpva < base + size; ) { pdpe = pmap_pdpe(kernel_pmap, tmpva); if (*pdpe & PG_PS) { if ((*pdpe & X86_PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pdpe, cache_bits_pde, X86_PG_PDE_CACHE); changed = TRUE; } if (tmpva >= VM_MIN_KERNEL_ADDRESS && (*pdpe & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pdpe & PG_PS_FRAME; pa_end = pa_start + NBPDP; } else if (pa_end == (*pdpe & PG_PS_FRAME)) pa_end += NBPDP; else { /* Run ended, update direct map. */ error = pmap_change_attr_locked( PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); if (error != 0) break; /* Start physical address run. */ pa_start = *pdpe & PG_PS_FRAME; pa_end = pa_start + NBPDP; } } tmpva = trunc_1gpage(tmpva) + NBPDP; continue; } pde = pmap_pdpe_to_pde(pdpe, tmpva); if (*pde & PG_PS) { if ((*pde & X86_PG_PDE_CACHE) != cache_bits_pde) { pmap_pde_attr(pde, cache_bits_pde, X86_PG_PDE_CACHE); changed = TRUE; } if (tmpva >= VM_MIN_KERNEL_ADDRESS && (*pde & PG_PS_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pde & PG_PS_FRAME; pa_end = pa_start + NBPDR; } else if (pa_end == (*pde & PG_PS_FRAME)) pa_end += NBPDR; else { /* Run ended, update direct map. */ error = pmap_change_attr_locked( PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); if (error != 0) break; /* Start physical address run. */ pa_start = *pde & PG_PS_FRAME; pa_end = pa_start + NBPDR; } } tmpva = trunc_2mpage(tmpva) + NBPDR; } else { pte = pmap_pde_to_pte(pde, tmpva); if ((*pte & X86_PG_PTE_CACHE) != cache_bits_pte) { pmap_pte_attr(pte, cache_bits_pte, X86_PG_PTE_CACHE); changed = TRUE; } if (tmpva >= VM_MIN_KERNEL_ADDRESS && (*pte & PG_FRAME) < dmaplimit) { if (pa_start == pa_end) { /* Start physical address run. */ pa_start = *pte & PG_FRAME; pa_end = pa_start + PAGE_SIZE; } else if (pa_end == (*pte & PG_FRAME)) pa_end += PAGE_SIZE; else { /* Run ended, update direct map. */ error = pmap_change_attr_locked( PHYS_TO_DMAP(pa_start), pa_end - pa_start, mode); if (error != 0) break; /* Start physical address run. */ pa_start = *pte & PG_FRAME; pa_end = pa_start + PAGE_SIZE; } } tmpva += PAGE_SIZE; } } if (error == 0 && pa_start != pa_end && pa_start < dmaplimit) { pa_end1 = MIN(pa_end, dmaplimit); if (pa_start != pa_end1) error = pmap_change_attr_locked(PHYS_TO_DMAP(pa_start), pa_end1 - pa_start, mode); } /* * Flush CPU caches if required to make sure any data isn't cached that * shouldn't be, etc. */ if (changed) { pmap_invalidate_range(kernel_pmap, base, tmpva); pmap_invalidate_cache_range(base, tmpva); } return (error); } /* * Demotes any mapping within the direct map region that covers more than the * specified range of physical addresses. This range's size must be a power * of two and its starting address must be a multiple of its size. Since the * demotion does not change any attributes of the mapping, a TLB invalidation * is not mandatory. The caller may, however, request a TLB invalidation. */ void pmap_demote_DMAP(vm_paddr_t base, vm_size_t len, boolean_t invalidate) { pdp_entry_t *pdpe; pd_entry_t *pde; vm_offset_t va; boolean_t changed; if (len == 0) return; KASSERT(powerof2(len), ("pmap_demote_DMAP: len is not a power of 2")); KASSERT((base & (len - 1)) == 0, ("pmap_demote_DMAP: base is not a multiple of len")); if (len < NBPDP && base < dmaplimit) { va = PHYS_TO_DMAP(base); changed = FALSE; PMAP_LOCK(kernel_pmap); pdpe = pmap_pdpe(kernel_pmap, va); if ((*pdpe & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDPE"); if ((*pdpe & PG_PS) != 0) { if (!pmap_demote_pdpe(kernel_pmap, pdpe, va)) panic("pmap_demote_DMAP: PDPE failed"); changed = TRUE; } if (len < NBPDR) { pde = pmap_pdpe_to_pde(pdpe, va); if ((*pde & X86_PG_V) == 0) panic("pmap_demote_DMAP: invalid PDE"); if ((*pde & PG_PS) != 0) { if (!pmap_demote_pde(kernel_pmap, pde, va)) panic("pmap_demote_DMAP: PDE failed"); changed = TRUE; } } if (changed && invalidate) pmap_invalidate_page(kernel_pmap, va); PMAP_UNLOCK(kernel_pmap); } } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { pd_entry_t *pdep; pt_entry_t pte, PG_A, PG_M, PG_RW, PG_V; vm_paddr_t pa; int val; PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); PMAP_LOCK(pmap); retry: pdep = pmap_pde(pmap, addr); if (pdep != NULL && (*pdep & PG_V)) { if (*pdep & PG_PS) { pte = *pdep; /* Compute the physical address of the 4KB page. */ pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & PG_FRAME; val = MINCORE_SUPER; } else { pte = *pmap_pde_to_pte(pdep, addr); pa = pte & PG_FRAME; val = 0; } } else { pte = 0; pa = 0; val = 0; } if ((pte & PG_V) != 0) { val |= MINCORE_INCORE; if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((pte & PG_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && (pte & (PG_MANAGED | PG_V)) == (PG_MANAGED | PG_V)) { /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */ if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) goto retry; } else PA_UNLOCK_COND(*locked_pa); PMAP_UNLOCK(pmap); return (val); } static uint64_t pmap_pcid_alloc(pmap_t pmap, u_int cpuid) { uint32_t gen, new_gen, pcid_next; CRITICAL_ASSERT(curthread); gen = PCPU_GET(pcid_gen); if (pmap->pm_pcids[cpuid].pm_pcid == PMAP_PCID_KERN) return (pti ? 0 : CR3_PCID_SAVE); if (pmap->pm_pcids[cpuid].pm_gen == gen) return (CR3_PCID_SAVE); pcid_next = PCPU_GET(pcid_next); KASSERT((!pti && pcid_next <= PMAP_PCID_OVERMAX) || (pti && pcid_next <= PMAP_PCID_OVERMAX_KERN), ("cpu %d pcid_next %#x", cpuid, pcid_next)); if ((!pti && pcid_next == PMAP_PCID_OVERMAX) || (pti && pcid_next == PMAP_PCID_OVERMAX_KERN)) { new_gen = gen + 1; if (new_gen == 0) new_gen = 1; PCPU_SET(pcid_gen, new_gen); pcid_next = PMAP_PCID_KERN + 1; } else { new_gen = gen; } pmap->pm_pcids[cpuid].pm_pcid = pcid_next; pmap->pm_pcids[cpuid].pm_gen = new_gen; PCPU_SET(pcid_next, pcid_next + 1); return (0); } static uint64_t pmap_pcid_alloc_checked(pmap_t pmap, u_int cpuid) { uint64_t cached; cached = pmap_pcid_alloc(pmap, cpuid); KASSERT(pmap->pm_pcids[cpuid].pm_pcid < PMAP_PCID_OVERMAX, ("pmap %p cpu %d pcid %#x", pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid)); KASSERT(pmap->pm_pcids[cpuid].pm_pcid != PMAP_PCID_KERN || pmap == kernel_pmap, ("non-kernel pmap pmap %p cpu %d pcid %#x", pmap, cpuid, pmap->pm_pcids[cpuid].pm_pcid)); return (cached); } static void pmap_activate_sw_pti_post(pmap_t pmap) { if (pmap->pm_ucr3 != PMAP_NO_CR3) PCPU_GET(tssp)->tss_rsp0 = ((vm_offset_t)PCPU_PTR(pti_stack) + PC_PTI_STACK_SZ * sizeof(uint64_t)) & ~0xful; } static void inline pmap_activate_sw_pcid_pti(pmap_t pmap, u_int cpuid, const bool invpcid_works1) { struct invpcid_descr d; uint64_t cached, cr3, kcr3, ucr3; cached = pmap_pcid_alloc_checked(pmap, cpuid); cr3 = rcr3(); if ((cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid); PCPU_SET(curpmap, pmap); kcr3 = pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid; ucr3 = pmap->pm_ucr3 | pmap->pm_pcids[cpuid].pm_pcid | PMAP_PCID_USER_PT; if (!cached && pmap->pm_ucr3 != PMAP_NO_CR3) { /* * Explicitly invalidate translations cached from the * user page table. They are not automatically * flushed by reload of cr3 with the kernel page table * pointer above. * * Note that the if() condition is resolved statically * by using the function argument instead of * runtime-evaluated invpcid_works value. */ if (invpcid_works1) { d.pcid = PMAP_PCID_USER_PT | pmap->pm_pcids[cpuid].pm_pcid; d.pad = 0; d.addr = 0; invpcid(&d, INVPCID_CTX); } else { pmap_pti_pcid_invalidate(ucr3, kcr3); } } PCPU_SET(kcr3, kcr3 | CR3_PCID_SAVE); PCPU_SET(ucr3, ucr3 | CR3_PCID_SAVE); if (cached) PCPU_INC(pm_save_cnt); } static void pmap_activate_sw_pcid_invpcid_pti(pmap_t pmap, u_int cpuid) { pmap_activate_sw_pcid_pti(pmap, cpuid, true); pmap_activate_sw_pti_post(pmap); } static void pmap_activate_sw_pcid_noinvpcid_pti(pmap_t pmap, u_int cpuid) { register_t rflags; /* * If the INVPCID instruction is not available, * invltlb_pcid_handler() is used to handle an invalidate_all * IPI, which checks for curpmap == smp_tlb_pmap. The below * sequence of operations has a window where %CR3 is loaded * with the new pmap's PML4 address, but the curpmap value has * not yet been updated. This causes the invltlb IPI handler, * which is called between the updates, to execute as a NOP, * which leaves stale TLB entries. * * Note that the most typical use of pmap_activate_sw(), from * the context switch, is immune to this race, because * interrupts are disabled (while the thread lock is owned), * and the IPI happens after curpmap is updated. Protect * other callers in a similar way, by disabling interrupts * around the %cr3 register reload and curpmap assignment. */ rflags = intr_disable(); pmap_activate_sw_pcid_pti(pmap, cpuid, false); intr_restore(rflags); pmap_activate_sw_pti_post(pmap); } static void pmap_activate_sw_pcid_nopti(pmap_t pmap, u_int cpuid) { uint64_t cached, cr3; cached = pmap_pcid_alloc_checked(pmap, cpuid); cr3 = rcr3(); if (!cached || (cr3 & ~CR3_PCID_MASK) != pmap->pm_cr3) load_cr3(pmap->pm_cr3 | pmap->pm_pcids[cpuid].pm_pcid | cached); PCPU_SET(curpmap, pmap); if (cached) PCPU_INC(pm_save_cnt); } static void pmap_activate_sw_pcid_noinvpcid_nopti(pmap_t pmap, u_int cpuid) { register_t rflags; rflags = intr_disable(); pmap_activate_sw_pcid_nopti(pmap, cpuid); intr_restore(rflags); } static void pmap_activate_sw_nopcid_nopti(pmap_t pmap, u_int cpuid __unused) { load_cr3(pmap->pm_cr3); PCPU_SET(curpmap, pmap); } static void pmap_activate_sw_nopcid_pti(pmap_t pmap, u_int cpuid __unused) { pmap_activate_sw_nopcid_nopti(pmap, cpuid); PCPU_SET(kcr3, pmap->pm_cr3); PCPU_SET(ucr3, pmap->pm_ucr3); pmap_activate_sw_pti_post(pmap); } DEFINE_IFUNC(static, void, pmap_activate_sw_mode, (pmap_t, u_int), static) { if (pmap_pcid_enabled && pti && invpcid_works) return (pmap_activate_sw_pcid_invpcid_pti); else if (pmap_pcid_enabled && pti && !invpcid_works) return (pmap_activate_sw_pcid_noinvpcid_pti); else if (pmap_pcid_enabled && !pti && invpcid_works) return (pmap_activate_sw_pcid_nopti); else if (pmap_pcid_enabled && !pti && !invpcid_works) return (pmap_activate_sw_pcid_noinvpcid_nopti); else if (!pmap_pcid_enabled && pti) return (pmap_activate_sw_nopcid_pti); else /* if (!pmap_pcid_enabled && !pti) */ return (pmap_activate_sw_nopcid_nopti); } void pmap_activate_sw(struct thread *td) { pmap_t oldpmap, pmap; u_int cpuid; oldpmap = PCPU_GET(curpmap); pmap = vmspace_pmap(td->td_proc->p_vmspace); if (oldpmap == pmap) return; cpuid = PCPU_GET(cpuid); #ifdef SMP CPU_SET_ATOMIC(cpuid, &pmap->pm_active); #else CPU_SET(cpuid, &pmap->pm_active); #endif pmap_activate_sw_mode(pmap, cpuid); #ifdef SMP CPU_CLR_ATOMIC(cpuid, &oldpmap->pm_active); #else CPU_CLR(cpuid, &oldpmap->pm_active); #endif } void pmap_activate(struct thread *td) { critical_enter(); pmap_activate_sw(td); critical_exit(); } void pmap_activate_boot(pmap_t pmap) { uint64_t kcr3; u_int cpuid; /* * kernel_pmap must be never deactivated, and we ensure that * by never activating it at all. */ MPASS(pmap != kernel_pmap); cpuid = PCPU_GET(cpuid); #ifdef SMP CPU_SET_ATOMIC(cpuid, &pmap->pm_active); #else CPU_SET(cpuid, &pmap->pm_active); #endif PCPU_SET(curpmap, pmap); if (pti) { kcr3 = pmap->pm_cr3; if (pmap_pcid_enabled) kcr3 |= pmap->pm_pcids[cpuid].pm_pcid | CR3_PCID_SAVE; } else { kcr3 = PMAP_NO_CR3; } PCPU_SET(kcr3, kcr3); PCPU_SET(ucr3, PMAP_NO_CR3); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { vm_offset_t superpage_offset; if (size < NBPDR) return; if (object != NULL && (object->flags & OBJ_COLORED) != 0) offset += ptoa(object->pg_color); superpage_offset = offset & PDRMASK; if (size - ((NBPDR - superpage_offset) & PDRMASK) < NBPDR || (*addr & PDRMASK) == superpage_offset) return; if ((*addr & PDRMASK) < superpage_offset) *addr = (*addr & ~PDRMASK) + superpage_offset; else *addr = ((*addr + PDRMASK) & ~PDRMASK) + superpage_offset; } #ifdef INVARIANTS static unsigned long num_dirty_emulations; SYSCTL_ULONG(_vm_pmap, OID_AUTO, num_dirty_emulations, CTLFLAG_RW, &num_dirty_emulations, 0, NULL); static unsigned long num_accessed_emulations; SYSCTL_ULONG(_vm_pmap, OID_AUTO, num_accessed_emulations, CTLFLAG_RW, &num_accessed_emulations, 0, NULL); static unsigned long num_superpage_accessed_emulations; SYSCTL_ULONG(_vm_pmap, OID_AUTO, num_superpage_accessed_emulations, CTLFLAG_RW, &num_superpage_accessed_emulations, 0, NULL); static unsigned long ad_emulation_superpage_promotions; SYSCTL_ULONG(_vm_pmap, OID_AUTO, ad_emulation_superpage_promotions, CTLFLAG_RW, &ad_emulation_superpage_promotions, 0, NULL); #endif /* INVARIANTS */ int pmap_emulate_accessed_dirty(pmap_t pmap, vm_offset_t va, int ftype) { int rv; struct rwlock *lock; #if VM_NRESERVLEVEL > 0 vm_page_t m, mpte; #endif pd_entry_t *pde; pt_entry_t *pte, PG_A, PG_M, PG_RW, PG_V; KASSERT(ftype == VM_PROT_READ || ftype == VM_PROT_WRITE, ("pmap_emulate_accessed_dirty: invalid fault type %d", ftype)); if (!pmap_emulate_ad_bits(pmap)) return (-1); PG_A = pmap_accessed_bit(pmap); PG_M = pmap_modified_bit(pmap); PG_V = pmap_valid_bit(pmap); PG_RW = pmap_rw_bit(pmap); rv = -1; lock = NULL; PMAP_LOCK(pmap); pde = pmap_pde(pmap, va); if (pde == NULL || (*pde & PG_V) == 0) goto done; if ((*pde & PG_PS) != 0) { if (ftype == VM_PROT_READ) { #ifdef INVARIANTS atomic_add_long(&num_superpage_accessed_emulations, 1); #endif *pde |= PG_A; rv = 0; } goto done; } pte = pmap_pde_to_pte(pde, va); if ((*pte & PG_V) == 0) goto done; if (ftype == VM_PROT_WRITE) { if ((*pte & PG_RW) == 0) goto done; /* * Set the modified and accessed bits simultaneously. * * Intel EPT PTEs that do software emulation of A/D bits map * PG_A and PG_M to EPT_PG_READ and EPT_PG_WRITE respectively. * An EPT misconfiguration is triggered if the PTE is writable * but not readable (WR=10). This is avoided by setting PG_A * and PG_M simultaneously. */ *pte |= PG_M | PG_A; } else { *pte |= PG_A; } #if VM_NRESERVLEVEL > 0 /* try to promote the mapping */ if (va < VM_MAXUSER_ADDRESS) mpte = PHYS_TO_VM_PAGE(*pde & PG_FRAME); else mpte = NULL; m = PHYS_TO_VM_PAGE(*pte & PG_FRAME); if ((mpte == NULL || mpte->wire_count == NPTEPG) && pmap_ps_enabled(pmap) && (m->flags & PG_FICTITIOUS) == 0 && vm_reserv_level_iffullpop(m) == 0) { pmap_promote_pde(pmap, pde, va, &lock); #ifdef INVARIANTS atomic_add_long(&ad_emulation_superpage_promotions, 1); #endif } #endif #ifdef INVARIANTS if (ftype == VM_PROT_WRITE) atomic_add_long(&num_dirty_emulations, 1); else atomic_add_long(&num_accessed_emulations, 1); #endif rv = 0; /* success */ done: if (lock != NULL) rw_wunlock(lock); PMAP_UNLOCK(pmap); return (rv); } void pmap_get_mapping(pmap_t pmap, vm_offset_t va, uint64_t *ptr, int *num) { pml4_entry_t *pml4; pdp_entry_t *pdp; pd_entry_t *pde; pt_entry_t *pte, PG_V; int idx; idx = 0; PG_V = pmap_valid_bit(pmap); PMAP_LOCK(pmap); pml4 = pmap_pml4e(pmap, va); ptr[idx++] = *pml4; if ((*pml4 & PG_V) == 0) goto done; pdp = pmap_pml4e_to_pdpe(pml4, va); ptr[idx++] = *pdp; if ((*pdp & PG_V) == 0 || (*pdp & PG_PS) != 0) goto done; pde = pmap_pdpe_to_pde(pdp, va); ptr[idx++] = *pde; if ((*pde & PG_V) == 0 || (*pde & PG_PS) != 0) goto done; pte = pmap_pde_to_pte(pde, va); ptr[idx++] = *pte; done: PMAP_UNLOCK(pmap); *num = idx; } /** * Get the kernel virtual address of a set of physical pages. If there are * physical addresses not covered by the DMAP perform a transient mapping * that will be removed when calling pmap_unmap_io_transient. * * \param page The pages the caller wishes to obtain the virtual * address on the kernel memory map. * \param vaddr On return contains the kernel virtual memory address * of the pages passed in the page parameter. * \param count Number of pages passed in. * \param can_fault TRUE if the thread using the mapped pages can take * page faults, FALSE otherwise. * * \returns TRUE if the caller must call pmap_unmap_io_transient when * finished or FALSE otherwise. * */ boolean_t pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; boolean_t needs_mapping; pt_entry_t *pte; int cache_bits, error __unused, i; /* * Allocate any KVA space that we need, this is done in a separate * loop to prevent calling vmem_alloc while pinned. */ needs_mapping = FALSE; for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (__predict_false(paddr >= dmaplimit)) { error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &vaddr[i]); KASSERT(error == 0, ("vmem_alloc failed: %d", error)); needs_mapping = TRUE; } else { vaddr[i] = PHYS_TO_DMAP(paddr); } } /* Exit early if everything is covered by the DMAP */ if (!needs_mapping) return (FALSE); /* * NB: The sequence of updating a page table followed by accesses * to the corresponding pages used in the !DMAP case is subject to * the situation described in the "AMD64 Architecture Programmer's * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special * Coherency Considerations". Therefore, issuing the INVLPG right * after modifying the PTE bits is crucial. */ if (!can_fault) sched_pin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= dmaplimit) { if (can_fault) { /* * Slow path, since we can get page faults * while mappings are active don't pin the * thread to the CPU and instead add a global * mapping visible to all CPUs. */ pmap_qenter(vaddr[i], &page[i], 1); } else { pte = vtopte(vaddr[i]); cache_bits = pmap_cache_bits(kernel_pmap, page[i]->md.pat_mode, 0); pte_store(pte, paddr | X86_PG_RW | X86_PG_V | cache_bits); invlpg(vaddr[i]); } } } return (needs_mapping); } void pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; int i; if (!can_fault) sched_unpin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= dmaplimit) { if (can_fault) pmap_qremove(vaddr[i], 1); vmem_free(kernel_arena, vaddr[i], PAGE_SIZE); } } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { vm_paddr_t paddr; paddr = VM_PAGE_TO_PHYS(m); if (paddr < dmaplimit) return (PHYS_TO_DMAP(paddr)); mtx_lock_spin(&qframe_mtx); KASSERT(*vtopte(qframe) == 0, ("qframe busy")); pte_store(vtopte(qframe), paddr | X86_PG_RW | X86_PG_V | X86_PG_A | X86_PG_M | pmap_cache_bits(kernel_pmap, m->md.pat_mode, 0)); return (qframe); } void pmap_quick_remove_page(vm_offset_t addr) { if (addr != qframe) return; pte_store(vtopte(qframe), 0); invlpg(qframe); mtx_unlock_spin(&qframe_mtx); } static vm_page_t pmap_pti_alloc_page(void) { vm_page_t m; VM_OBJECT_ASSERT_WLOCKED(pti_obj); m = vm_page_grab(pti_obj, pti_pg_idx++, VM_ALLOC_NOBUSY | VM_ALLOC_WIRED | VM_ALLOC_ZERO); return (m); } static bool pmap_pti_free_page(vm_page_t m) { KASSERT(m->wire_count > 0, ("page %p not wired", m)); if (!vm_page_unwire_noq(m)) return (false); vm_page_free_zero(m); return (true); } static void pmap_pti_init(void) { vm_page_t pml4_pg; pdp_entry_t *pdpe; vm_offset_t va; int i; if (!pti) return; pti_obj = vm_pager_allocate(OBJT_PHYS, NULL, 0, VM_PROT_ALL, 0, NULL); VM_OBJECT_WLOCK(pti_obj); pml4_pg = pmap_pti_alloc_page(); pti_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4_pg)); for (va = VM_MIN_KERNEL_ADDRESS; va <= VM_MAX_KERNEL_ADDRESS && va >= VM_MIN_KERNEL_ADDRESS && va > NBPML4; va += NBPML4) { pdpe = pmap_pti_pdpe(va); pmap_pti_wire_pte(pdpe); } pmap_pti_add_kva_locked((vm_offset_t)&__pcpu[0], (vm_offset_t)&__pcpu[0] + sizeof(__pcpu[0]) * MAXCPU, false); pmap_pti_add_kva_locked((vm_offset_t)gdt, (vm_offset_t)gdt + sizeof(struct user_segment_descriptor) * NGDT * MAXCPU, false); pmap_pti_add_kva_locked((vm_offset_t)idt, (vm_offset_t)idt + sizeof(struct gate_descriptor) * NIDT, false); pmap_pti_add_kva_locked((vm_offset_t)common_tss, (vm_offset_t)common_tss + sizeof(struct amd64tss) * MAXCPU, false); CPU_FOREACH(i) { /* Doublefault stack IST 1 */ va = common_tss[i].tss_ist1; pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); /* NMI stack IST 2 */ va = common_tss[i].tss_ist2 + sizeof(struct nmi_pcpu); pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); /* MC# stack IST 3 */ va = common_tss[i].tss_ist3 + sizeof(struct nmi_pcpu); pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); /* DB# stack IST 4 */ va = common_tss[i].tss_ist4 + sizeof(struct nmi_pcpu); pmap_pti_add_kva_locked(va - PAGE_SIZE, va, false); } pmap_pti_add_kva_locked((vm_offset_t)kernphys + KERNBASE, (vm_offset_t)etext, true); pti_finalized = true; VM_OBJECT_WUNLOCK(pti_obj); } SYSINIT(pmap_pti, SI_SUB_CPU + 1, SI_ORDER_ANY, pmap_pti_init, NULL); static pdp_entry_t * pmap_pti_pdpe(vm_offset_t va) { pml4_entry_t *pml4e; pdp_entry_t *pdpe; vm_page_t m; vm_pindex_t pml4_idx; vm_paddr_t mphys; VM_OBJECT_ASSERT_WLOCKED(pti_obj); pml4_idx = pmap_pml4e_index(va); pml4e = &pti_pml4[pml4_idx]; m = NULL; if (*pml4e == 0) { if (pti_finalized) panic("pml4 alloc after finalization\n"); m = pmap_pti_alloc_page(); if (*pml4e != 0) { pmap_pti_free_page(m); mphys = *pml4e & ~PAGE_MASK; } else { mphys = VM_PAGE_TO_PHYS(m); *pml4e = mphys | X86_PG_RW | X86_PG_V; } } else { mphys = *pml4e & ~PAGE_MASK; } pdpe = (pdp_entry_t *)PHYS_TO_DMAP(mphys) + pmap_pdpe_index(va); return (pdpe); } static void pmap_pti_wire_pte(void *pte) { vm_page_t m; VM_OBJECT_ASSERT_WLOCKED(pti_obj); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pte)); m->wire_count++; } static void pmap_pti_unwire_pde(void *pde, bool only_ref) { vm_page_t m; VM_OBJECT_ASSERT_WLOCKED(pti_obj); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pde)); MPASS(m->wire_count > 0); MPASS(only_ref || m->wire_count > 1); pmap_pti_free_page(m); } static void pmap_pti_unwire_pte(void *pte, vm_offset_t va) { vm_page_t m; pd_entry_t *pde; VM_OBJECT_ASSERT_WLOCKED(pti_obj); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((uintptr_t)pte)); MPASS(m->wire_count > 0); if (pmap_pti_free_page(m)) { pde = pmap_pti_pde(va); MPASS((*pde & (X86_PG_PS | X86_PG_V)) == X86_PG_V); *pde = 0; pmap_pti_unwire_pde(pde, false); } } static pd_entry_t * pmap_pti_pde(vm_offset_t va) { pdp_entry_t *pdpe; pd_entry_t *pde; vm_page_t m; vm_pindex_t pd_idx; vm_paddr_t mphys; VM_OBJECT_ASSERT_WLOCKED(pti_obj); pdpe = pmap_pti_pdpe(va); if (*pdpe == 0) { m = pmap_pti_alloc_page(); if (*pdpe != 0) { pmap_pti_free_page(m); MPASS((*pdpe & X86_PG_PS) == 0); mphys = *pdpe & ~PAGE_MASK; } else { mphys = VM_PAGE_TO_PHYS(m); *pdpe = mphys | X86_PG_RW | X86_PG_V; } } else { MPASS((*pdpe & X86_PG_PS) == 0); mphys = *pdpe & ~PAGE_MASK; } pde = (pd_entry_t *)PHYS_TO_DMAP(mphys); pd_idx = pmap_pde_index(va); pde += pd_idx; return (pde); } static pt_entry_t * pmap_pti_pte(vm_offset_t va, bool *unwire_pde) { pd_entry_t *pde; pt_entry_t *pte; vm_page_t m; vm_paddr_t mphys; VM_OBJECT_ASSERT_WLOCKED(pti_obj); pde = pmap_pti_pde(va); if (unwire_pde != NULL) { *unwire_pde = true; pmap_pti_wire_pte(pde); } if (*pde == 0) { m = pmap_pti_alloc_page(); if (*pde != 0) { pmap_pti_free_page(m); MPASS((*pde & X86_PG_PS) == 0); mphys = *pde & ~(PAGE_MASK | pg_nx); } else { mphys = VM_PAGE_TO_PHYS(m); *pde = mphys | X86_PG_RW | X86_PG_V; if (unwire_pde != NULL) *unwire_pde = false; } } else { MPASS((*pde & X86_PG_PS) == 0); mphys = *pde & ~(PAGE_MASK | pg_nx); } pte = (pt_entry_t *)PHYS_TO_DMAP(mphys); pte += pmap_pte_index(va); return (pte); } static void pmap_pti_add_kva_locked(vm_offset_t sva, vm_offset_t eva, bool exec) { vm_paddr_t pa; pd_entry_t *pde; pt_entry_t *pte, ptev; bool unwire_pde; VM_OBJECT_ASSERT_WLOCKED(pti_obj); sva = trunc_page(sva); MPASS(sva > VM_MAXUSER_ADDRESS); eva = round_page(eva); MPASS(sva < eva); for (; sva < eva; sva += PAGE_SIZE) { pte = pmap_pti_pte(sva, &unwire_pde); pa = pmap_kextract(sva); ptev = pa | X86_PG_RW | X86_PG_V | X86_PG_A | X86_PG_G | (exec ? 0 : pg_nx) | pmap_cache_bits(kernel_pmap, VM_MEMATTR_DEFAULT, FALSE); if (*pte == 0) { pte_store(pte, ptev); pmap_pti_wire_pte(pte); } else { KASSERT(!pti_finalized, ("pti overlap after fin %#lx %#lx %#lx", sva, *pte, ptev)); KASSERT(*pte == ptev, ("pti non-identical pte after fin %#lx %#lx %#lx", sva, *pte, ptev)); } if (unwire_pde) { pde = pmap_pti_pde(sva); pmap_pti_unwire_pde(pde, true); } } } void pmap_pti_add_kva(vm_offset_t sva, vm_offset_t eva, bool exec) { if (!pti) return; VM_OBJECT_WLOCK(pti_obj); pmap_pti_add_kva_locked(sva, eva, exec); VM_OBJECT_WUNLOCK(pti_obj); } void pmap_pti_remove_kva(vm_offset_t sva, vm_offset_t eva) { pt_entry_t *pte; vm_offset_t va; if (!pti) return; sva = rounddown2(sva, PAGE_SIZE); MPASS(sva > VM_MAXUSER_ADDRESS); eva = roundup2(eva, PAGE_SIZE); MPASS(sva < eva); VM_OBJECT_WLOCK(pti_obj); for (va = sva; va < eva; va += PAGE_SIZE) { pte = pmap_pti_pte(va, NULL); KASSERT((*pte & X86_PG_V) != 0, ("invalid pte va %#lx pte %#lx pt %#lx", va, (u_long)pte, *pte)); pte_clear(pte); pmap_pti_unwire_pte(pte, va); } pmap_invalidate_range(kernel_pmap, sva, eva); VM_OBJECT_WUNLOCK(pti_obj); } #include "opt_ddb.h" #ifdef DDB #include #include DB_SHOW_COMMAND(pte, pmap_print_pte) { pmap_t pmap; pml4_entry_t *pml4; pdp_entry_t *pdp; pd_entry_t *pde; pt_entry_t *pte, PG_V; vm_offset_t va; if (!have_addr) { db_printf("show pte addr\n"); return; } va = (vm_offset_t)addr; if (kdb_thread != NULL) pmap = vmspace_pmap(kdb_thread->td_proc->p_vmspace); else pmap = PCPU_GET(curpmap); PG_V = pmap_valid_bit(pmap); pml4 = pmap_pml4e(pmap, va); db_printf("VA %#016lx pml4e %#016lx", va, *pml4); if ((*pml4 & PG_V) == 0) { db_printf("\n"); return; } pdp = pmap_pml4e_to_pdpe(pml4, va); db_printf(" pdpe %#016lx", *pdp); if ((*pdp & PG_V) == 0 || (*pdp & PG_PS) != 0) { db_printf("\n"); return; } pde = pmap_pdpe_to_pde(pdp, va); db_printf(" pde %#016lx", *pde); if ((*pde & PG_V) == 0 || (*pde & PG_PS) != 0) { db_printf("\n"); return; } pte = pmap_pde_to_pte(pde, va); db_printf(" pte %#016lx\n", *pte); } DB_SHOW_COMMAND(phys2dmap, pmap_phys2dmap) { vm_paddr_t a; if (have_addr) { a = (vm_paddr_t)addr; db_printf("0x%jx\n", (uintmax_t)PHYS_TO_DMAP(a)); } else { db_printf("show phys2dmap addr\n"); } } #endif Index: projects/clang700-import/sys/amd64/amd64/support.S =================================================================== --- projects/clang700-import/sys/amd64/amd64/support.S (revision 339014) +++ projects/clang700-import/sys/amd64/amd64/support.S (revision 339015) @@ -1,1399 +1,1399 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "opt_ddb.h" #include #include #include #include "assym.inc" .text /* Address: %rdi */ ENTRY(pagezero_std) PUSH_FRAME_POINTER movq $PAGE_SIZE/8,%rcx xorl %eax,%eax rep stosq POP_FRAME_POINTER ret END(pagezero_std) ENTRY(pagezero_erms) PUSH_FRAME_POINTER movq $PAGE_SIZE,%rcx xorl %eax,%eax rep stosb POP_FRAME_POINTER ret END(pagezero_erms) /* * pagecopy(%rdi=from, %rsi=to) */ ENTRY(pagecopy) PUSH_FRAME_POINTER movq $PAGE_SIZE/8,%rcx movq %rdi,%r9 movq %rsi,%rdi movq %r9,%rsi rep movsq POP_FRAME_POINTER ret END(pagecopy) /* Address: %rdi */ ENTRY(sse2_pagezero) PUSH_FRAME_POINTER movq $-PAGE_SIZE,%rdx subq %rdx,%rdi xorl %eax,%eax jmp 1f /* * The loop takes 29 bytes. Ensure that it doesn't cross a 32-byte * cache line. */ .p2align 5,0x90 1: movnti %rax,(%rdi,%rdx) movnti %rax,8(%rdi,%rdx) movnti %rax,16(%rdi,%rdx) movnti %rax,24(%rdi,%rdx) addq $32,%rdx jne 1b sfence POP_FRAME_POINTER ret END(sse2_pagezero) /* * memcmpy(b1, b2, len) * rdi,rsi,len */ ENTRY(memcmp) PUSH_FRAME_POINTER cmpq $16,%rdx jae 5f 1: testq %rdx,%rdx je 3f xorl %ecx,%ecx 2: movzbl (%rdi,%rcx,1),%eax movzbl (%rsi,%rcx,1),%r8d cmpb %r8b,%al jne 4f addq $1,%rcx cmpq %rcx,%rdx jz 3f movzbl (%rdi,%rcx,1),%eax movzbl (%rsi,%rcx,1),%r8d cmpb %r8b,%al jne 4f addq $1,%rcx cmpq %rcx,%rdx jz 3f movzbl (%rdi,%rcx,1),%eax movzbl (%rsi,%rcx,1),%r8d cmpb %r8b,%al jne 4f addq $1,%rcx cmpq %rcx,%rdx jz 3f movzbl (%rdi,%rcx,1),%eax movzbl (%rsi,%rcx,1),%r8d cmpb %r8b,%al jne 4f addq $1,%rcx cmpq %rcx,%rdx jne 2b 3: xorl %eax,%eax POP_FRAME_POINTER ret 4: subl %r8d,%eax POP_FRAME_POINTER ret 5: cmpq $32,%rdx jae 7f 6: /* * 8 bytes */ movq (%rdi),%r8 movq (%rsi),%r9 cmpq %r8,%r9 jne 1b leaq 8(%rdi),%rdi leaq 8(%rsi),%rsi subq $8,%rdx cmpq $8,%rdx jae 6b jl 1b jmp 3b 7: /* * 32 bytes */ movq (%rsi),%r8 movq 8(%rsi),%r9 subq (%rdi),%r8 subq 8(%rdi),%r9 or %r8,%r9 jnz 1b movq 16(%rsi),%r8 movq 24(%rsi),%r9 subq 16(%rdi),%r8 subq 24(%rdi),%r9 or %r8,%r9 jnz 1b leaq 32(%rdi),%rdi leaq 32(%rsi),%rsi subq $32,%rdx cmpq $32,%rdx jae 7b jnz 1b jmp 3b END(memcmp) /* * memmove(dst, src, cnt) * rdi, rsi, rdx * Adapted from bcopy written by: * ws@tools.de (Wolfgang Solfrank, TooLs GmbH) +49-228-985800 */ ENTRY(memmove_std) PUSH_FRAME_POINTER movq %rdi,%rax movq %rdx,%rcx movq %rdi,%r8 subq %rsi,%r8 cmpq %rcx,%r8 /* overlapping && src < dst? */ jb 2f cmpq $15,%rcx jbe 1f shrq $3,%rcx /* copy by 64-bit words */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ jne 1f POP_FRAME_POINTER ret ALIGN_TEXT 1: rep movsb POP_FRAME_POINTER ret /* ALIGN_TEXT */ 2: addq %rcx,%rdi /* copy backwards */ addq %rcx,%rsi decq %rdi decq %rsi std andq $7,%rcx /* any fractional bytes? */ je 3f rep movsb 3: movq %rdx,%rcx /* copy remainder by 32-bit words */ shrq $3,%rcx subq $7,%rsi subq $7,%rdi rep movsq cld POP_FRAME_POINTER ret END(memmove_std) ENTRY(memmove_erms) PUSH_FRAME_POINTER movq %rdi,%rax movq %rdx,%rcx movq %rdi,%r8 subq %rsi,%r8 cmpq %rcx,%r8 /* overlapping && src < dst? */ jb 1f rep movsb POP_FRAME_POINTER ret 1: addq %rcx,%rdi /* copy backwards */ addq %rcx,%rsi decq %rdi decq %rsi std rep movsb cld POP_FRAME_POINTER ret END(memmove_erms) /* * memcpy(dst, src, len) * rdi, rsi, rdx * * Note: memcpy does not support overlapping copies */ ENTRY(memcpy_std) PUSH_FRAME_POINTER movq %rdi,%rax movq %rdx,%rcx cmpq $15,%rcx jbe 1f shrq $3,%rcx /* copy by 64-bit words */ rep movsq movq %rdx,%rcx andq $7,%rcx /* any bytes left? */ jne 1f POP_FRAME_POINTER ret ALIGN_TEXT 1: rep movsb POP_FRAME_POINTER ret END(memcpy_std) ENTRY(memcpy_erms) PUSH_FRAME_POINTER movq %rdi,%rax movq %rdx,%rcx rep movsb POP_FRAME_POINTER ret END(memcpy_erms) /* * memset(dst, c, len) * rdi, rsi, rdx */ ENTRY(memset_std) PUSH_FRAME_POINTER movq %rdi,%r9 movq %rdx,%rcx movzbq %sil,%r8 movabs $0x0101010101010101,%rax imulq %r8,%rax cmpq $15,%rcx jbe 1f shrq $3,%rcx rep stosq movq %rdx,%rcx andq $7,%rcx jne 1f movq %r9,%rax POP_FRAME_POINTER ret ALIGN_TEXT 1: rep stosb movq %r9,%rax POP_FRAME_POINTER ret END(memset_std) ENTRY(memset_erms) PUSH_FRAME_POINTER movq %rdi,%r9 movq %rdx,%rcx movb %sil,%al rep stosb movq %r9,%rax POP_FRAME_POINTER ret END(memset_erms) /* fillw(pat, base, cnt) */ /* %rdi,%rsi, %rdx */ ENTRY(fillw) PUSH_FRAME_POINTER movq %rdi,%rax movq %rsi,%rdi movq %rdx,%rcx rep stosw POP_FRAME_POINTER ret END(fillw) /*****************************************************************************/ /* copyout and fubyte family */ /*****************************************************************************/ /* * Access user memory from inside the kernel. These routines should be * the only places that do this. * * These routines set curpcb->pcb_onfault for the time they execute. When a * protection violation occurs inside the functions, the trap handler * returns to *curpcb->pcb_onfault instead of the function. */ .macro SMAP_DISABLE smap .if \smap stac .endif .endm .macro SMAP_ENABLE smap .if \smap clac .endif .endm /* * copyout(from_kernel, to_user, len) * %rdi, %rsi, %rdx */ .macro COPYOUT smap erms PUSH_FRAME_POINTER movq PCPU(CURPCB),%r9 movq $copy_fault,PCB_ONFAULT(%r9) /* * Check explicitly for non-user addresses. If 486 write protection * is being used, this check is essential because we are in kernel * mode so the h/w does not provide any protection against writing * kernel addresses. */ /* * First, prevent address wrapping. */ movq %rsi,%rax addq %rdx,%rax jc copy_fault /* * XXX STOP USING VM_MAXUSER_ADDRESS. * It is an end address, not a max, so every time it is used correctly it * looks like there is an off by one error, and of course it caused an off * by one error in several places. */ movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copy_fault /* * Set up arguments for rep movs*. */ movq %rdi,%r8 movq %rsi,%rdi movq %r8,%rsi movq %rdx,%rcx /* * Set return value to zero. Remaining failure mode goes through * copy_fault. */ xorl %eax,%eax SMAP_DISABLE \smap .if \erms == 0 cmpq $15,%rcx jbe 1f shrq $3,%rcx rep movsq movb %dl,%cl andb $7,%cl jne 1f SMAP_ENABLE \smap movq %rax,PCB_ONFAULT(%r9) POP_FRAME_POINTER ret .endif ALIGN_TEXT 1: rep movsb SMAP_ENABLE \smap movq %rax,PCB_ONFAULT(%r9) POP_FRAME_POINTER ret .endm ENTRY(copyout_nosmap_std) COPYOUT smap=0 erms=0 END(copyout_nosmap_std) ENTRY(copyout_smap_std) COPYOUT smap=1 erms=0 END(copyout_smap_std) ENTRY(copyout_nosmap_erms) COPYOUT smap=0 erms=1 END(copyout_nosmap_erms) ENTRY(copyout_smap_erms) COPYOUT smap=1 erms=1 END(copyout_smap_erms) /* * copyin(from_user, to_kernel, len) * %rdi, %rsi, %rdx */ .macro COPYIN smap erms PUSH_FRAME_POINTER movq PCPU(CURPCB),%r9 movq $copy_fault,PCB_ONFAULT(%r9) /* * make sure address is valid */ movq %rdi,%rax addq %rdx,%rax jc copy_fault movq $VM_MAXUSER_ADDRESS,%rcx cmpq %rcx,%rax ja copy_fault movq %rdi,%r8 movq %rsi,%rdi movq %r8,%rsi movq %rdx,%rcx xorl %eax,%eax SMAP_DISABLE \smap .if \erms == 0 cmpq $15,%rcx jbe 1f shrq $3,%rcx /* copy longword-wise */ rep movsq movb %dl,%cl andb $7,%cl /* copy remaining bytes */ jne 1f SMAP_ENABLE \smap movq %rax,PCB_ONFAULT(%r9) POP_FRAME_POINTER ret .endif ALIGN_TEXT 1: rep movsb SMAP_ENABLE \smap movq %rax,PCB_ONFAULT(%r9) POP_FRAME_POINTER ret .endm ENTRY(copyin_nosmap_std) COPYIN smap=0 erms=0 END(copyin_nosmap_std) ENTRY(copyin_smap_std) COPYIN smap=1 erms=0 END(copyin_smap_std) ENTRY(copyin_nosmap_erms) COPYIN smap=0 erms=1 END(copyin_nosmap_erms) ENTRY(copyin_smap_erms) COPYIN smap=1 erms=1 END(copyin_smap_erms) ALIGN_TEXT /* Trap entry clears PSL.AC */ copy_fault: movq $0,PCB_ONFAULT(%r9) movl $EFAULT,%eax POP_FRAME_POINTER ret /* * casueword32. Compare and set user integer. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %esi, oldp = %rdx, new = %ecx */ ENTRY(casueword32_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movl %esi,%eax /* old */ #ifdef SMP lock #endif cmpxchgl %ecx,(%rdi) /* new = %ecx */ /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. Save %eax into %esi to prepare the return * value. */ movl %eax,%esi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) /* * Access the oldp after the pcb_onfault is cleared, to correctly * catch corrupted pointer. */ movl %esi,(%rdx) /* oldp = %rdx */ POP_FRAME_POINTER ret END(casueword32_nosmap) ENTRY(casueword32_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movl %esi,%eax /* old */ stac #ifdef SMP lock #endif cmpxchgl %ecx,(%rdi) /* new = %ecx */ clac /* * The old value is in %eax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. Save %eax into %esi to prepare the return * value. */ movl %eax,%esi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) /* * Access the oldp after the pcb_onfault is cleared, to correctly * catch corrupted pointer. */ movl %esi,(%rdx) /* oldp = %rdx */ POP_FRAME_POINTER ret END(casueword32_smap) /* * casueword. Compare and set user long. Returns -1 on fault, * 0 if access was successful. Old value is written to *oldp. * dst = %rdi, old = %rsi, oldp = %rdx, new = %rcx */ ENTRY(casueword_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movq %rsi,%rax /* old */ #ifdef SMP lock #endif cmpxchgq %rcx,(%rdi) /* new = %rcx */ /* * The old value is in %rax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movq %rax,%rsi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) movq %rsi,(%rdx) POP_FRAME_POINTER ret END(casueword_nosmap) ENTRY(casueword_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $fusufault,PCB_ONFAULT(%r8) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault movq %rsi,%rax /* old */ stac #ifdef SMP lock #endif cmpxchgq %rcx,(%rdi) /* new = %rcx */ clac /* * The old value is in %rax. If the store succeeded it will be the * value we expected (old) from before the store, otherwise it will * be the current value. */ movq %rax,%rsi xorl %eax,%eax movq %rax,PCB_ONFAULT(%r8) movq %rsi,(%rdx) POP_FRAME_POINTER ret END(casueword_smap) /* * Fetch (load) a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit * byte from user memory. * addr = %rdi, valp = %rsi */ ENTRY(fueword_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movq (%rdi),%r11 movq %rax,PCB_ONFAULT(%rcx) movq %r11,(%rsi) POP_FRAME_POINTER ret END(fueword_nosmap) ENTRY(fueword_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax stac movq (%rdi),%r11 clac movq %rax,PCB_ONFAULT(%rcx) movq %r11,(%rsi) POP_FRAME_POINTER ret END(fueword_smap) ENTRY(fueword32_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax movl (%rdi),%r11d movq %rax,PCB_ONFAULT(%rcx) movl %r11d,(%rsi) POP_FRAME_POINTER ret END(fueword32_nosmap) ENTRY(fueword32_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address is valid */ ja fusufault xorl %eax,%eax stac movl (%rdi),%r11d clac movq %rax,PCB_ONFAULT(%rcx) movl %r11d,(%rsi) POP_FRAME_POINTER ret END(fueword32_smap) ENTRY(fuword16_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi ja fusufault movzwl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fuword16_nosmap) ENTRY(fuword16_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi ja fusufault stac movzwl (%rdi),%eax clac movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fuword16_smap) ENTRY(fubyte_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi ja fusufault movzbl (%rdi),%eax movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fubyte_nosmap) ENTRY(fubyte_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi ja fusufault stac movzbl (%rdi),%eax clac movq $0,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(fubyte_smap) /* * Store a 64-bit word, a 32-bit word, a 16-bit word, or an 8-bit byte to * user memory. * addr = %rdi, value = %rsi */ ENTRY(suword_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movq %rsi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword_nosmap) ENTRY(suword_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-8,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault stac movq %rsi,(%rdi) clac xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword_smap) ENTRY(suword32_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword32_nosmap) ENTRY(suword32_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-4,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault stac movl %esi,(%rdi) clac xorl %eax,%eax movq PCPU(CURPCB),%rcx movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword32_smap) ENTRY(suword16_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movw %si,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword16_nosmap) ENTRY(suword16_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-2,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault stac movw %si,(%rdi) clac xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(suword16_smap) ENTRY(subyte_nosmap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,%eax movb %al,(%rdi) xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(subyte_nosmap) ENTRY(subyte_smap) PUSH_FRAME_POINTER movq PCPU(CURPCB),%rcx movq $fusufault,PCB_ONFAULT(%rcx) movq $VM_MAXUSER_ADDRESS-1,%rax cmpq %rax,%rdi /* verify address validity */ ja fusufault movl %esi,%eax stac movb %al,(%rdi) clac xorl %eax,%eax movq PCPU(CURPCB),%rcx /* restore trashed register */ movq %rax,PCB_ONFAULT(%rcx) POP_FRAME_POINTER ret END(subyte_smap) ALIGN_TEXT /* Fault entry clears PSL.AC */ fusufault: movq PCPU(CURPCB),%rcx xorl %eax,%eax movq %rax,PCB_ONFAULT(%rcx) decq %rax POP_FRAME_POINTER ret /* * copyinstr(from, to, maxlen, int *lencopied) * %rdi, %rsi, %rdx, %rcx * * copy a string from 'from' to 'to', stop when a 0 character is reached. * return ENAMETOOLONG if string is longer than maxlen, and * EFAULT on protection violations. If lencopied is non-zero, * return the actual length in *lencopied. */ .macro COPYINSTR smap PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ movq PCPU(CURPCB),%r9 movq $cpystrflt,PCB_ONFAULT(%r9) movq $VM_MAXUSER_ADDRESS,%rax /* make sure 'from' is within bounds */ subq %rdi,%rax jbe cpystrflt SMAP_DISABLE \smap /* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */ cmpq %rdx,%rax jb 8f 1: incq %rdx 2: decq %rdx .if \smap == 0 jz copyinstr_toolong .else jz copyinstr_toolong_smap .endif movb (%rdi),%al movb %al,(%rsi) incq %rsi incq %rdi testb %al,%al jnz 2b SMAP_ENABLE \smap /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax /* set *lencopied and return %eax */ movq %rax,PCB_ONFAULT(%r9) testq %rcx,%rcx jz 3f subq %rdx,%r8 movq %r8,(%rcx) 3: POP_FRAME_POINTER ret ALIGN_TEXT 8: movq %rax,%rdx movq %rax,%r8 jmp 1b .endm ENTRY(copyinstr_nosmap) COPYINSTR smap=0 END(copyinstr_nosmap) ENTRY(copyinstr_smap) COPYINSTR smap=1 END(copyinstr_smap) cpystrflt: /* Fault entry clears PSL.AC */ movl $EFAULT,%eax cpystrflt_x: /* set *lencopied and return %eax */ movq $0,PCB_ONFAULT(%r9) testq %rcx,%rcx jz 1f subq %rdx,%r8 movq %r8,(%rcx) 1: POP_FRAME_POINTER ret copyinstr_toolong_smap: clac copyinstr_toolong: /* rdx is zero - return ENAMETOOLONG or EFAULT */ movq $VM_MAXUSER_ADDRESS,%rax - cmpq %rax,%rsi + cmpq %rax,%rdi jae cpystrflt movl $ENAMETOOLONG,%eax jmp cpystrflt_x /* * copystr(from, to, maxlen, int *lencopied) * %rdi, %rsi, %rdx, %rcx */ ENTRY(copystr) PUSH_FRAME_POINTER movq %rdx,%r8 /* %r8 = maxlen */ incq %rdx 1: decq %rdx jz 4f movb (%rdi),%al movb %al,(%rsi) incq %rsi incq %rdi testb %al,%al jnz 1b /* Success -- 0 byte reached */ decq %rdx xorl %eax,%eax 2: testq %rcx,%rcx jz 3f /* set *lencopied and return %rax */ subq %rdx,%r8 movq %r8,(%rcx) 3: POP_FRAME_POINTER ret 4: /* rdx is zero -- return ENAMETOOLONG */ movl $ENAMETOOLONG,%eax jmp 2b END(copystr) /* * Handling of special amd64 registers and descriptor tables etc */ /* void lgdt(struct region_descriptor *rdp); */ ENTRY(lgdt) /* reload the descriptor table */ lgdt (%rdi) /* flush the prefetch q */ jmp 1f nop 1: movl $KDSEL,%eax movl %eax,%ds movl %eax,%es movl %eax,%fs /* Beware, use wrmsr to set 64 bit base */ movl %eax,%gs movl %eax,%ss /* reload code selector by turning return into intersegmental return */ popq %rax pushq $KCSEL pushq %rax MEXITCOUNT lretq END(lgdt) /*****************************************************************************/ /* setjump, longjump */ /*****************************************************************************/ ENTRY(setjmp) movq %rbx,0(%rdi) /* save rbx */ movq %rsp,8(%rdi) /* save rsp */ movq %rbp,16(%rdi) /* save rbp */ movq %r12,24(%rdi) /* save r12 */ movq %r13,32(%rdi) /* save r13 */ movq %r14,40(%rdi) /* save r14 */ movq %r15,48(%rdi) /* save r15 */ movq 0(%rsp),%rdx /* get rta */ movq %rdx,56(%rdi) /* save rip */ xorl %eax,%eax /* return(0); */ ret END(setjmp) ENTRY(longjmp) movq 0(%rdi),%rbx /* restore rbx */ movq 8(%rdi),%rsp /* restore rsp */ movq 16(%rdi),%rbp /* restore rbp */ movq 24(%rdi),%r12 /* restore r12 */ movq 32(%rdi),%r13 /* restore r13 */ movq 40(%rdi),%r14 /* restore r14 */ movq 48(%rdi),%r15 /* restore r15 */ movq 56(%rdi),%rdx /* get rta */ movq %rdx,0(%rsp) /* put in return frame */ xorl %eax,%eax /* return(1); */ incl %eax ret END(longjmp) /* * Support for reading MSRs in the safe manner. (Instead of panic on #gp, * return an error.) */ ENTRY(rdmsr_safe) /* int rdmsr_safe(u_int msr, uint64_t *data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx rdmsr /* Read MSR pointed by %ecx. Returns hi byte in edx, lo in %eax */ salq $32,%rdx /* sign-shift %rdx left */ movl %eax,%eax /* zero-extend %eax -> %rax */ orq %rdx,%rax movq %rax,(%rsi) xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * Support for writing MSRs in the safe manner. (Instead of panic on #gp, * return an error.) */ ENTRY(wrmsr_safe) /* int wrmsr_safe(u_int msr, uint64_t data) */ PUSH_FRAME_POINTER movq PCPU(CURPCB),%r8 movq $msr_onfault,PCB_ONFAULT(%r8) movl %edi,%ecx movl %esi,%eax sarq $32,%rsi movl %esi,%edx wrmsr /* Write MSR pointed by %ecx. Accepts hi byte in edx, lo in %eax. */ xorq %rax,%rax movq %rax,PCB_ONFAULT(%r8) POP_FRAME_POINTER ret /* * MSR operations fault handler */ ALIGN_TEXT msr_onfault: movq $0,PCB_ONFAULT(%r8) movl $EFAULT,%eax POP_FRAME_POINTER ret /* * void pmap_pti_pcid_invalidate(uint64_t ucr3, uint64_t kcr3); * Invalidates address space addressed by ucr3, then returns to kcr3. * Done in assembler to ensure no other memory accesses happen while * on ucr3. */ ALIGN_TEXT ENTRY(pmap_pti_pcid_invalidate) pushfq cli movq %rdi,%cr3 /* to user page table */ movq %rsi,%cr3 /* back to kernel */ popfq retq /* * void pmap_pti_pcid_invlpg(uint64_t ucr3, uint64_t kcr3, vm_offset_t va); * Invalidates virtual address va in address space ucr3, then returns to kcr3. */ ALIGN_TEXT ENTRY(pmap_pti_pcid_invlpg) pushfq cli movq %rdi,%cr3 /* to user page table */ invlpg (%rdx) movq %rsi,%cr3 /* back to kernel */ popfq retq /* * void pmap_pti_pcid_invlrng(uint64_t ucr3, uint64_t kcr3, vm_offset_t sva, * vm_offset_t eva); * Invalidates virtual addresses between sva and eva in address space ucr3, * then returns to kcr3. */ ALIGN_TEXT ENTRY(pmap_pti_pcid_invlrng) pushfq cli movq %rdi,%cr3 /* to user page table */ 1: invlpg (%rdx) addq $PAGE_SIZE,%rdx cmpq %rdx,%rcx ja 1b movq %rsi,%cr3 /* back to kernel */ popfq retq .altmacro .macro ibrs_seq_label l handle_ibrs_\l: .endm .macro ibrs_call_label l call handle_ibrs_\l .endm .macro ibrs_seq count ll=1 .rept \count ibrs_call_label %(ll) nop ibrs_seq_label %(ll) addq $8,%rsp ll=ll+1 .endr .endm /* all callers already saved %rax, %rdx, and %rcx */ ENTRY(handle_ibrs_entry) cmpb $0,hw_ibrs_active(%rip) je 1f movl $MSR_IA32_SPEC_CTRL,%ecx rdmsr orl $(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax orl $(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32,%edx wrmsr movb $1,PCPU(IBPB_SET) testl $CPUID_STDEXT_SMEP,cpu_stdext_feature(%rip) jne 1f ibrs_seq 32 1: ret END(handle_ibrs_entry) ENTRY(handle_ibrs_exit) cmpb $0,PCPU(IBPB_SET) je 1f movl $MSR_IA32_SPEC_CTRL,%ecx rdmsr andl $~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax andl $~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx wrmsr movb $0,PCPU(IBPB_SET) 1: ret END(handle_ibrs_exit) /* registers-neutral version, but needs stack */ ENTRY(handle_ibrs_exit_rs) cmpb $0,PCPU(IBPB_SET) je 1f pushq %rax pushq %rdx pushq %rcx movl $MSR_IA32_SPEC_CTRL,%ecx rdmsr andl $~(IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP),%eax andl $~((IA32_SPEC_CTRL_IBRS|IA32_SPEC_CTRL_STIBP)>>32),%edx wrmsr popq %rcx popq %rdx popq %rax movb $0,PCPU(IBPB_SET) 1: ret END(handle_ibrs_exit_rs) .noaltmacro /* * Flush L1D cache. Load enough of the data from the kernel text * to flush existing L1D content. * * N.B. The function does not follow ABI calling conventions, it corrupts %rbx. * The vmm.ko caller expects that only %rax, %rdx, %rbx, %rcx, %r9, and %rflags * registers are clobbered. The NMI handler caller only needs %r13 preserved. */ ENTRY(flush_l1d_sw) #define L1D_FLUSH_SIZE (64 * 1024) movq $KERNBASE, %r9 movq $-L1D_FLUSH_SIZE, %rcx /* * pass 1: Preload TLB. * Kernel text is mapped using superpages. TLB preload is * done for the benefit of older CPUs which split 2M page * into 4k TLB entries. */ 1: movb L1D_FLUSH_SIZE(%r9, %rcx), %al addq $PAGE_SIZE, %rcx jne 1b xorl %eax, %eax cpuid movq $-L1D_FLUSH_SIZE, %rcx /* pass 2: Read each cache line. */ 2: movb L1D_FLUSH_SIZE(%r9, %rcx), %al addq $64, %rcx jne 2b lfence ret #undef L1D_FLUSH_SIZE END(flush_l1d_sw) Index: projects/clang700-import/sys/arm64/arm64/identcpu.c =================================================================== --- projects/clang700-import/sys/arm64/arm64/identcpu.c (revision 339014) +++ projects/clang700-import/sys/arm64/arm64/identcpu.c (revision 339015) @@ -1,1335 +1,1373 @@ /*- * Copyright (c) 2014 Andrew Turner * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Semihalf * under sponsorship of the FreeBSD Foundation. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include static int ident_lock; char machine[] = "arm64"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); /* * Per-CPU affinity as provided in MPIDR_EL1 * Indexed by CPU number in logical order selected by the system. * Relevant fields can be extracted using CPU_AFFn macros, * Aff3.Aff2.Aff1.Aff0 construct a unique CPU address in the system. * * Fields used by us: * Aff1 - Cluster number * Aff0 - CPU number in Aff1 cluster */ uint64_t __cpu_affinity[MAXCPU]; static u_int cpu_aff_levels; struct cpu_desc { u_int cpu_impl; u_int cpu_part_num; u_int cpu_variant; u_int cpu_revision; const char *cpu_impl_name; const char *cpu_part_name; uint64_t mpidr; uint64_t id_aa64afr0; uint64_t id_aa64afr1; uint64_t id_aa64dfr0; uint64_t id_aa64dfr1; uint64_t id_aa64isar0; uint64_t id_aa64isar1; uint64_t id_aa64mmfr0; uint64_t id_aa64mmfr1; uint64_t id_aa64mmfr2; uint64_t id_aa64pfr0; uint64_t id_aa64pfr1; }; struct cpu_desc cpu_desc[MAXCPU]; struct cpu_desc user_cpu_desc; static u_int cpu_print_regs; #define PRINT_ID_AA64_AFR0 0x00000001 #define PRINT_ID_AA64_AFR1 0x00000002 #define PRINT_ID_AA64_DFR0 0x00000010 #define PRINT_ID_AA64_DFR1 0x00000020 #define PRINT_ID_AA64_ISAR0 0x00000100 #define PRINT_ID_AA64_ISAR1 0x00000200 #define PRINT_ID_AA64_MMFR0 0x00001000 #define PRINT_ID_AA64_MMFR1 0x00002000 #define PRINT_ID_AA64_MMFR2 0x00004000 #define PRINT_ID_AA64_PFR0 0x00010000 #define PRINT_ID_AA64_PFR1 0x00020000 struct cpu_parts { u_int part_id; const char *part_name; }; #define CPU_PART_NONE { 0, "Unknown Processor" } struct cpu_implementers { u_int impl_id; const char *impl_name; /* * Part number is implementation defined * so each vendor will have its own set of values and names. */ const struct cpu_parts *cpu_parts; }; #define CPU_IMPLEMENTER_NONE { 0, "Unknown Implementer", cpu_parts_none } /* * Per-implementer table of (PartNum, CPU Name) pairs. */ /* ARM Ltd. */ static const struct cpu_parts cpu_parts_arm[] = { { CPU_PART_FOUNDATION, "Foundation-Model" }, { CPU_PART_CORTEX_A35, "Cortex-A35" }, { CPU_PART_CORTEX_A53, "Cortex-A53" }, { CPU_PART_CORTEX_A55, "Cortex-A55" }, { CPU_PART_CORTEX_A57, "Cortex-A57" }, { CPU_PART_CORTEX_A72, "Cortex-A72" }, { CPU_PART_CORTEX_A73, "Cortex-A73" }, { CPU_PART_CORTEX_A75, "Cortex-A75" }, CPU_PART_NONE, }; /* Cavium */ static const struct cpu_parts cpu_parts_cavium[] = { { CPU_PART_THUNDERX, "ThunderX" }, { CPU_PART_THUNDERX2, "ThunderX2" }, CPU_PART_NONE, }; /* Unknown */ static const struct cpu_parts cpu_parts_none[] = { CPU_PART_NONE, }; /* * Implementers table. */ const struct cpu_implementers cpu_implementers[] = { { CPU_IMPL_ARM, "ARM", cpu_parts_arm }, { CPU_IMPL_BROADCOM, "Broadcom", cpu_parts_none }, { CPU_IMPL_CAVIUM, "Cavium", cpu_parts_cavium }, { CPU_IMPL_DEC, "DEC", cpu_parts_none }, { CPU_IMPL_INFINEON, "IFX", cpu_parts_none }, { CPU_IMPL_FREESCALE, "Freescale", cpu_parts_none }, { CPU_IMPL_NVIDIA, "NVIDIA", cpu_parts_none }, { CPU_IMPL_APM, "APM", cpu_parts_none }, { CPU_IMPL_QUALCOMM, "Qualcomm", cpu_parts_none }, { CPU_IMPL_MARVELL, "Marvell", cpu_parts_none }, { CPU_IMPL_INTEL, "Intel", cpu_parts_none }, CPU_IMPLEMENTER_NONE, }; #define MRS_TYPE_MASK 0xf #define MRS_INVALID 0 #define MRS_EXACT 1 #define MRS_EXACT_VAL(x) (MRS_EXACT | ((x) << 4)) #define MRS_EXACT_FIELD(x) ((x) >> 4) #define MRS_LOWER 2 struct mrs_field { bool sign; u_int type; u_int shift; }; #define MRS_FIELD(_sign, _type, _shift) \ { \ .sign = (_sign), \ .type = (_type), \ .shift = (_shift), \ } #define MRS_FIELD_END { .type = MRS_INVALID, } +static struct mrs_field id_aa64isar0_fields[] = { + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_DP_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SM4_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SM3_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SHA3_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_RDM_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_ATOMIC_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_CRC32_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SHA2_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_SHA1_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR0_AES_SHIFT), + MRS_FIELD_END, +}; + +static struct mrs_field id_aa64isar1_fields[] = { + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_GPI_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_GPA_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_LRCPC_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_FCMA_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_JSCVT_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_API_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_APA_SHIFT), + MRS_FIELD(false, MRS_LOWER, ID_AA64ISAR1_DPB_SHIFT), + MRS_FIELD_END, +}; + static struct mrs_field id_aa64pfr0_fields[] = { MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_SVE_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_RAS_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_GIC_SHIFT), MRS_FIELD(true, MRS_LOWER, ID_AA64PFR0_ADV_SIMD_SHIFT), MRS_FIELD(true, MRS_LOWER, ID_AA64PFR0_FP_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_EL3_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64PFR0_EL2_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64PFR0_EL1_SHIFT), MRS_FIELD(false, MRS_LOWER, ID_AA64PFR0_EL0_SHIFT), MRS_FIELD_END, }; static struct mrs_field id_aa64dfr0_fields[] = { MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_PMS_VER_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_CTX_CMPS_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_WRPS_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_BRPS_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_PMU_VER_SHIFT), MRS_FIELD(false, MRS_EXACT, ID_AA64DFR0_TRACE_VER_SHIFT), MRS_FIELD(false, MRS_EXACT_VAL(0x6), ID_AA64DFR0_DEBUG_VER_SHIFT), MRS_FIELD_END, }; struct mrs_user_reg { u_int CRm; u_int Op2; size_t offset; struct mrs_field *fields; }; static struct mrs_user_reg user_regs[] = { + { /* id_aa64isar0_el1 */ + .CRm = 6, + .Op2 = 0, + .offset = __offsetof(struct cpu_desc, id_aa64isar0), + .fields = id_aa64isar0_fields, + }, + { /* id_aa64isar1_el1 */ + .CRm = 6, + .Op2 = 1, + .offset = __offsetof(struct cpu_desc, id_aa64isar1), + .fields = id_aa64isar1_fields, + }, { /* id_aa64pfr0_el1 */ .CRm = 4, .Op2 = 0, .offset = __offsetof(struct cpu_desc, id_aa64pfr0), .fields = id_aa64pfr0_fields, }, { /* id_aa64dfr0_el1 */ .CRm = 5, .Op2 = 0, .offset = __offsetof(struct cpu_desc, id_aa64dfr0), .fields = id_aa64dfr0_fields, }, }; #define CPU_DESC_FIELD(desc, idx) \ *(uint64_t *)((char *)&(desc) + user_regs[(idx)].offset) static int user_mrs_handler(vm_offset_t va, uint32_t insn, struct trapframe *frame, uint32_t esr) { uint64_t value; int CRm, Op2, i, reg; if ((insn & MRS_MASK) != MRS_VALUE) return (0); /* * We only emulate Op0 == 3, Op1 == 0, CRn == 0, CRm == {0, 4-7}. * These are in the EL1 CPU identification space. * CRm == 0 holds MIDR_EL1, MPIDR_EL1, and REVID_EL1. * CRm == {4-7} holds the ID_AA64 registers. * * For full details see the ARMv8 ARM (ARM DDI 0487C.a) * Table D9-2 System instruction encodings for non-Debug System * register accesses. */ if (mrs_Op0(insn) != 3 || mrs_Op1(insn) != 0 || mrs_CRn(insn) != 0) return (0); CRm = mrs_CRm(insn); if (CRm > 7 || (CRm < 4 && CRm != 0)) return (0); Op2 = mrs_Op2(insn); value = 0; for (i = 0; i < nitems(user_regs); i++) { if (user_regs[i].CRm == CRm && user_regs[i].Op2 == Op2) { value = CPU_DESC_FIELD(user_cpu_desc, i); break; } } if (CRm == 0) { switch (Op2) { case 0: value = READ_SPECIALREG(midr_el1); break; case 5: value = READ_SPECIALREG(mpidr_el1); break; case 6: value = READ_SPECIALREG(revidr_el1); break; default: return (0); } } /* * We will handle this instruction, move to the next so we * don't trap here again. */ frame->tf_elr += INSN_SIZE; reg = MRS_REGISTER(insn); /* If reg is 31 then write to xzr, i.e. do nothing */ if (reg == 31) return (1); if (reg < nitems(frame->tf_x)) frame->tf_x[reg] = value; else if (reg == 30) frame->tf_lr = value; return (1); } static void update_user_regs(u_int cpu) { struct mrs_field *fields; uint64_t cur, value; int i, j, cur_field, new_field; for (i = 0; i < nitems(user_regs); i++) { value = CPU_DESC_FIELD(cpu_desc[cpu], i); if (cpu == 0) cur = value; else cur = CPU_DESC_FIELD(user_cpu_desc, i); fields = user_regs[i].fields; for (j = 0; fields[j].type != 0; j++) { switch (fields[j].type & MRS_TYPE_MASK) { case MRS_EXACT: cur &= ~(0xfu << fields[j].shift); cur |= (uint64_t)MRS_EXACT_FIELD(fields[j].type) << fields[j].shift; break; case MRS_LOWER: new_field = (value >> fields[j].shift) & 0xf; cur_field = (cur >> fields[j].shift) & 0xf; if ((fields[j].sign && (int)new_field < (int)cur_field) || (!fields[j].sign && (u_int)new_field < (u_int)cur_field)) { cur &= ~(0xfu << fields[j].shift); cur |= new_field << fields[j].shift; } break; default: panic("Invalid field type: %d", fields[j].type); } } CPU_DESC_FIELD(user_cpu_desc, i) = cur; } } static void identify_cpu_sysinit(void *dummy __unused) { int cpu; /* Create a user visible cpu description with safe values */ memset(&user_cpu_desc, 0, sizeof(user_cpu_desc)); /* Safe values for these registers */ user_cpu_desc.id_aa64pfr0 = ID_AA64PFR0_ADV_SIMD_NONE | ID_AA64PFR0_FP_NONE | ID_AA64PFR0_EL1_64 | ID_AA64PFR0_EL0_64; user_cpu_desc.id_aa64dfr0 = ID_AA64DFR0_DEBUG_VER_8; CPU_FOREACH(cpu) { print_cpu_features(cpu); update_user_regs(cpu); } install_undef_handler(true, user_mrs_handler); } SYSINIT(idenrity_cpu, SI_SUB_SMP, SI_ORDER_ANY, identify_cpu_sysinit, NULL); void print_cpu_features(u_int cpu) { struct sbuf *sb; int printed; sb = sbuf_new_auto(); sbuf_printf(sb, "CPU%3d: %s %s r%dp%d", cpu, cpu_desc[cpu].cpu_impl_name, cpu_desc[cpu].cpu_part_name, cpu_desc[cpu].cpu_variant, cpu_desc[cpu].cpu_revision); sbuf_cat(sb, " affinity:"); switch(cpu_aff_levels) { default: case 4: sbuf_printf(sb, " %2d", CPU_AFF3(cpu_desc[cpu].mpidr)); /* FALLTHROUGH */ case 3: sbuf_printf(sb, " %2d", CPU_AFF2(cpu_desc[cpu].mpidr)); /* FALLTHROUGH */ case 2: sbuf_printf(sb, " %2d", CPU_AFF1(cpu_desc[cpu].mpidr)); /* FALLTHROUGH */ case 1: case 0: /* On UP this will be zero */ sbuf_printf(sb, " %2d", CPU_AFF0(cpu_desc[cpu].mpidr)); break; } sbuf_finish(sb); printf("%s\n", sbuf_data(sb)); sbuf_clear(sb); /* * There is a hardware errata where, if one CPU is performing a TLB * invalidation while another is performing a store-exclusive the * store-exclusive may return the wrong status. A workaround seems * to be to use an IPI to invalidate on each CPU, however given the * limited number of affected units (pass 1.1 is the evaluation * hardware revision), and the lack of information from Cavium * this has not been implemented. * * At the time of writing this the only information is from: * https://lkml.org/lkml/2016/8/4/722 */ /* * XXX: CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1 on its own also * triggers on pass 2.0+. */ if (cpu == 0 && CPU_VAR(PCPU_GET(midr)) == 0 && CPU_MATCH_ERRATA_CAVIUM_THUNDERX_1_1) printf("WARNING: ThunderX Pass 1.1 detected.\nThis has known " "hardware bugs that may cause the incorrect operation of " "atomic operations.\n"); if (cpu != 0 && cpu_print_regs == 0) return; #define SEP_STR ((printed++) == 0) ? "" : "," /* AArch64 Instruction Set Attribute Register 0 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_ISAR0) != 0) { printed = 0; sbuf_printf(sb, " Instruction Set Attributes 0 = <"); switch (ID_AA64ISAR0_RDM(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_RDM_NONE: break; case ID_AA64ISAR0_RDM_IMPL: sbuf_printf(sb, "%sRDM", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown RDM", SEP_STR); } switch (ID_AA64ISAR0_ATOMIC(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_ATOMIC_NONE: break; case ID_AA64ISAR0_ATOMIC_IMPL: sbuf_printf(sb, "%sAtomic", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Atomic", SEP_STR); } switch (ID_AA64ISAR0_AES(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_AES_NONE: break; case ID_AA64ISAR0_AES_BASE: sbuf_printf(sb, "%sAES", SEP_STR); break; case ID_AA64ISAR0_AES_PMULL: sbuf_printf(sb, "%sAES+PMULL", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown AES", SEP_STR); break; } switch (ID_AA64ISAR0_SHA1(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_SHA1_NONE: break; case ID_AA64ISAR0_SHA1_BASE: sbuf_printf(sb, "%sSHA1", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SHA1", SEP_STR); break; } switch (ID_AA64ISAR0_SHA2(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_SHA2_NONE: break; case ID_AA64ISAR0_SHA2_BASE: sbuf_printf(sb, "%sSHA2", SEP_STR); break; case ID_AA64ISAR0_SHA2_512: sbuf_printf(sb, "%sSHA2+SHA512", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SHA2", SEP_STR); break; } switch (ID_AA64ISAR0_CRC32(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_CRC32_NONE: break; case ID_AA64ISAR0_CRC32_BASE: sbuf_printf(sb, "%sCRC32", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown CRC32", SEP_STR); break; } switch (ID_AA64ISAR0_SHA3(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_SHA3_NONE: break; case ID_AA64ISAR0_SHA3_IMPL: sbuf_printf(sb, "%sSHA3", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SHA3", SEP_STR); break; } switch (ID_AA64ISAR0_SM3(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_SM3_NONE: break; case ID_AA64ISAR0_SM3_IMPL: sbuf_printf(sb, "%sSM3", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SM3", SEP_STR); break; } switch (ID_AA64ISAR0_SM4(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_SM4_NONE: break; case ID_AA64ISAR0_SM4_IMPL: sbuf_printf(sb, "%sSM4", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SM4", SEP_STR); break; } switch (ID_AA64ISAR0_DP(cpu_desc[cpu].id_aa64isar0)) { case ID_AA64ISAR0_DP_NONE: break; case ID_AA64ISAR0_DP_IMPL: sbuf_printf(sb, "%sDotProd", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown DP", SEP_STR); break; } if ((cpu_desc[cpu].id_aa64isar0 & ~ID_AA64ISAR0_MASK) != 0) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64isar0 & ~ID_AA64ISAR0_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Instruction Set Attribute Register 1 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_ISAR1) != 0) { printed = 0; sbuf_printf(sb, " Instruction Set Attributes 1 = <"); switch (ID_AA64ISAR1_GPI(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_GPI_NONE: break; case ID_AA64ISAR1_GPI_IMPL: sbuf_printf(sb, "%sImpl GenericAuth", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown GenericAuth", SEP_STR); break; } switch (ID_AA64ISAR1_GPA(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_GPA_NONE: break; case ID_AA64ISAR1_GPA_IMPL: sbuf_printf(sb, "%sPrince GenericAuth", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown GenericAuth", SEP_STR); break; } switch (ID_AA64ISAR1_LRCPC(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_LRCPC_NONE: break; case ID_AA64ISAR1_LRCPC_IMPL: sbuf_printf(sb, "%sRCpc", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown RCpc", SEP_STR); break; } switch (ID_AA64ISAR1_FCMA(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_FCMA_NONE: break; case ID_AA64ISAR1_FCMA_IMPL: sbuf_printf(sb, "%sFCMA", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown FCMA", SEP_STR); break; } switch (ID_AA64ISAR1_JSCVT(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_JSCVT_NONE: break; case ID_AA64ISAR1_JSCVT_IMPL: sbuf_printf(sb, "%sJS Conv", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown JS Conv", SEP_STR); break; } switch (ID_AA64ISAR1_API(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_API_NONE: break; case ID_AA64ISAR1_API_IMPL: sbuf_printf(sb, "%sImpl AddrAuth", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Impl AddrAuth", SEP_STR); break; } switch (ID_AA64ISAR1_APA(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_APA_NONE: break; case ID_AA64ISAR1_APA_IMPL: sbuf_printf(sb, "%sPrince AddrAuth", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Prince AddrAuth", SEP_STR); break; } switch (ID_AA64ISAR1_DPB(cpu_desc[cpu].id_aa64isar1)) { case ID_AA64ISAR1_DPB_NONE: break; case ID_AA64ISAR1_DPB_IMPL: sbuf_printf(sb, "%sDC CVAP", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown DC CVAP", SEP_STR); break; } if ((cpu_desc[cpu].id_aa64isar1 & ~ID_AA64ISAR1_MASK) != 0) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64isar1 & ~ID_AA64ISAR1_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Processor Feature Register 0 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_PFR0) != 0) { printed = 0; sbuf_printf(sb, " Processor Features 0 = <"); switch (ID_AA64PFR0_SVE(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_SVE_NONE: break; case ID_AA64PFR0_SVE_IMPL: sbuf_printf(sb, "%sSVE", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SVE", SEP_STR); break; } switch (ID_AA64PFR0_RAS(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_RAS_NONE: break; case ID_AA64PFR0_RAS_V1: sbuf_printf(sb, "%sRASv1", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown RAS", SEP_STR); break; } switch (ID_AA64PFR0_GIC(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_GIC_CPUIF_NONE: break; case ID_AA64PFR0_GIC_CPUIF_EN: sbuf_printf(sb, "%sGIC", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown GIC interface", SEP_STR); break; } switch (ID_AA64PFR0_ADV_SIMD(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_ADV_SIMD_NONE: break; case ID_AA64PFR0_ADV_SIMD_IMPL: sbuf_printf(sb, "%sAdvSIMD", SEP_STR); break; case ID_AA64PFR0_ADV_SIMD_HP: sbuf_printf(sb, "%sAdvSIMD+HP", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown AdvSIMD", SEP_STR); break; } switch (ID_AA64PFR0_FP(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_FP_NONE: break; case ID_AA64PFR0_FP_IMPL: sbuf_printf(sb, "%sFloat", SEP_STR); break; case ID_AA64PFR0_FP_HP: sbuf_printf(sb, "%sFloat+HP", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Float", SEP_STR); break; } switch (ID_AA64PFR0_EL3(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_EL3_NONE: sbuf_printf(sb, "%sNo EL3", SEP_STR); break; case ID_AA64PFR0_EL3_64: sbuf_printf(sb, "%sEL3", SEP_STR); break; case ID_AA64PFR0_EL3_64_32: sbuf_printf(sb, "%sEL3 32", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown EL3", SEP_STR); break; } switch (ID_AA64PFR0_EL2(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_EL2_NONE: sbuf_printf(sb, "%sNo EL2", SEP_STR); break; case ID_AA64PFR0_EL2_64: sbuf_printf(sb, "%sEL2", SEP_STR); break; case ID_AA64PFR0_EL2_64_32: sbuf_printf(sb, "%sEL2 32", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown EL2", SEP_STR); break; } switch (ID_AA64PFR0_EL1(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_EL1_64: sbuf_printf(sb, "%sEL1", SEP_STR); break; case ID_AA64PFR0_EL1_64_32: sbuf_printf(sb, "%sEL1 32", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown EL1", SEP_STR); break; } switch (ID_AA64PFR0_EL0(cpu_desc[cpu].id_aa64pfr0)) { case ID_AA64PFR0_EL0_64: sbuf_printf(sb, "%sEL0", SEP_STR); break; case ID_AA64PFR0_EL0_64_32: sbuf_printf(sb, "%sEL0 32", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown EL0", SEP_STR); break; } if ((cpu_desc[cpu].id_aa64pfr0 & ~ID_AA64PFR0_MASK) != 0) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64pfr0 & ~ID_AA64PFR0_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Processor Feature Register 1 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_PFR1) != 0) { printf(" Processor Features 1 = <%#lx>\n", cpu_desc[cpu].id_aa64pfr1); } /* AArch64 Memory Model Feature Register 0 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_MMFR0) != 0) { printed = 0; sbuf_printf(sb, " Memory Model Features 0 = <"); switch (ID_AA64MMFR0_TGRAN4(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_TGRAN4_NONE: break; case ID_AA64MMFR0_TGRAN4_IMPL: sbuf_printf(sb, "%s4k Granule", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown 4k Granule", SEP_STR); break; } switch (ID_AA64MMFR0_TGRAN16(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_TGRAN16_NONE: break; case ID_AA64MMFR0_TGRAN16_IMPL: sbuf_printf(sb, "%s16k Granule", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown 16k Granule", SEP_STR); break; } switch (ID_AA64MMFR0_TGRAN64(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_TGRAN64_NONE: break; case ID_AA64MMFR0_TGRAN64_IMPL: sbuf_printf(sb, "%s64k Granule", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown 64k Granule", SEP_STR); break; } switch (ID_AA64MMFR0_BIGEND(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_BIGEND_FIXED: break; case ID_AA64MMFR0_BIGEND_MIXED: sbuf_printf(sb, "%sMixedEndian", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Endian switching", SEP_STR); break; } switch (ID_AA64MMFR0_BIGEND_EL0(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_BIGEND_EL0_FIXED: break; case ID_AA64MMFR0_BIGEND_EL0_MIXED: sbuf_printf(sb, "%sEL0 MixEndian", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown EL0 Endian switching", SEP_STR); break; } switch (ID_AA64MMFR0_S_NS_MEM(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_S_NS_MEM_NONE: break; case ID_AA64MMFR0_S_NS_MEM_DISTINCT: sbuf_printf(sb, "%sS/NS Mem", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown S/NS Mem", SEP_STR); break; } switch (ID_AA64MMFR0_ASID_BITS(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_ASID_BITS_8: sbuf_printf(sb, "%s8bit ASID", SEP_STR); break; case ID_AA64MMFR0_ASID_BITS_16: sbuf_printf(sb, "%s16bit ASID", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown ASID", SEP_STR); break; } switch (ID_AA64MMFR0_PA_RANGE(cpu_desc[cpu].id_aa64mmfr0)) { case ID_AA64MMFR0_PA_RANGE_4G: sbuf_printf(sb, "%s4GB PA", SEP_STR); break; case ID_AA64MMFR0_PA_RANGE_64G: sbuf_printf(sb, "%s64GB PA", SEP_STR); break; case ID_AA64MMFR0_PA_RANGE_1T: sbuf_printf(sb, "%s1TB PA", SEP_STR); break; case ID_AA64MMFR0_PA_RANGE_4T: sbuf_printf(sb, "%s4TB PA", SEP_STR); break; case ID_AA64MMFR0_PA_RANGE_16T: sbuf_printf(sb, "%s16TB PA", SEP_STR); break; case ID_AA64MMFR0_PA_RANGE_256T: sbuf_printf(sb, "%s256TB PA", SEP_STR); break; case ID_AA64MMFR0_PA_RANGE_4P: sbuf_printf(sb, "%s4PB PA", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown PA Range", SEP_STR); break; } if ((cpu_desc[cpu].id_aa64mmfr0 & ~ID_AA64MMFR0_MASK) != 0) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64mmfr0 & ~ID_AA64MMFR0_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Memory Model Feature Register 1 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_MMFR1) != 0) { printed = 0; sbuf_printf(sb, " Memory Model Features 1 = <"); switch (ID_AA64MMFR1_XNX(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_XNX_NONE: break; case ID_AA64MMFR1_XNX_IMPL: sbuf_printf(sb, "%sEL2 XN", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown XNX", SEP_STR); break; } switch (ID_AA64MMFR1_SPEC_SEI(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_SPEC_SEI_NONE: break; case ID_AA64MMFR1_SPEC_SEI_IMPL: sbuf_printf(sb, "%sSpecSEI", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SpecSEI", SEP_STR); break; } switch (ID_AA64MMFR1_PAN(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_PAN_NONE: break; case ID_AA64MMFR1_PAN_IMPL: sbuf_printf(sb, "%sPAN", SEP_STR); break; case ID_AA64MMFR1_PAN_ATS1E1: sbuf_printf(sb, "%sPAN+AT", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown PAN", SEP_STR); break; } switch (ID_AA64MMFR1_LO(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_LO_NONE: break; case ID_AA64MMFR1_LO_IMPL: sbuf_printf(sb, "%sLO", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown LO", SEP_STR); break; } switch (ID_AA64MMFR1_HPDS(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_HPDS_NONE: break; case ID_AA64MMFR1_HPDS_HPD: sbuf_printf(sb, "%sHPDS", SEP_STR); break; case ID_AA64MMFR1_HPDS_TTPBHA: sbuf_printf(sb, "%sTTPBHA", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown HPDS", SEP_STR); break; } switch (ID_AA64MMFR1_VH(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_VH_NONE: break; case ID_AA64MMFR1_VH_IMPL: sbuf_printf(sb, "%sVHE", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown VHE", SEP_STR); break; } switch (ID_AA64MMFR1_VMIDBITS(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_VMIDBITS_8: break; case ID_AA64MMFR1_VMIDBITS_16: sbuf_printf(sb, "%s16 VMID bits", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown VMID bits", SEP_STR); break; } switch (ID_AA64MMFR1_HAFDBS(cpu_desc[cpu].id_aa64mmfr1)) { case ID_AA64MMFR1_HAFDBS_NONE: break; case ID_AA64MMFR1_HAFDBS_AF: sbuf_printf(sb, "%sAF", SEP_STR); break; case ID_AA64MMFR1_HAFDBS_AF_DBS: sbuf_printf(sb, "%sAF+DBS", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Hardware update AF/DBS", SEP_STR); break; } if ((cpu_desc[cpu].id_aa64mmfr1 & ~ID_AA64MMFR1_MASK) != 0) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64mmfr1 & ~ID_AA64MMFR1_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Memory Model Feature Register 2 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_MMFR2) != 0) { printed = 0; sbuf_printf(sb, " Memory Model Features 2 = <"); switch (ID_AA64MMFR2_NV(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_NV_NONE: break; case ID_AA64MMFR2_NV_IMPL: sbuf_printf(sb, "%sNestedVirt", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown NestedVirt", SEP_STR); break; } switch (ID_AA64MMFR2_CCIDX(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_CCIDX_32: sbuf_printf(sb, "%s32b CCIDX", SEP_STR); break; case ID_AA64MMFR2_CCIDX_64: sbuf_printf(sb, "%s64b CCIDX", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown CCIDX", SEP_STR); break; } switch (ID_AA64MMFR2_VA_RANGE(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_VA_RANGE_48: sbuf_printf(sb, "%s48b VA", SEP_STR); break; case ID_AA64MMFR2_VA_RANGE_52: sbuf_printf(sb, "%s52b VA", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown VA Range", SEP_STR); break; } switch (ID_AA64MMFR2_IESB(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_IESB_NONE: break; case ID_AA64MMFR2_IESB_IMPL: sbuf_printf(sb, "%sIESB", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown IESB", SEP_STR); break; } switch (ID_AA64MMFR2_LSM(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_LSM_NONE: break; case ID_AA64MMFR2_LSM_IMPL: sbuf_printf(sb, "%sLSM", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown LSM", SEP_STR); break; } switch (ID_AA64MMFR2_UAO(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_UAO_NONE: break; case ID_AA64MMFR2_UAO_IMPL: sbuf_printf(sb, "%sUAO", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown UAO", SEP_STR); break; } switch (ID_AA64MMFR2_CNP(cpu_desc[cpu].id_aa64mmfr2)) { case ID_AA64MMFR2_CNP_NONE: break; case ID_AA64MMFR2_CNP_IMPL: sbuf_printf(sb, "%sCnP", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown CnP", SEP_STR); break; } if ((cpu_desc[cpu].id_aa64mmfr2 & ~ID_AA64MMFR2_MASK) != 0) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64mmfr2 & ~ID_AA64MMFR2_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Debug Feature Register 0 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_DFR0) != 0) { printed = 0; sbuf_printf(sb, " Debug Features 0 = <"); switch(ID_AA64DFR0_PMS_VER(cpu_desc[cpu].id_aa64dfr0)) { case ID_AA64DFR0_PMS_VER_NONE: break; case ID_AA64DFR0_PMS_VER_V1: sbuf_printf(sb, "%sSPE v1", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown SPE", SEP_STR); break; } sbuf_printf(sb, "%s%lu CTX Breakpoints", SEP_STR, ID_AA64DFR0_CTX_CMPS(cpu_desc[cpu].id_aa64dfr0)); sbuf_printf(sb, "%s%lu Watchpoints", SEP_STR, ID_AA64DFR0_WRPS(cpu_desc[cpu].id_aa64dfr0)); sbuf_printf(sb, "%s%lu Breakpoints", SEP_STR, ID_AA64DFR0_BRPS(cpu_desc[cpu].id_aa64dfr0)); switch (ID_AA64DFR0_PMU_VER(cpu_desc[cpu].id_aa64dfr0)) { case ID_AA64DFR0_PMU_VER_NONE: break; case ID_AA64DFR0_PMU_VER_3: sbuf_printf(sb, "%sPMUv3", SEP_STR); break; case ID_AA64DFR0_PMU_VER_3_1: sbuf_printf(sb, "%sPMUv3+16 bit evtCount", SEP_STR); break; case ID_AA64DFR0_PMU_VER_IMPL: sbuf_printf(sb, "%sImplementation defined PMU", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown PMU", SEP_STR); break; } switch (ID_AA64DFR0_TRACE_VER(cpu_desc[cpu].id_aa64dfr0)) { case ID_AA64DFR0_TRACE_VER_NONE: break; case ID_AA64DFR0_TRACE_VER_IMPL: sbuf_printf(sb, "%sTrace", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Trace", SEP_STR); break; } switch (ID_AA64DFR0_DEBUG_VER(cpu_desc[cpu].id_aa64dfr0)) { case ID_AA64DFR0_DEBUG_VER_8: sbuf_printf(sb, "%sDebug v8", SEP_STR); break; case ID_AA64DFR0_DEBUG_VER_8_VHE: sbuf_printf(sb, "%sDebug v8+VHE", SEP_STR); break; case ID_AA64DFR0_DEBUG_VER_8_2: sbuf_printf(sb, "%sDebug v8.2", SEP_STR); break; default: sbuf_printf(sb, "%sUnknown Debug", SEP_STR); break; } if (cpu_desc[cpu].id_aa64dfr0 & ~ID_AA64DFR0_MASK) sbuf_printf(sb, "%s%#lx", SEP_STR, cpu_desc[cpu].id_aa64dfr0 & ~ID_AA64DFR0_MASK); sbuf_finish(sb); printf("%s>\n", sbuf_data(sb)); sbuf_clear(sb); } /* AArch64 Memory Model Feature Register 1 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_DFR1) != 0) { printf(" Debug Features 1 = <%#lx>\n", cpu_desc[cpu].id_aa64dfr1); } /* AArch64 Auxiliary Feature Register 0 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_AFR0) != 0) { printf(" Auxiliary Features 0 = <%#lx>\n", cpu_desc[cpu].id_aa64afr0); } /* AArch64 Auxiliary Feature Register 1 */ if (cpu == 0 || (cpu_print_regs & PRINT_ID_AA64_AFR1) != 0) { printf(" Auxiliary Features 1 = <%#lx>\n", cpu_desc[cpu].id_aa64afr1); } sbuf_delete(sb); sb = NULL; #undef SEP_STR } void identify_cpu(void) { u_int midr; u_int impl_id; u_int part_id; u_int cpu; size_t i; const struct cpu_parts *cpu_partsp = NULL; cpu = PCPU_GET(cpuid); midr = get_midr(); /* * Store midr to pcpu to allow fast reading * from EL0, EL1 and assembly code. */ PCPU_SET(midr, midr); impl_id = CPU_IMPL(midr); for (i = 0; i < nitems(cpu_implementers); i++) { if (impl_id == cpu_implementers[i].impl_id || cpu_implementers[i].impl_id == 0) { cpu_desc[cpu].cpu_impl = impl_id; cpu_desc[cpu].cpu_impl_name = cpu_implementers[i].impl_name; cpu_partsp = cpu_implementers[i].cpu_parts; break; } } part_id = CPU_PART(midr); for (i = 0; &cpu_partsp[i] != NULL; i++) { if (part_id == cpu_partsp[i].part_id || cpu_partsp[i].part_id == 0) { cpu_desc[cpu].cpu_part_num = part_id; cpu_desc[cpu].cpu_part_name = cpu_partsp[i].part_name; break; } } cpu_desc[cpu].cpu_revision = CPU_REV(midr); cpu_desc[cpu].cpu_variant = CPU_VAR(midr); /* Save affinity for current CPU */ cpu_desc[cpu].mpidr = get_mpidr(); CPU_AFFINITY(cpu) = cpu_desc[cpu].mpidr & CPU_AFF_MASK; cpu_desc[cpu].id_aa64dfr0 = READ_SPECIALREG(ID_AA64DFR0_EL1); cpu_desc[cpu].id_aa64dfr1 = READ_SPECIALREG(ID_AA64DFR1_EL1); cpu_desc[cpu].id_aa64isar0 = READ_SPECIALREG(ID_AA64ISAR0_EL1); cpu_desc[cpu].id_aa64isar1 = READ_SPECIALREG(ID_AA64ISAR1_EL1); cpu_desc[cpu].id_aa64mmfr0 = READ_SPECIALREG(ID_AA64MMFR0_EL1); cpu_desc[cpu].id_aa64mmfr1 = READ_SPECIALREG(ID_AA64MMFR1_EL1); cpu_desc[cpu].id_aa64mmfr2 = READ_SPECIALREG(ID_AA64MMFR2_EL1); cpu_desc[cpu].id_aa64pfr0 = READ_SPECIALREG(ID_AA64PFR0_EL1); cpu_desc[cpu].id_aa64pfr1 = READ_SPECIALREG(ID_AA64PFR1_EL1); if (cpu != 0) { /* * This code must run on one cpu at a time, but we are * not scheduling on the current core so implement a * simple spinlock. */ while (atomic_cmpset_acq_int(&ident_lock, 0, 1) == 0) __asm __volatile("wfe" ::: "memory"); switch (cpu_aff_levels) { case 0: if (CPU_AFF0(cpu_desc[cpu].mpidr) != CPU_AFF0(cpu_desc[0].mpidr)) cpu_aff_levels = 1; /* FALLTHROUGH */ case 1: if (CPU_AFF1(cpu_desc[cpu].mpidr) != CPU_AFF1(cpu_desc[0].mpidr)) cpu_aff_levels = 2; /* FALLTHROUGH */ case 2: if (CPU_AFF2(cpu_desc[cpu].mpidr) != CPU_AFF2(cpu_desc[0].mpidr)) cpu_aff_levels = 3; /* FALLTHROUGH */ case 3: if (CPU_AFF3(cpu_desc[cpu].mpidr) != CPU_AFF3(cpu_desc[0].mpidr)) cpu_aff_levels = 4; break; } if (cpu_desc[cpu].id_aa64afr0 != cpu_desc[0].id_aa64afr0) cpu_print_regs |= PRINT_ID_AA64_AFR0; if (cpu_desc[cpu].id_aa64afr1 != cpu_desc[0].id_aa64afr1) cpu_print_regs |= PRINT_ID_AA64_AFR1; if (cpu_desc[cpu].id_aa64dfr0 != cpu_desc[0].id_aa64dfr0) cpu_print_regs |= PRINT_ID_AA64_DFR0; if (cpu_desc[cpu].id_aa64dfr1 != cpu_desc[0].id_aa64dfr1) cpu_print_regs |= PRINT_ID_AA64_DFR1; if (cpu_desc[cpu].id_aa64isar0 != cpu_desc[0].id_aa64isar0) cpu_print_regs |= PRINT_ID_AA64_ISAR0; if (cpu_desc[cpu].id_aa64isar1 != cpu_desc[0].id_aa64isar1) cpu_print_regs |= PRINT_ID_AA64_ISAR1; if (cpu_desc[cpu].id_aa64mmfr0 != cpu_desc[0].id_aa64mmfr0) cpu_print_regs |= PRINT_ID_AA64_MMFR0; if (cpu_desc[cpu].id_aa64mmfr1 != cpu_desc[0].id_aa64mmfr1) cpu_print_regs |= PRINT_ID_AA64_MMFR1; if (cpu_desc[cpu].id_aa64mmfr2 != cpu_desc[0].id_aa64mmfr2) cpu_print_regs |= PRINT_ID_AA64_MMFR2; if (cpu_desc[cpu].id_aa64pfr0 != cpu_desc[0].id_aa64pfr0) cpu_print_regs |= PRINT_ID_AA64_PFR0; if (cpu_desc[cpu].id_aa64pfr1 != cpu_desc[0].id_aa64pfr1) cpu_print_regs |= PRINT_ID_AA64_PFR1; /* Wake up the other CPUs */ atomic_store_rel_int(&ident_lock, 0); __asm __volatile("sev" ::: "memory"); } } Index: projects/clang700-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c =================================================================== --- projects/clang700-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c (revision 339014) +++ projects/clang700-import/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c (revision 339015) @@ -1,1018 +1,1027 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2012, 2018 by Delphix. All rights reserved. * Copyright (c) 2014 Integros [integros.com] */ #include #include #include #include #include #include #include #include /* * ZFS I/O Scheduler * --------------- * * ZFS issues I/O operations to leaf vdevs to satisfy and complete zios. The * I/O scheduler determines when and in what order those operations are * issued. The I/O scheduler divides operations into six I/O classes * prioritized in the following order: sync read, sync write, async read, * async write, scrub/resilver and trim. Each queue defines the minimum and * maximum number of concurrent operations that may be issued to the device. * In addition, the device has an aggregate maximum. Note that the sum of the * per-queue minimums must not exceed the aggregate maximum, and if the * aggregate maximum is equal to or greater than the sum of the per-queue * maximums, the per-queue minimum has no effect. * * For many physical devices, throughput increases with the number of * concurrent operations, but latency typically suffers. Further, physical * devices typically have a limit at which more concurrent operations have no * effect on throughput or can actually cause it to decrease. * * The scheduler selects the next operation to issue by first looking for an * I/O class whose minimum has not been satisfied. Once all are satisfied and * the aggregate maximum has not been hit, the scheduler looks for classes * whose maximum has not been satisfied. Iteration through the I/O classes is * done in the order specified above. No further operations are issued if the * aggregate maximum number of concurrent operations has been hit or if there * are no operations queued for an I/O class that has not hit its maximum. * Every time an I/O is queued or an operation completes, the I/O scheduler * looks for new operations to issue. * * All I/O classes have a fixed maximum number of outstanding operations * except for the async write class. Asynchronous writes represent the data * that is committed to stable storage during the syncing stage for * transaction groups (see txg.c). Transaction groups enter the syncing state * periodically so the number of queued async writes will quickly burst up and * then bleed down to zero. Rather than servicing them as quickly as possible, * the I/O scheduler changes the maximum number of active async write I/Os * according to the amount of dirty data in the pool (see dsl_pool.c). Since * both throughput and latency typically increase with the number of * concurrent operations issued to physical devices, reducing the burstiness * in the number of concurrent operations also stabilizes the response time of * operations from other -- and in particular synchronous -- queues. In broad * strokes, the I/O scheduler will issue more concurrent operations from the * async write queue as there's more dirty data in the pool. * * Async Writes * * The number of concurrent operations issued for the async write I/O class * follows a piece-wise linear function defined by a few adjustable points. * * | o---------| <-- zfs_vdev_async_write_max_active * ^ | /^ | * | | / | | * active | / | | * I/O | / | | * count | / | | * | / | | * |------------o | | <-- zfs_vdev_async_write_min_active * 0|____________^______|_________| * 0% | | 100% of zfs_dirty_data_max * | | * | `-- zfs_vdev_async_write_active_max_dirty_percent * `--------- zfs_vdev_async_write_active_min_dirty_percent * * Until the amount of dirty data exceeds a minimum percentage of the dirty * data allowed in the pool, the I/O scheduler will limit the number of * concurrent operations to the minimum. As that threshold is crossed, the * number of concurrent operations issued increases linearly to the maximum at * the specified maximum percentage of the dirty data allowed in the pool. * * Ideally, the amount of dirty data on a busy pool will stay in the sloped * part of the function between zfs_vdev_async_write_active_min_dirty_percent * and zfs_vdev_async_write_active_max_dirty_percent. If it exceeds the * maximum percentage, this indicates that the rate of incoming data is * greater than the rate that the backend storage can handle. In this case, we * must further throttle incoming writes (see dmu_tx_delay() for details). */ /* * The maximum number of I/Os active to each device. Ideally, this will be >= * the sum of each queue's max_active. It must be at least the sum of each * queue's min_active. */ uint32_t zfs_vdev_max_active = 1000; /* * Per-queue limits on the number of I/Os active to each device. If the * sum of the queue's max_active is < zfs_vdev_max_active, then the * min_active comes into play. We will send min_active from each queue, * and then select from queues in the order defined by zio_priority_t. * * In general, smaller max_active's will lead to lower latency of synchronous * operations. Larger max_active's may lead to higher overall throughput, * depending on underlying storage. * * The ratio of the queues' max_actives determines the balance of performance * between reads, writes, and scrubs. E.g., increasing * zfs_vdev_scrub_max_active will cause the scrub or resilver to complete * more quickly, but reads and writes to have higher latency and lower * throughput. */ uint32_t zfs_vdev_sync_read_min_active = 10; uint32_t zfs_vdev_sync_read_max_active = 10; uint32_t zfs_vdev_sync_write_min_active = 10; uint32_t zfs_vdev_sync_write_max_active = 10; uint32_t zfs_vdev_async_read_min_active = 1; uint32_t zfs_vdev_async_read_max_active = 3; uint32_t zfs_vdev_async_write_min_active = 1; uint32_t zfs_vdev_async_write_max_active = 10; uint32_t zfs_vdev_scrub_min_active = 1; uint32_t zfs_vdev_scrub_max_active = 2; uint32_t zfs_vdev_trim_min_active = 1; /* * TRIM max active is large in comparison to the other values due to the fact * that TRIM IOs are coalesced at the device layer. This value is set such * that a typical SSD can process the queued IOs in a single request. */ uint32_t zfs_vdev_trim_max_active = 64; uint32_t zfs_vdev_removal_min_active = 1; uint32_t zfs_vdev_removal_max_active = 2; uint32_t zfs_vdev_initializing_min_active = 1; uint32_t zfs_vdev_initializing_max_active = 1; /* * When the pool has less than zfs_vdev_async_write_active_min_dirty_percent * dirty data, use zfs_vdev_async_write_min_active. When it has more than * zfs_vdev_async_write_active_max_dirty_percent, use * zfs_vdev_async_write_max_active. The value is linearly interpolated * between min and max. */ int zfs_vdev_async_write_active_min_dirty_percent = 30; int zfs_vdev_async_write_active_max_dirty_percent = 60; /* * To reduce IOPs, we aggregate small adjacent I/Os into one large I/O. * For read I/Os, we also aggregate across small adjacency gaps; for writes * we include spans of optional I/Os to aid aggregation at the disk even when * they aren't able to help us aggregate at this level. */ int zfs_vdev_aggregation_limit = 1 << 20; int zfs_vdev_read_gap_limit = 32 << 10; int zfs_vdev_write_gap_limit = 4 << 10; /* * Define the queue depth percentage for each top-level. This percentage is * used in conjunction with zfs_vdev_async_max_active to determine how many * allocations a specific top-level vdev should handle. Once the queue depth * reaches zfs_vdev_queue_depth_pct * zfs_vdev_async_write_max_active / 100 * then allocator will stop allocating blocks on that top-level device. * The default kernel setting is 1000% which will yield 100 allocations per * device. For userland testing, the default setting is 300% which equates * to 30 allocations per device. */ #ifdef _KERNEL int zfs_vdev_queue_depth_pct = 1000; #else int zfs_vdev_queue_depth_pct = 300; #endif /* * When performing allocations for a given metaslab, we want to make sure that * there are enough IOs to aggregate together to improve throughput. We want to * ensure that there are at least 128k worth of IOs that can be aggregated, and * we assume that the average allocation size is 4k, so we need the queue depth * to be 32 per allocator to get good aggregation of sequential writes. */ int zfs_vdev_def_queue_depth = 32; #ifdef __FreeBSD__ #ifdef _KERNEL SYSCTL_DECL(_vfs_zfs_vdev); static int sysctl_zfs_async_write_active_min_dirty_percent(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vfs_zfs_vdev, OID_AUTO, async_write_active_min_dirty_percent, CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, 0, sizeof(int), sysctl_zfs_async_write_active_min_dirty_percent, "I", "Percentage of async write dirty data below which " "async_write_min_active is used."); static int sysctl_zfs_async_write_active_max_dirty_percent(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vfs_zfs_vdev, OID_AUTO, async_write_active_max_dirty_percent, CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RWTUN, 0, sizeof(int), sysctl_zfs_async_write_active_max_dirty_percent, "I", "Percentage of async write dirty data above which " "async_write_max_active is used."); SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, max_active, CTLFLAG_RWTUN, &zfs_vdev_max_active, 0, "The maximum number of I/Os of all types active for each device."); #define ZFS_VDEV_QUEUE_KNOB_MIN(name) \ SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _min_active, CTLFLAG_RWTUN,\ &zfs_vdev_ ## name ## _min_active, 0, \ "Initial number of I/O requests of type " #name \ " active for each device"); #define ZFS_VDEV_QUEUE_KNOB_MAX(name) \ SYSCTL_UINT(_vfs_zfs_vdev, OID_AUTO, name ## _max_active, CTLFLAG_RWTUN,\ &zfs_vdev_ ## name ## _max_active, 0, \ "Maximum number of I/O requests of type " #name \ " active for each device"); ZFS_VDEV_QUEUE_KNOB_MIN(sync_read); ZFS_VDEV_QUEUE_KNOB_MAX(sync_read); ZFS_VDEV_QUEUE_KNOB_MIN(sync_write); ZFS_VDEV_QUEUE_KNOB_MAX(sync_write); ZFS_VDEV_QUEUE_KNOB_MIN(async_read); ZFS_VDEV_QUEUE_KNOB_MAX(async_read); ZFS_VDEV_QUEUE_KNOB_MIN(async_write); ZFS_VDEV_QUEUE_KNOB_MAX(async_write); ZFS_VDEV_QUEUE_KNOB_MIN(scrub); ZFS_VDEV_QUEUE_KNOB_MAX(scrub); ZFS_VDEV_QUEUE_KNOB_MIN(trim); ZFS_VDEV_QUEUE_KNOB_MAX(trim); #undef ZFS_VDEV_QUEUE_KNOB SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, aggregation_limit, CTLFLAG_RWTUN, &zfs_vdev_aggregation_limit, 0, "I/O requests are aggregated up to this size"); SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, read_gap_limit, CTLFLAG_RWTUN, &zfs_vdev_read_gap_limit, 0, "Acceptable gap between two reads being aggregated"); SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, write_gap_limit, CTLFLAG_RWTUN, &zfs_vdev_write_gap_limit, 0, "Acceptable gap between two writes being aggregated"); SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, queue_depth_pct, CTLFLAG_RWTUN, &zfs_vdev_queue_depth_pct, 0, "Queue depth percentage for each top-level"); SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, def_queue_depth, CTLFLAG_RWTUN, &zfs_vdev_def_queue_depth, 0, "Default queue depth for each allocator"); static int sysctl_zfs_async_write_active_min_dirty_percent(SYSCTL_HANDLER_ARGS) { int val, err; val = zfs_vdev_async_write_active_min_dirty_percent; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < 0 || val > 100 || val >= zfs_vdev_async_write_active_max_dirty_percent) return (EINVAL); zfs_vdev_async_write_active_min_dirty_percent = val; return (0); } static int sysctl_zfs_async_write_active_max_dirty_percent(SYSCTL_HANDLER_ARGS) { int val, err; val = zfs_vdev_async_write_active_max_dirty_percent; err = sysctl_handle_int(oidp, &val, 0, req); if (err != 0 || req->newptr == NULL) return (err); if (val < 0 || val > 100 || val <= zfs_vdev_async_write_active_min_dirty_percent) return (EINVAL); zfs_vdev_async_write_active_max_dirty_percent = val; return (0); } #endif #endif int vdev_queue_offset_compare(const void *x1, const void *x2) { const zio_t *z1 = (const zio_t *)x1; const zio_t *z2 = (const zio_t *)x2; int cmp = AVL_CMP(z1->io_offset, z2->io_offset); if (likely(cmp)) return (cmp); return (AVL_PCMP(z1, z2)); } static inline avl_tree_t * vdev_queue_class_tree(vdev_queue_t *vq, zio_priority_t p) { return (&vq->vq_class[p].vqc_queued_tree); } static inline avl_tree_t * vdev_queue_type_tree(vdev_queue_t *vq, zio_type_t t) { if (t == ZIO_TYPE_READ) return (&vq->vq_read_offset_tree); else if (t == ZIO_TYPE_WRITE) return (&vq->vq_write_offset_tree); else return (NULL); } int vdev_queue_timestamp_compare(const void *x1, const void *x2) { const zio_t *z1 = x1; const zio_t *z2 = x2; if (z1->io_timestamp < z2->io_timestamp) return (-1); if (z1->io_timestamp > z2->io_timestamp) return (1); if (z1->io_offset < z2->io_offset) return (-1); if (z1->io_offset > z2->io_offset) return (1); if (z1 < z2) return (-1); if (z1 > z2) return (1); return (0); } void vdev_queue_init(vdev_t *vd) { vdev_queue_t *vq = &vd->vdev_queue; mutex_init(&vq->vq_lock, NULL, MUTEX_DEFAULT, NULL); vq->vq_vdev = vd; avl_create(&vq->vq_active_tree, vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_queue_node)); avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_READ), vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_offset_node)); avl_create(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE), vdev_queue_offset_compare, sizeof (zio_t), offsetof(struct zio, io_offset_node)); for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { int (*compfn) (const void *, const void *); /* * The synchronous i/o queues are dispatched in FIFO rather * than LBA order. This provides more consistent latency for * these i/os. */ if (p == ZIO_PRIORITY_SYNC_READ || p == ZIO_PRIORITY_SYNC_WRITE) compfn = vdev_queue_timestamp_compare; else compfn = vdev_queue_offset_compare; avl_create(vdev_queue_class_tree(vq, p), compfn, sizeof (zio_t), offsetof(struct zio, io_queue_node)); } vq->vq_lastoffset = 0; } void vdev_queue_fini(vdev_t *vd) { vdev_queue_t *vq = &vd->vdev_queue; for (zio_priority_t p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) avl_destroy(vdev_queue_class_tree(vq, p)); avl_destroy(&vq->vq_active_tree); avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_READ)); avl_destroy(vdev_queue_type_tree(vq, ZIO_TYPE_WRITE)); mutex_destroy(&vq->vq_lock); } static void vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; avl_tree_t *qtt; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio); qtt = vdev_queue_type_tree(vq, zio->io_type); if (qtt) avl_add(qtt, zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); spa->spa_queue_stats[zio->io_priority].spa_queued++; if (spa->spa_iokstat != NULL) kstat_waitq_enter(spa->spa_iokstat->ks_data); mutex_exit(&spa->spa_iokstat_lock); #endif } static void vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; avl_tree_t *qtt; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio); qtt = vdev_queue_type_tree(vq, zio->io_type); if (qtt) avl_remove(qtt, zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); ASSERT3U(spa->spa_queue_stats[zio->io_priority].spa_queued, >, 0); spa->spa_queue_stats[zio->io_priority].spa_queued--; if (spa->spa_iokstat != NULL) kstat_waitq_exit(spa->spa_iokstat->ks_data); mutex_exit(&spa->spa_iokstat_lock); #endif } static void vdev_queue_pending_add(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); vq->vq_class[zio->io_priority].vqc_active++; avl_add(&vq->vq_active_tree, zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); spa->spa_queue_stats[zio->io_priority].spa_active++; if (spa->spa_iokstat != NULL) kstat_runq_enter(spa->spa_iokstat->ks_data); mutex_exit(&spa->spa_iokstat_lock); #endif } static void vdev_queue_pending_remove(vdev_queue_t *vq, zio_t *zio) { spa_t *spa = zio->io_spa; ASSERT(MUTEX_HELD(&vq->vq_lock)); ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); vq->vq_class[zio->io_priority].vqc_active--; avl_remove(&vq->vq_active_tree, zio); #ifdef illumos mutex_enter(&spa->spa_iokstat_lock); ASSERT3U(spa->spa_queue_stats[zio->io_priority].spa_active, >, 0); spa->spa_queue_stats[zio->io_priority].spa_active--; if (spa->spa_iokstat != NULL) { kstat_io_t *ksio = spa->spa_iokstat->ks_data; kstat_runq_exit(spa->spa_iokstat->ks_data); if (zio->io_type == ZIO_TYPE_READ) { ksio->reads++; ksio->nread += zio->io_size; } else if (zio->io_type == ZIO_TYPE_WRITE) { ksio->writes++; ksio->nwritten += zio->io_size; } } mutex_exit(&spa->spa_iokstat_lock); #endif } static void vdev_queue_agg_io_done(zio_t *aio) { if (aio->io_type == ZIO_TYPE_READ) { zio_t *pio; zio_link_t *zl = NULL; while ((pio = zio_walk_parents(aio, &zl)) != NULL) { abd_copy_off(pio->io_abd, aio->io_abd, 0, pio->io_offset - aio->io_offset, pio->io_size); } } abd_free(aio->io_abd); } static int vdev_queue_class_min_active(zio_priority_t p) { switch (p) { case ZIO_PRIORITY_SYNC_READ: return (zfs_vdev_sync_read_min_active); case ZIO_PRIORITY_SYNC_WRITE: return (zfs_vdev_sync_write_min_active); case ZIO_PRIORITY_ASYNC_READ: return (zfs_vdev_async_read_min_active); case ZIO_PRIORITY_ASYNC_WRITE: return (zfs_vdev_async_write_min_active); case ZIO_PRIORITY_SCRUB: return (zfs_vdev_scrub_min_active); case ZIO_PRIORITY_TRIM: return (zfs_vdev_trim_min_active); case ZIO_PRIORITY_REMOVAL: return (zfs_vdev_removal_min_active); case ZIO_PRIORITY_INITIALIZING: return (zfs_vdev_initializing_min_active); default: panic("invalid priority %u", p); return (0); } } static __noinline int vdev_queue_max_async_writes(spa_t *spa) { int writes; uint64_t dirty = spa->spa_dsl_pool->dp_dirty_total; uint64_t min_bytes = zfs_dirty_data_max * zfs_vdev_async_write_active_min_dirty_percent / 100; uint64_t max_bytes = zfs_dirty_data_max * zfs_vdev_async_write_active_max_dirty_percent / 100; /* * Sync tasks correspond to interactive user actions. To reduce the * execution time of those actions we push data out as fast as possible. */ if (spa_has_pending_synctask(spa)) { return (zfs_vdev_async_write_max_active); } if (dirty < min_bytes) return (zfs_vdev_async_write_min_active); if (dirty > max_bytes) return (zfs_vdev_async_write_max_active); /* * linear interpolation: * slope = (max_writes - min_writes) / (max_bytes - min_bytes) * move right by min_bytes * move up by min_writes */ writes = (dirty - min_bytes) * (zfs_vdev_async_write_max_active - zfs_vdev_async_write_min_active) / (max_bytes - min_bytes) + zfs_vdev_async_write_min_active; ASSERT3U(writes, >=, zfs_vdev_async_write_min_active); ASSERT3U(writes, <=, zfs_vdev_async_write_max_active); return (writes); } static int vdev_queue_class_max_active(spa_t *spa, zio_priority_t p) { switch (p) { case ZIO_PRIORITY_SYNC_READ: return (zfs_vdev_sync_read_max_active); case ZIO_PRIORITY_SYNC_WRITE: return (zfs_vdev_sync_write_max_active); case ZIO_PRIORITY_ASYNC_READ: return (zfs_vdev_async_read_max_active); case ZIO_PRIORITY_ASYNC_WRITE: return (vdev_queue_max_async_writes(spa)); case ZIO_PRIORITY_SCRUB: return (zfs_vdev_scrub_max_active); case ZIO_PRIORITY_TRIM: return (zfs_vdev_trim_max_active); case ZIO_PRIORITY_REMOVAL: return (zfs_vdev_removal_max_active); case ZIO_PRIORITY_INITIALIZING: return (zfs_vdev_initializing_max_active); default: panic("invalid priority %u", p); return (0); } } /* * Return the i/o class to issue from, or ZIO_PRIORITY_MAX_QUEUEABLE if * there is no eligible class. */ static zio_priority_t vdev_queue_class_to_issue(vdev_queue_t *vq) { spa_t *spa = vq->vq_vdev->vdev_spa; zio_priority_t p; ASSERT(MUTEX_HELD(&vq->vq_lock)); if (avl_numnodes(&vq->vq_active_tree) >= zfs_vdev_max_active) return (ZIO_PRIORITY_NUM_QUEUEABLE); /* find a queue that has not reached its minimum # outstanding i/os */ for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 && vq->vq_class[p].vqc_active < vdev_queue_class_min_active(p)) return (p); } /* * If we haven't found a queue, look for one that hasn't reached its * maximum # outstanding i/os. */ for (p = 0; p < ZIO_PRIORITY_NUM_QUEUEABLE; p++) { if (avl_numnodes(vdev_queue_class_tree(vq, p)) > 0 && vq->vq_class[p].vqc_active < vdev_queue_class_max_active(spa, p)) return (p); } /* No eligible queued i/os */ return (ZIO_PRIORITY_NUM_QUEUEABLE); } /* * Compute the range spanned by two i/os, which is the endpoint of the last * (lio->io_offset + lio->io_size) minus start of the first (fio->io_offset). * Conveniently, the gap between fio and lio is given by -IO_SPAN(lio, fio); * thus fio and lio are adjacent if and only if IO_SPAN(lio, fio) == 0. */ #define IO_SPAN(fio, lio) ((lio)->io_offset + (lio)->io_size - (fio)->io_offset) #define IO_GAP(fio, lio) (-IO_SPAN(lio, fio)) static zio_t * vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) { zio_t *first, *last, *aio, *dio, *mandatory, *nio; uint64_t maxgap = 0; uint64_t size; boolean_t stretch; avl_tree_t *t; enum zio_flag flags; ASSERT(MUTEX_HELD(&vq->vq_lock)); if (zio->io_flags & ZIO_FLAG_DONT_AGGREGATE) return (NULL); first = last = zio; if (zio->io_type == ZIO_TYPE_READ) maxgap = zfs_vdev_read_gap_limit; /* * We can aggregate I/Os that are sufficiently adjacent and of * the same flavor, as expressed by the AGG_INHERIT flags. * The latter requirement is necessary so that certain * attributes of the I/O, such as whether it's a normal I/O * or a scrub/resilver, can be preserved in the aggregate. * We can include optional I/Os, but don't allow them * to begin a range as they add no benefit in that situation. */ /* * We keep track of the last non-optional I/O. */ mandatory = (first->io_flags & ZIO_FLAG_OPTIONAL) ? NULL : first; /* * Walk backwards through sufficiently contiguous I/Os * recording the last non-optional I/O. */ flags = zio->io_flags & ZIO_FLAG_AGG_INHERIT; t = vdev_queue_type_tree(vq, zio->io_type); while (t != NULL && (dio = AVL_PREV(t, first)) != NULL && (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags && IO_SPAN(dio, last) <= zfs_vdev_aggregation_limit && IO_GAP(dio, first) <= maxgap && dio->io_type == zio->io_type) { first = dio; if (mandatory == NULL && !(first->io_flags & ZIO_FLAG_OPTIONAL)) mandatory = first; } /* * Skip any initial optional I/Os. */ while ((first->io_flags & ZIO_FLAG_OPTIONAL) && first != last) { first = AVL_NEXT(t, first); ASSERT(first != NULL); } /* * Walk forward through sufficiently contiguous I/Os. * The aggregation limit does not apply to optional i/os, so that * we can issue contiguous writes even if they are larger than the * aggregation limit. */ while ((dio = AVL_NEXT(t, last)) != NULL && (dio->io_flags & ZIO_FLAG_AGG_INHERIT) == flags && (IO_SPAN(first, dio) <= zfs_vdev_aggregation_limit || (dio->io_flags & ZIO_FLAG_OPTIONAL)) && IO_GAP(last, dio) <= maxgap && dio->io_type == zio->io_type) { last = dio; if (!(last->io_flags & ZIO_FLAG_OPTIONAL)) mandatory = last; } /* * Now that we've established the range of the I/O aggregation * we must decide what to do with trailing optional I/Os. * For reads, there's nothing to do. While we are unable to * aggregate further, it's possible that a trailing optional * I/O would allow the underlying device to aggregate with * subsequent I/Os. We must therefore determine if the next * non-optional I/O is close enough to make aggregation * worthwhile. */ stretch = B_FALSE; if (zio->io_type == ZIO_TYPE_WRITE && mandatory != NULL) { zio_t *nio = last; while ((dio = AVL_NEXT(t, nio)) != NULL && IO_GAP(nio, dio) == 0 && IO_GAP(mandatory, dio) <= zfs_vdev_write_gap_limit) { nio = dio; if (!(nio->io_flags & ZIO_FLAG_OPTIONAL)) { stretch = B_TRUE; break; } } } if (stretch) { /* * We are going to include an optional io in our aggregated * span, thus closing the write gap. Only mandatory i/os can * start aggregated spans, so make sure that the next i/o * after our span is mandatory. */ dio = AVL_NEXT(t, last); dio->io_flags &= ~ZIO_FLAG_OPTIONAL; } else { /* do not include the optional i/o */ while (last != mandatory && last != first) { ASSERT(last->io_flags & ZIO_FLAG_OPTIONAL); last = AVL_PREV(t, last); ASSERT(last != NULL); } } if (first == last) return (NULL); size = IO_SPAN(first, last); ASSERT3U(size, <=, SPA_MAXBLOCKSIZE); aio = zio_vdev_delegated_io(first->io_vd, first->io_offset, abd_alloc_for_io(size, B_TRUE), size, first->io_type, zio->io_priority, flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE, vdev_queue_agg_io_done, NULL); aio->io_timestamp = first->io_timestamp; nio = first; do { dio = nio; nio = AVL_NEXT(t, dio); ASSERT3U(dio->io_type, ==, aio->io_type); if (dio->io_flags & ZIO_FLAG_NODATA) { ASSERT3U(dio->io_type, ==, ZIO_TYPE_WRITE); abd_zero_off(aio->io_abd, dio->io_offset - aio->io_offset, dio->io_size); } else if (dio->io_type == ZIO_TYPE_WRITE) { abd_copy_off(aio->io_abd, dio->io_abd, dio->io_offset - aio->io_offset, 0, dio->io_size); } zio_add_child(dio, aio); vdev_queue_io_remove(vq, dio); zio_vdev_io_bypass(dio); zio_execute(dio); } while (dio != last); return (aio); } static zio_t * vdev_queue_io_to_issue(vdev_queue_t *vq) { zio_t *zio, *aio; zio_priority_t p; avl_index_t idx; avl_tree_t *tree; zio_t search; again: ASSERT(MUTEX_HELD(&vq->vq_lock)); p = vdev_queue_class_to_issue(vq); if (p == ZIO_PRIORITY_NUM_QUEUEABLE) { /* No eligible queued i/os */ return (NULL); } /* * For LBA-ordered queues (async / scrub / initializing), issue the * i/o which follows the most recently issued i/o in LBA (offset) order. * * For FIFO queues (sync), issue the i/o with the lowest timestamp. */ tree = vdev_queue_class_tree(vq, p); search.io_timestamp = 0; search.io_offset = vq->vq_last_offset + 1; VERIFY3P(avl_find(tree, &search, &idx), ==, NULL); zio = avl_nearest(tree, idx, AVL_AFTER); if (zio == NULL) zio = avl_first(tree); ASSERT3U(zio->io_priority, ==, p); aio = vdev_queue_aggregate(vq, zio); if (aio != NULL) zio = aio; else vdev_queue_io_remove(vq, zio); /* * If the I/O is or was optional and therefore has no data, we need to * simply discard it. We need to drop the vdev queue's lock to avoid a * deadlock that we could encounter since this I/O will complete * immediately. */ if (zio->io_flags & ZIO_FLAG_NODATA) { mutex_exit(&vq->vq_lock); zio_vdev_io_bypass(zio); zio_execute(zio); mutex_enter(&vq->vq_lock); goto again; } vdev_queue_pending_add(vq, zio); vq->vq_last_offset = zio->io_offset; return (zio); } zio_t * vdev_queue_io(zio_t *zio) { vdev_queue_t *vq = &zio->io_vd->vdev_queue; zio_t *nio; if (zio->io_flags & ZIO_FLAG_DONT_QUEUE) return (zio); /* * Children i/os inherent their parent's priority, which might * not match the child's i/o type. Fix it up here. */ if (zio->io_type == ZIO_TYPE_READ) { if (zio->io_priority != ZIO_PRIORITY_SYNC_READ && zio->io_priority != ZIO_PRIORITY_ASYNC_READ && zio->io_priority != ZIO_PRIORITY_SCRUB && zio->io_priority != ZIO_PRIORITY_REMOVAL && zio->io_priority != ZIO_PRIORITY_INITIALIZING) zio->io_priority = ZIO_PRIORITY_ASYNC_READ; } else if (zio->io_type == ZIO_TYPE_WRITE) { if (zio->io_priority != ZIO_PRIORITY_SYNC_WRITE && zio->io_priority != ZIO_PRIORITY_ASYNC_WRITE && zio->io_priority != ZIO_PRIORITY_REMOVAL && zio->io_priority != ZIO_PRIORITY_INITIALIZING) zio->io_priority = ZIO_PRIORITY_ASYNC_WRITE; } else { ASSERT(zio->io_type == ZIO_TYPE_FREE); zio->io_priority = ZIO_PRIORITY_TRIM; } zio->io_flags |= ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE; mutex_enter(&vq->vq_lock); zio->io_timestamp = gethrtime(); vdev_queue_io_add(vq, zio); nio = vdev_queue_io_to_issue(vq); mutex_exit(&vq->vq_lock); if (nio == NULL) return (NULL); if (nio->io_done == vdev_queue_agg_io_done) { zio_nowait(nio); return (NULL); } return (nio); } void vdev_queue_io_done(zio_t *zio) { vdev_queue_t *vq = &zio->io_vd->vdev_queue; zio_t *nio; mutex_enter(&vq->vq_lock); vdev_queue_pending_remove(vq, zio); vq->vq_io_complete_ts = gethrtime(); while ((nio = vdev_queue_io_to_issue(vq)) != NULL) { mutex_exit(&vq->vq_lock); if (nio->io_done == vdev_queue_agg_io_done) { zio_nowait(nio); } else { zio_vdev_io_reissue(nio); zio_execute(nio); } mutex_enter(&vq->vq_lock); } mutex_exit(&vq->vq_lock); } void vdev_queue_change_io_priority(zio_t *zio, zio_priority_t priority) { vdev_queue_t *vq = &zio->io_vd->vdev_queue; avl_tree_t *tree; + /* + * ZIO_PRIORITY_NOW is used by the vdev cache code and the aggregate zio + * code to issue IOs without adding them to the vdev queue. In this + * case, the zio is already going to be issued as quickly as possible + * and so it doesn't need any reprioitization to help. + */ + if (zio->io_priority == ZIO_PRIORITY_NOW) + return; + ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); ASSERT3U(priority, <, ZIO_PRIORITY_NUM_QUEUEABLE); if (zio->io_type == ZIO_TYPE_READ) { if (priority != ZIO_PRIORITY_SYNC_READ && priority != ZIO_PRIORITY_ASYNC_READ && priority != ZIO_PRIORITY_SCRUB) priority = ZIO_PRIORITY_ASYNC_READ; } else { ASSERT(zio->io_type == ZIO_TYPE_WRITE); if (priority != ZIO_PRIORITY_SYNC_WRITE && priority != ZIO_PRIORITY_ASYNC_WRITE) priority = ZIO_PRIORITY_ASYNC_WRITE; } mutex_enter(&vq->vq_lock); /* * If the zio is in none of the queues we can simply change * the priority. If the zio is waiting to be submitted we must * remove it from the queue and re-insert it with the new priority. * Otherwise, the zio is currently active and we cannot change its * priority. */ tree = vdev_queue_class_tree(vq, zio->io_priority); if (avl_find(tree, zio, NULL) == zio) { avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio); zio->io_priority = priority; avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio); } else if (avl_find(&vq->vq_active_tree, zio, NULL) != zio) { zio->io_priority = priority; } mutex_exit(&vq->vq_lock); } /* * As these three methods are only used for load calculations we're not concerned * if we get an incorrect value on 32bit platforms due to lack of vq_lock mutex * use here, instead we prefer to keep it lock free for performance. */ int vdev_queue_length(vdev_t *vd) { return (avl_numnodes(&vd->vdev_queue.vq_active_tree)); } uint64_t vdev_queue_lastoffset(vdev_t *vd) { return (vd->vdev_queue.vq_lastoffset); } void vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio) { vd->vdev_queue.vq_lastoffset = zio->io_offset + zio->io_size; } Index: projects/clang700-import/sys/cddl/contrib/opensolaris =================================================================== --- projects/clang700-import/sys/cddl/contrib/opensolaris (revision 339014) +++ projects/clang700-import/sys/cddl/contrib/opensolaris (revision 339015) Property changes on: projects/clang700-import/sys/cddl/contrib/opensolaris ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head/sys/cddl/contrib/opensolaris:r338988-339014 Index: projects/clang700-import/sys/compat/freebsd32/freebsd32_ioctl.c =================================================================== --- projects/clang700-import/sys/compat/freebsd32/freebsd32_ioctl.c (revision 339014) +++ projects/clang700-import/sys/compat/freebsd32/freebsd32_ioctl.c (revision 339015) @@ -1,407 +1,298 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2008 David E. O'Brien * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include CTASSERT(sizeof(struct ioc_read_toc_entry32) == 8); CTASSERT(sizeof(struct mem_range_op32) == 12); -CTASSERT(sizeof(struct pci_conf_io32) == 36); -CTASSERT(sizeof(struct pci_match_conf32) == 44); -CTASSERT(sizeof(struct pci_conf32) == 44); static int freebsd32_ioctl_ioc_read_toc(struct thread *td, struct freebsd32_ioctl_args *uap, struct file *fp) { struct ioc_read_toc_entry toce; struct ioc_read_toc_entry32 toce32; int error; if ((error = copyin(uap->data, &toce32, sizeof(toce32)))) return (error); CP(toce32, toce, address_format); CP(toce32, toce, starting_track); CP(toce32, toce, data_len); PTRIN_CP(toce32, toce, data); if ((error = fo_ioctl(fp, CDIOREADTOCENTRYS, (caddr_t)&toce, td->td_ucred, td))) { CP(toce, toce32, address_format); CP(toce, toce32, starting_track); CP(toce, toce32, data_len); PTROUT_CP(toce, toce32, data); error = copyout(&toce32, uap->data, sizeof(toce32)); } return error; } static int freebsd32_ioctl_fiodgname(struct thread *td, struct freebsd32_ioctl_args *uap, struct file *fp) { struct fiodgname_arg fgn; struct fiodgname_arg32 fgn32; int error; if ((error = copyin(uap->data, &fgn32, sizeof fgn32)) != 0) return (error); CP(fgn32, fgn, len); PTRIN_CP(fgn32, fgn, buf); error = fo_ioctl(fp, FIODGNAME, (caddr_t)&fgn, td->td_ucred, td); return (error); } static int freebsd32_ioctl_memrange(struct thread *td, struct freebsd32_ioctl_args *uap, struct file *fp) { struct mem_range_op mro; struct mem_range_op32 mro32; int error; u_long com; if ((error = copyin(uap->data, &mro32, sizeof(mro32))) != 0) return (error); PTRIN_CP(mro32, mro, mo_desc); CP(mro32, mro, mo_arg[0]); CP(mro32, mro, mo_arg[1]); com = 0; switch (uap->com) { case MEMRANGE_GET32: com = MEMRANGE_GET; break; case MEMRANGE_SET32: com = MEMRANGE_SET; break; default: panic("%s: unknown MEMRANGE %#x", __func__, uap->com); } if ((error = fo_ioctl(fp, com, (caddr_t)&mro, td->td_ucred, td)) != 0) return (error); if ( (com & IOC_OUT) ) { CP(mro, mro32, mo_arg[0]); CP(mro, mro32, mo_arg[1]); error = copyout(&mro32, uap->data, sizeof(mro32)); } return (error); } static int -freebsd32_ioctl_pciocgetconf(struct thread *td, - struct freebsd32_ioctl_args *uap, struct file *fp) -{ - struct pci_conf_io pci; - struct pci_conf_io32 pci32; - struct pci_match_conf32 pmc32; - struct pci_match_conf32 *pmc32p; - struct pci_match_conf pmc; - struct pci_match_conf *pmcp; - struct pci_conf32 pc32; - struct pci_conf32 *pc32p; - struct pci_conf pc; - struct pci_conf *pcp; - u_int32_t i; - u_int32_t npat_to_convert; - u_int32_t nmatch_to_convert; - vm_offset_t addr; - int error; - - if ((error = copyin(uap->data, &pci32, sizeof(pci32))) != 0) - return (error); - - CP(pci32, pci, num_patterns); - CP(pci32, pci, offset); - CP(pci32, pci, generation); - - npat_to_convert = pci32.pat_buf_len / sizeof(struct pci_match_conf32); - pci.pat_buf_len = npat_to_convert * sizeof(struct pci_match_conf); - pci.patterns = NULL; - nmatch_to_convert = pci32.match_buf_len / sizeof(struct pci_conf32); - pci.match_buf_len = nmatch_to_convert * sizeof(struct pci_conf); - pci.matches = NULL; - - if ((error = copyout_map(td, &addr, pci.pat_buf_len)) != 0) - goto cleanup; - pci.patterns = (struct pci_match_conf *)addr; - if ((error = copyout_map(td, &addr, pci.match_buf_len)) != 0) - goto cleanup; - pci.matches = (struct pci_conf *)addr; - - npat_to_convert = min(npat_to_convert, pci.num_patterns); - - for (i = 0, pmc32p = (struct pci_match_conf32 *)PTRIN(pci32.patterns), - pmcp = pci.patterns; - i < npat_to_convert; i++, pmc32p++, pmcp++) { - if ((error = copyin(pmc32p, &pmc32, sizeof(pmc32))) != 0) - goto cleanup; - CP(pmc32,pmc,pc_sel); - strlcpy(pmc.pd_name, pmc32.pd_name, sizeof(pmc.pd_name)); - CP(pmc32,pmc,pd_unit); - CP(pmc32,pmc,pc_vendor); - CP(pmc32,pmc,pc_device); - CP(pmc32,pmc,pc_class); - CP(pmc32,pmc,flags); - if ((error = copyout(&pmc, pmcp, sizeof(pmc))) != 0) - goto cleanup; - } - - if ((error = fo_ioctl(fp, PCIOCGETCONF, (caddr_t)&pci, - td->td_ucred, td)) != 0) - goto cleanup; - - nmatch_to_convert = min(nmatch_to_convert, pci.num_matches); - - for (i = 0, pcp = pci.matches, - pc32p = (struct pci_conf32 *)PTRIN(pci32.matches); - i < nmatch_to_convert; i++, pcp++, pc32p++) { - if ((error = copyin(pcp, &pc, sizeof(pc))) != 0) - goto cleanup; - CP(pc,pc32,pc_sel); - CP(pc,pc32,pc_hdr); - CP(pc,pc32,pc_subvendor); - CP(pc,pc32,pc_subdevice); - CP(pc,pc32,pc_vendor); - CP(pc,pc32,pc_device); - CP(pc,pc32,pc_class); - CP(pc,pc32,pc_subclass); - CP(pc,pc32,pc_progif); - CP(pc,pc32,pc_revid); - strlcpy(pc32.pd_name, pc.pd_name, sizeof(pc32.pd_name)); - CP(pc,pc32,pd_unit); - if ((error = copyout(&pc32, pc32p, sizeof(pc32))) != 0) - goto cleanup; - } - - CP(pci, pci32, num_matches); - CP(pci, pci32, offset); - CP(pci, pci32, generation); - CP(pci, pci32, status); - - error = copyout(&pci32, uap->data, sizeof(pci32)); - -cleanup: - if (pci.patterns) - copyout_unmap(td, (vm_offset_t)pci.patterns, pci.pat_buf_len); - if (pci.matches) - copyout_unmap(td, (vm_offset_t)pci.matches, pci.match_buf_len); - - return (error); -} - -static int freebsd32_ioctl_barmmap(struct thread *td, struct freebsd32_ioctl_args *uap, struct file *fp) { struct pci_bar_mmap32 pbm32; struct pci_bar_mmap pbm; int error; error = copyin(uap->data, &pbm32, sizeof(pbm32)); if (error != 0) return (error); PTRIN_CP(pbm32, pbm, pbm_map_base); CP(pbm32, pbm, pbm_sel); CP(pbm32, pbm, pbm_reg); CP(pbm32, pbm, pbm_flags); CP(pbm32, pbm, pbm_memattr); pbm.pbm_bar_length = PAIR32TO64(uint64_t, pbm32.pbm_bar_length); error = fo_ioctl(fp, PCIOCBARMMAP, (caddr_t)&pbm, td->td_ucred, td); if (error == 0) { PTROUT_CP(pbm, pbm32, pbm_map_base); CP(pbm, pbm32, pbm_map_length); #if BYTE_ORDER == LITTLE_ENDIAN pbm32.pbm_bar_length1 = pbm.pbm_bar_length; pbm32.pbm_bar_length2 = pbm.pbm_bar_length >> 32; #else pbm32.pbm_bar_length1 = pbm.pbm_bar_length >> 32; pbm32.pbm_bar_length2 = pbm.pbm_bar_length; #endif CP(pbm, pbm32, pbm_bar_off); error = copyout(&pbm32, uap->data, sizeof(pbm32)); } return (error); } static int freebsd32_ioctl_sg(struct thread *td, struct freebsd32_ioctl_args *uap, struct file *fp) { struct sg_io_hdr io; struct sg_io_hdr32 io32; int error; if ((error = copyin(uap->data, &io32, sizeof(io32))) != 0) return (error); CP(io32, io, interface_id); CP(io32, io, dxfer_direction); CP(io32, io, cmd_len); CP(io32, io, mx_sb_len); CP(io32, io, iovec_count); CP(io32, io, dxfer_len); PTRIN_CP(io32, io, dxferp); PTRIN_CP(io32, io, cmdp); PTRIN_CP(io32, io, sbp); CP(io32, io, timeout); CP(io32, io, flags); CP(io32, io, pack_id); PTRIN_CP(io32, io, usr_ptr); CP(io32, io, status); CP(io32, io, masked_status); CP(io32, io, msg_status); CP(io32, io, sb_len_wr); CP(io32, io, host_status); CP(io32, io, driver_status); CP(io32, io, resid); CP(io32, io, duration); CP(io32, io, info); if ((error = fo_ioctl(fp, SG_IO, (caddr_t)&io, td->td_ucred, td)) != 0) return (error); CP(io, io32, interface_id); CP(io, io32, dxfer_direction); CP(io, io32, cmd_len); CP(io, io32, mx_sb_len); CP(io, io32, iovec_count); CP(io, io32, dxfer_len); PTROUT_CP(io, io32, dxferp); PTROUT_CP(io, io32, cmdp); PTROUT_CP(io, io32, sbp); CP(io, io32, timeout); CP(io, io32, flags); CP(io, io32, pack_id); PTROUT_CP(io, io32, usr_ptr); CP(io, io32, status); CP(io, io32, masked_status); CP(io, io32, msg_status); CP(io, io32, sb_len_wr); CP(io, io32, host_status); CP(io, io32, driver_status); CP(io, io32, resid); CP(io, io32, duration); CP(io, io32, info); error = copyout(&io32, uap->data, sizeof(io32)); return (error); } int freebsd32_ioctl(struct thread *td, struct freebsd32_ioctl_args *uap) { struct ioctl_args ap /*{ int fd; u_long com; caddr_t data; }*/ ; struct file *fp; cap_rights_t rights; int error; error = fget(td, uap->fd, cap_rights_init(&rights, CAP_IOCTL), &fp); if (error != 0) return (error); if ((fp->f_flag & (FREAD | FWRITE)) == 0) { fdrop(fp, td); return (EBADF); } switch (uap->com) { case CDIOREADTOCENTRYS_32: error = freebsd32_ioctl_ioc_read_toc(td, uap, fp); break; case FIODGNAME_32: error = freebsd32_ioctl_fiodgname(td, uap, fp); break; case MEMRANGE_GET32: /* FALLTHROUGH */ case MEMRANGE_SET32: error = freebsd32_ioctl_memrange(td, uap, fp); - break; - - case PCIOCGETCONF_32: - error = freebsd32_ioctl_pciocgetconf(td, uap, fp); break; case SG_IO_32: error = freebsd32_ioctl_sg(td, uap, fp); break; case PCIOCBARMMAP_32: error = freebsd32_ioctl_barmmap(td, uap, fp); break; default: fdrop(fp, td); ap.fd = uap->fd; ap.com = uap->com; PTRIN_CP(*uap, ap, data); return sys_ioctl(td, &ap); } fdrop(fp, td); return (error); } Index: projects/clang700-import/sys/compat/freebsd32/freebsd32_ioctl.h =================================================================== --- projects/clang700-import/sys/compat/freebsd32/freebsd32_ioctl.h (revision 339014) +++ projects/clang700-import/sys/compat/freebsd32/freebsd32_ioctl.h (revision 339015) @@ -1,117 +1,77 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2008 David E. O'Brien * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _COMPAT_FREEBSD32_IOCTL_H_ #define _COMPAT_FREEBSD32_IOCTL_H_ #include typedef __uint32_t caddr_t32; struct ioc_read_toc_entry32 { u_char address_format; u_char starting_track; u_short data_len; uint32_t data; /* struct cd_toc_entry* */ }; struct fiodgname_arg32 { int len; caddr_t32 buf; }; struct mem_range_op32 { caddr_t32 mo_desc; int mo_arg[2]; }; -struct pci_conf32 { - struct pcisel pc_sel; /* domain+bus+slot+function */ - u_int8_t pc_hdr; /* PCI header type */ - u_int16_t pc_subvendor; /* card vendor ID */ - u_int16_t pc_subdevice; /* card device ID, assigned by - card vendor */ - u_int16_t pc_vendor; /* chip vendor ID */ - u_int16_t pc_device; /* chip device ID, assigned by - chip vendor */ - u_int8_t pc_class; /* chip PCI class */ - u_int8_t pc_subclass; /* chip PCI subclass */ - u_int8_t pc_progif; /* chip PCI programming interface */ - u_int8_t pc_revid; /* chip revision ID */ - char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ - u_int32_t pd_unit; /* device unit number */ -}; - -struct pci_match_conf32 { - struct pcisel pc_sel; /* domain+bus+slot+function */ - char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ - u_int32_t pd_unit; /* Unit number */ - u_int16_t pc_vendor; /* PCI Vendor ID */ - u_int16_t pc_device; /* PCI Device ID */ - u_int8_t pc_class; /* PCI class */ - u_int32_t flags; /* Matching expression */ -}; - -struct pci_conf_io32 { - u_int32_t pat_buf_len; /* pattern buffer length */ - u_int32_t num_patterns; /* number of patterns */ - caddr_t32 patterns; /* struct pci_match_conf ptr */ - u_int32_t match_buf_len; /* match buffer length */ - u_int32_t num_matches; /* number of matches returned */ - caddr_t32 matches; /* struct pci_conf ptr */ - u_int32_t offset; /* offset into device list */ - u_int32_t generation; /* device list generation */ - u_int32_t status; /* request status */ -}; - struct pci_bar_mmap32 { uint32_t pbm_map_base; uint32_t pbm_map_length; uint32_t pbm_bar_length1, pbm_bar_length2; int pbm_bar_off; struct pcisel pbm_sel; int pbm_reg; int pbm_flags; int pbm_memattr; }; #define CDIOREADTOCENTRYS_32 _IOWR('c', 5, struct ioc_read_toc_entry32) #define FIODGNAME_32 _IOW('f', 120, struct fiodgname_arg32) #define MEMRANGE_GET32 _IOWR('m', 50, struct mem_range_op32) #define MEMRANGE_SET32 _IOW('m', 51, struct mem_range_op32) -#define PCIOCGETCONF_32 _IOWR('p', 5, struct pci_conf_io32) #define SG_IO_32 _IOWR(SGIOC, 0x85, struct sg_io_hdr32) #define PCIOCBARMMAP_32 _IOWR('p', 8, struct pci_bar_mmap32) #endif /* _COMPAT_FREEBSD32_IOCTL_H_ */ Index: projects/clang700-import/sys/compat/freebsd32/freebsd32_syscall.h =================================================================== --- projects/clang700-import/sys/compat/freebsd32/freebsd32_syscall.h (revision 339014) +++ projects/clang700-import/sys/compat/freebsd32/freebsd32_syscall.h (revision 339015) @@ -1,472 +1,492 @@ /* * System call numbers. * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ */ #define FREEBSD32_SYS_syscall 0 #define FREEBSD32_SYS_exit 1 #define FREEBSD32_SYS_fork 2 #define FREEBSD32_SYS_read 3 #define FREEBSD32_SYS_write 4 #define FREEBSD32_SYS_open 5 #define FREEBSD32_SYS_close 6 #define FREEBSD32_SYS_freebsd32_wait4 7 /* 8 is obsolete old creat */ #define FREEBSD32_SYS_link 9 #define FREEBSD32_SYS_unlink 10 /* 11 is obsolete execv */ #define FREEBSD32_SYS_chdir 12 #define FREEBSD32_SYS_fchdir 13 #define FREEBSD32_SYS_freebsd11_freebsd32_mknod 14 #define FREEBSD32_SYS_chmod 15 #define FREEBSD32_SYS_chown 16 #define FREEBSD32_SYS_break 17 /* 18 is freebsd4 freebsd32_getfsstat */ /* 19 is old freebsd32_lseek */ #define FREEBSD32_SYS_getpid 20 #define FREEBSD32_SYS_mount 21 #define FREEBSD32_SYS_unmount 22 #define FREEBSD32_SYS_setuid 23 #define FREEBSD32_SYS_getuid 24 #define FREEBSD32_SYS_geteuid 25 #define FREEBSD32_SYS_ptrace 26 #define FREEBSD32_SYS_freebsd32_recvmsg 27 #define FREEBSD32_SYS_freebsd32_sendmsg 28 #define FREEBSD32_SYS_freebsd32_recvfrom 29 #define FREEBSD32_SYS_accept 30 #define FREEBSD32_SYS_getpeername 31 #define FREEBSD32_SYS_getsockname 32 #define FREEBSD32_SYS_access 33 #define FREEBSD32_SYS_chflags 34 #define FREEBSD32_SYS_fchflags 35 #define FREEBSD32_SYS_sync 36 #define FREEBSD32_SYS_kill 37 /* 38 is old freebsd32_stat */ #define FREEBSD32_SYS_getppid 39 /* 40 is old freebsd32_lstat */ #define FREEBSD32_SYS_dup 41 #define FREEBSD32_SYS_freebsd10_freebsd32_pipe 42 #define FREEBSD32_SYS_getegid 43 #define FREEBSD32_SYS_profil 44 #define FREEBSD32_SYS_ktrace 45 /* 46 is old freebsd32_sigaction */ #define FREEBSD32_SYS_getgid 47 /* 48 is old freebsd32_sigprocmask */ #define FREEBSD32_SYS_getlogin 49 #define FREEBSD32_SYS_setlogin 50 #define FREEBSD32_SYS_acct 51 /* 52 is old freebsd32_sigpending */ #define FREEBSD32_SYS_freebsd32_sigaltstack 53 #define FREEBSD32_SYS_freebsd32_ioctl 54 #define FREEBSD32_SYS_reboot 55 #define FREEBSD32_SYS_revoke 56 #define FREEBSD32_SYS_symlink 57 #define FREEBSD32_SYS_readlink 58 #define FREEBSD32_SYS_freebsd32_execve 59 #define FREEBSD32_SYS_umask 60 #define FREEBSD32_SYS_chroot 61 /* 62 is old freebsd32_fstat */ /* 63 is obsolete ogetkerninfo */ /* 64 is old freebsd32_getpagesize */ #define FREEBSD32_SYS_msync 65 #define FREEBSD32_SYS_vfork 66 /* 67 is obsolete vread */ /* 68 is obsolete vwrite */ #define FREEBSD32_SYS_sbrk 69 #define FREEBSD32_SYS_sstk 70 /* 71 is old mmap */ #define FREEBSD32_SYS_freebsd11_vadvise 72 #define FREEBSD32_SYS_munmap 73 #define FREEBSD32_SYS_freebsd32_mprotect 74 #define FREEBSD32_SYS_madvise 75 /* 76 is obsolete vhangup */ /* 77 is obsolete vlimit */ #define FREEBSD32_SYS_mincore 78 #define FREEBSD32_SYS_getgroups 79 #define FREEBSD32_SYS_setgroups 80 #define FREEBSD32_SYS_getpgrp 81 #define FREEBSD32_SYS_setpgid 82 #define FREEBSD32_SYS_freebsd32_setitimer 83 /* 84 is obsolete owait */ #define FREEBSD32_SYS_swapon 85 #define FREEBSD32_SYS_freebsd32_getitimer 86 /* 87 is obsolete ogethostname */ /* 88 is obsolete osethostname */ #define FREEBSD32_SYS_getdtablesize 89 #define FREEBSD32_SYS_dup2 90 #define FREEBSD32_SYS_freebsd32_fcntl 92 #define FREEBSD32_SYS_freebsd32_select 93 #define FREEBSD32_SYS_fsync 95 #define FREEBSD32_SYS_setpriority 96 #define FREEBSD32_SYS_socket 97 #define FREEBSD32_SYS_connect 98 /* 99 is obsolete oaccept */ #define FREEBSD32_SYS_getpriority 100 /* 101 is obsolete osend */ /* 102 is obsolete orecv */ /* 103 is old freebsd32_sigreturn */ #define FREEBSD32_SYS_bind 104 #define FREEBSD32_SYS_setsockopt 105 #define FREEBSD32_SYS_listen 106 /* 107 is obsolete vtimes */ /* 108 is old freebsd32_sigvec */ /* 109 is old freebsd32_sigblock */ /* 110 is old freebsd32_sigsetmask */ /* 111 is old freebsd32_sigsuspend */ /* 112 is old freebsd32_sigstack */ /* 113 is obsolete orecvmsg */ /* 114 is obsolete osendmsg */ /* 115 is obsolete vtrace */ #define FREEBSD32_SYS_freebsd32_gettimeofday 116 #define FREEBSD32_SYS_freebsd32_getrusage 117 #define FREEBSD32_SYS_getsockopt 118 #define FREEBSD32_SYS_freebsd32_readv 120 #define FREEBSD32_SYS_freebsd32_writev 121 #define FREEBSD32_SYS_freebsd32_settimeofday 122 #define FREEBSD32_SYS_fchown 123 #define FREEBSD32_SYS_fchmod 124 /* 125 is obsolete orecvfrom */ #define FREEBSD32_SYS_setreuid 126 #define FREEBSD32_SYS_setregid 127 #define FREEBSD32_SYS_rename 128 /* 129 is old truncate */ /* 130 is old ftruncate */ #define FREEBSD32_SYS_flock 131 #define FREEBSD32_SYS_mkfifo 132 #define FREEBSD32_SYS_sendto 133 #define FREEBSD32_SYS_shutdown 134 #define FREEBSD32_SYS_socketpair 135 #define FREEBSD32_SYS_mkdir 136 #define FREEBSD32_SYS_rmdir 137 #define FREEBSD32_SYS_freebsd32_utimes 138 /* 139 is obsolete 4.2 sigreturn */ #define FREEBSD32_SYS_freebsd32_adjtime 140 /* 141 is obsolete ogetpeername */ /* 142 is obsolete ogethostid */ /* 143 is obsolete sethostid */ /* 144 is obsolete getrlimit */ /* 145 is obsolete setrlimit */ /* 146 is obsolete killpg */ #define FREEBSD32_SYS_setsid 147 #define FREEBSD32_SYS_quotactl 148 /* 149 is obsolete oquota */ /* 150 is obsolete ogetsockname */ /* 156 is old freebsd32_getdirentries */ /* 157 is freebsd4 freebsd32_statfs */ /* 158 is freebsd4 freebsd32_fstatfs */ #define FREEBSD32_SYS_getfh 161 /* 162 is obsolete getdomainname */ /* 163 is obsolete setdomainname */ /* 164 is obsolete uname */ #define FREEBSD32_SYS_freebsd32_sysarch 165 #define FREEBSD32_SYS_rtprio 166 #define FREEBSD32_SYS_freebsd32_semsys 169 #define FREEBSD32_SYS_freebsd32_msgsys 170 #define FREEBSD32_SYS_freebsd32_shmsys 171 /* 173 is freebsd6 freebsd32_pread */ /* 174 is freebsd6 freebsd32_pwrite */ #define FREEBSD32_SYS_ntp_adjtime 176 #define FREEBSD32_SYS_setgid 181 #define FREEBSD32_SYS_setegid 182 #define FREEBSD32_SYS_seteuid 183 + /* 184 is obsolete lfs_bmapv */ + /* 185 is obsolete lfs_markv */ + /* 186 is obsolete lfs_segclean */ + /* 187 is obsolete lfs_segwait */ #define FREEBSD32_SYS_freebsd11_freebsd32_stat 188 #define FREEBSD32_SYS_freebsd11_freebsd32_fstat 189 #define FREEBSD32_SYS_freebsd11_freebsd32_lstat 190 #define FREEBSD32_SYS_pathconf 191 #define FREEBSD32_SYS_fpathconf 192 #define FREEBSD32_SYS_getrlimit 194 #define FREEBSD32_SYS_setrlimit 195 #define FREEBSD32_SYS_freebsd11_freebsd32_getdirentries 196 /* 197 is freebsd6 freebsd32_mmap */ #define FREEBSD32_SYS___syscall 198 /* 199 is freebsd6 freebsd32_lseek */ /* 200 is freebsd6 freebsd32_truncate */ /* 201 is freebsd6 freebsd32_ftruncate */ #define FREEBSD32_SYS_freebsd32_sysctl 202 #define FREEBSD32_SYS_mlock 203 #define FREEBSD32_SYS_munlock 204 #define FREEBSD32_SYS_undelete 205 #define FREEBSD32_SYS_freebsd32_futimes 206 #define FREEBSD32_SYS_getpgid 207 #define FREEBSD32_SYS_poll 209 #define FREEBSD32_SYS_freebsd7_freebsd32_semctl 220 #define FREEBSD32_SYS_semget 221 #define FREEBSD32_SYS_semop 222 + /* 223 is obsolete semconfig */ #define FREEBSD32_SYS_freebsd7_freebsd32_msgctl 224 #define FREEBSD32_SYS_msgget 225 #define FREEBSD32_SYS_freebsd32_msgsnd 226 #define FREEBSD32_SYS_freebsd32_msgrcv 227 #define FREEBSD32_SYS_shmat 228 #define FREEBSD32_SYS_freebsd7_freebsd32_shmctl 229 #define FREEBSD32_SYS_shmdt 230 #define FREEBSD32_SYS_shmget 231 #define FREEBSD32_SYS_freebsd32_clock_gettime 232 #define FREEBSD32_SYS_freebsd32_clock_settime 233 #define FREEBSD32_SYS_freebsd32_clock_getres 234 #define FREEBSD32_SYS_freebsd32_ktimer_create 235 #define FREEBSD32_SYS_ktimer_delete 236 #define FREEBSD32_SYS_freebsd32_ktimer_settime 237 #define FREEBSD32_SYS_freebsd32_ktimer_gettime 238 #define FREEBSD32_SYS_ktimer_getoverrun 239 #define FREEBSD32_SYS_freebsd32_nanosleep 240 #define FREEBSD32_SYS_ffclock_getcounter 241 #define FREEBSD32_SYS_ffclock_setestimate 242 #define FREEBSD32_SYS_ffclock_getestimate 243 #define FREEBSD32_SYS_freebsd32_clock_nanosleep 244 #define FREEBSD32_SYS_freebsd32_clock_getcpuclockid2 247 #define FREEBSD32_SYS_minherit 250 #define FREEBSD32_SYS_rfork 251 /* 252 is obsolete openbsd_poll */ #define FREEBSD32_SYS_issetugid 253 #define FREEBSD32_SYS_lchown 254 #define FREEBSD32_SYS_freebsd32_aio_read 255 #define FREEBSD32_SYS_freebsd32_aio_write 256 #define FREEBSD32_SYS_freebsd32_lio_listio 257 #define FREEBSD32_SYS_freebsd11_freebsd32_getdents 272 #define FREEBSD32_SYS_lchmod 274 /* 275 is obsolete netbsd_lchown */ #define FREEBSD32_SYS_freebsd32_lutimes 276 /* 277 is obsolete netbsd_msync */ #define FREEBSD32_SYS_freebsd11_nstat 278 #define FREEBSD32_SYS_freebsd11_nfstat 279 #define FREEBSD32_SYS_freebsd11_nlstat 280 #define FREEBSD32_SYS_freebsd32_preadv 289 #define FREEBSD32_SYS_freebsd32_pwritev 290 /* 297 is freebsd4 freebsd32_fhstatfs */ #define FREEBSD32_SYS_fhopen 298 #define FREEBSD32_SYS_freebsd11_freebsd32_fhstat 299 #define FREEBSD32_SYS_modnext 300 #define FREEBSD32_SYS_freebsd32_modstat 301 #define FREEBSD32_SYS_modfnext 302 #define FREEBSD32_SYS_modfind 303 #define FREEBSD32_SYS_kldload 304 #define FREEBSD32_SYS_kldunload 305 #define FREEBSD32_SYS_kldfind 306 #define FREEBSD32_SYS_kldnext 307 #define FREEBSD32_SYS_freebsd32_kldstat 308 #define FREEBSD32_SYS_kldfirstmod 309 #define FREEBSD32_SYS_getsid 310 #define FREEBSD32_SYS_setresuid 311 #define FREEBSD32_SYS_setresgid 312 /* 313 is obsolete signanosleep */ #define FREEBSD32_SYS_freebsd32_aio_return 314 #define FREEBSD32_SYS_freebsd32_aio_suspend 315 #define FREEBSD32_SYS_aio_cancel 316 #define FREEBSD32_SYS_freebsd32_aio_error 317 /* 318 is freebsd6 freebsd32_aio_read */ /* 319 is freebsd6 freebsd32_aio_write */ /* 320 is freebsd6 freebsd32_lio_listio */ #define FREEBSD32_SYS_yield 321 /* 322 is obsolete thr_sleep */ /* 323 is obsolete thr_wakeup */ #define FREEBSD32_SYS_mlockall 324 #define FREEBSD32_SYS_munlockall 325 #define FREEBSD32_SYS___getcwd 326 #define FREEBSD32_SYS_sched_setparam 327 #define FREEBSD32_SYS_sched_getparam 328 #define FREEBSD32_SYS_sched_setscheduler 329 #define FREEBSD32_SYS_sched_getscheduler 330 #define FREEBSD32_SYS_sched_yield 331 #define FREEBSD32_SYS_sched_get_priority_max 332 #define FREEBSD32_SYS_sched_get_priority_min 333 #define FREEBSD32_SYS_freebsd32_sched_rr_get_interval 334 #define FREEBSD32_SYS_utrace 335 /* 336 is freebsd4 freebsd32_sendfile */ #define FREEBSD32_SYS_kldsym 337 #define FREEBSD32_SYS_freebsd32_jail 338 #define FREEBSD32_SYS_sigprocmask 340 #define FREEBSD32_SYS_sigsuspend 341 /* 342 is freebsd4 freebsd32_sigaction */ #define FREEBSD32_SYS_sigpending 343 /* 344 is freebsd4 freebsd32_sigreturn */ #define FREEBSD32_SYS_freebsd32_sigtimedwait 345 #define FREEBSD32_SYS_freebsd32_sigwaitinfo 346 #define FREEBSD32_SYS___acl_get_file 347 #define FREEBSD32_SYS___acl_set_file 348 #define FREEBSD32_SYS___acl_get_fd 349 #define FREEBSD32_SYS___acl_set_fd 350 #define FREEBSD32_SYS___acl_delete_file 351 #define FREEBSD32_SYS___acl_delete_fd 352 #define FREEBSD32_SYS___acl_aclcheck_file 353 #define FREEBSD32_SYS___acl_aclcheck_fd 354 #define FREEBSD32_SYS_extattrctl 355 #define FREEBSD32_SYS_extattr_set_file 356 #define FREEBSD32_SYS_extattr_get_file 357 #define FREEBSD32_SYS_extattr_delete_file 358 #define FREEBSD32_SYS_freebsd32_aio_waitcomplete 359 #define FREEBSD32_SYS_getresuid 360 #define FREEBSD32_SYS_getresgid 361 #define FREEBSD32_SYS_kqueue 362 #define FREEBSD32_SYS_freebsd11_freebsd32_kevent 363 + /* 364 is obsolete __cap_get_proc */ + /* 365 is obsolete __cap_set_proc */ + /* 366 is obsolete __cap_get_fd */ + /* 367 is obsolete __cap_get_file */ + /* 368 is obsolete __cap_set_fd */ + /* 369 is obsolete __cap_set_file */ #define FREEBSD32_SYS_extattr_set_fd 371 #define FREEBSD32_SYS_extattr_get_fd 372 #define FREEBSD32_SYS_extattr_delete_fd 373 #define FREEBSD32_SYS___setugid 374 + /* 375 is obsolete nfsclnt */ #define FREEBSD32_SYS_eaccess 376 #define FREEBSD32_SYS_freebsd32_nmount 378 + /* 379 is obsolete kse_exit */ + /* 380 is obsolete kse_wakeup */ + /* 381 is obsolete kse_create */ + /* 382 is obsolete kse_thr_interrupt */ + /* 383 is obsolete kse_release */ #define FREEBSD32_SYS_kenv 390 #define FREEBSD32_SYS_lchflags 391 #define FREEBSD32_SYS_uuidgen 392 #define FREEBSD32_SYS_freebsd32_sendfile 393 #define FREEBSD32_SYS_freebsd11_getfsstat 395 #define FREEBSD32_SYS_freebsd11_statfs 396 #define FREEBSD32_SYS_freebsd11_fstatfs 397 #define FREEBSD32_SYS_freebsd11_fhstatfs 398 #define FREEBSD32_SYS_ksem_close 400 #define FREEBSD32_SYS_ksem_post 401 #define FREEBSD32_SYS_ksem_wait 402 #define FREEBSD32_SYS_ksem_trywait 403 #define FREEBSD32_SYS_freebsd32_ksem_init 404 #define FREEBSD32_SYS_freebsd32_ksem_open 405 #define FREEBSD32_SYS_ksem_unlink 406 #define FREEBSD32_SYS_ksem_getvalue 407 #define FREEBSD32_SYS_ksem_destroy 408 #define FREEBSD32_SYS_extattr_set_link 412 #define FREEBSD32_SYS_extattr_get_link 413 #define FREEBSD32_SYS_extattr_delete_link 414 #define FREEBSD32_SYS_freebsd32_sigaction 416 #define FREEBSD32_SYS_freebsd32_sigreturn 417 #define FREEBSD32_SYS_freebsd32_getcontext 421 #define FREEBSD32_SYS_freebsd32_setcontext 422 #define FREEBSD32_SYS_freebsd32_swapcontext 423 #define FREEBSD32_SYS___acl_get_link 425 #define FREEBSD32_SYS___acl_set_link 426 #define FREEBSD32_SYS___acl_delete_link 427 #define FREEBSD32_SYS___acl_aclcheck_link 428 #define FREEBSD32_SYS_sigwait 429 #define FREEBSD32_SYS_thr_exit 431 #define FREEBSD32_SYS_thr_self 432 #define FREEBSD32_SYS_thr_kill 433 #define FREEBSD32_SYS_jail_attach 436 #define FREEBSD32_SYS_extattr_list_fd 437 #define FREEBSD32_SYS_extattr_list_file 438 #define FREEBSD32_SYS_extattr_list_link 439 + /* 440 is obsolete kse_switchin */ #define FREEBSD32_SYS_freebsd32_ksem_timedwait 441 #define FREEBSD32_SYS_freebsd32_thr_suspend 442 #define FREEBSD32_SYS_thr_wake 443 #define FREEBSD32_SYS_kldunloadf 444 #define FREEBSD32_SYS_audit 445 #define FREEBSD32_SYS_auditon 446 #define FREEBSD32_SYS_getauid 447 #define FREEBSD32_SYS_setauid 448 #define FREEBSD32_SYS_getaudit 449 #define FREEBSD32_SYS_setaudit 450 #define FREEBSD32_SYS_getaudit_addr 451 #define FREEBSD32_SYS_setaudit_addr 452 #define FREEBSD32_SYS_auditctl 453 #define FREEBSD32_SYS_freebsd32_umtx_op 454 #define FREEBSD32_SYS_freebsd32_thr_new 455 #define FREEBSD32_SYS_freebsd32_sigqueue 456 #define FREEBSD32_SYS_freebsd32_kmq_open 457 #define FREEBSD32_SYS_freebsd32_kmq_setattr 458 #define FREEBSD32_SYS_freebsd32_kmq_timedreceive 459 #define FREEBSD32_SYS_freebsd32_kmq_timedsend 460 #define FREEBSD32_SYS_freebsd32_kmq_notify 461 #define FREEBSD32_SYS_kmq_unlink 462 #define FREEBSD32_SYS_abort2 463 #define FREEBSD32_SYS_thr_set_name 464 #define FREEBSD32_SYS_freebsd32_aio_fsync 465 #define FREEBSD32_SYS_rtprio_thread 466 #define FREEBSD32_SYS_sctp_peeloff 471 #define FREEBSD32_SYS_sctp_generic_sendmsg 472 #define FREEBSD32_SYS_sctp_generic_sendmsg_iov 473 #define FREEBSD32_SYS_sctp_generic_recvmsg 474 #define FREEBSD32_SYS_freebsd32_pread 475 #define FREEBSD32_SYS_freebsd32_pwrite 476 #define FREEBSD32_SYS_freebsd32_mmap 477 #define FREEBSD32_SYS_freebsd32_lseek 478 #define FREEBSD32_SYS_freebsd32_truncate 479 #define FREEBSD32_SYS_freebsd32_ftruncate 480 #define FREEBSD32_SYS_freebsd32_pread 475 #define FREEBSD32_SYS_freebsd32_pwrite 476 #define FREEBSD32_SYS_freebsd32_mmap 477 #define FREEBSD32_SYS_freebsd32_lseek 478 #define FREEBSD32_SYS_freebsd32_truncate 479 #define FREEBSD32_SYS_freebsd32_ftruncate 480 #define FREEBSD32_SYS_thr_kill2 481 #define FREEBSD32_SYS_shm_open 482 #define FREEBSD32_SYS_shm_unlink 483 #define FREEBSD32_SYS_cpuset 484 #define FREEBSD32_SYS_freebsd32_cpuset_setid 485 #define FREEBSD32_SYS_freebsd32_cpuset_setid 485 #define FREEBSD32_SYS_freebsd32_cpuset_getid 486 #define FREEBSD32_SYS_freebsd32_cpuset_getaffinity 487 #define FREEBSD32_SYS_freebsd32_cpuset_setaffinity 488 #define FREEBSD32_SYS_faccessat 489 #define FREEBSD32_SYS_fchmodat 490 #define FREEBSD32_SYS_fchownat 491 #define FREEBSD32_SYS_freebsd32_fexecve 492 #define FREEBSD32_SYS_freebsd11_freebsd32_fstatat 493 #define FREEBSD32_SYS_freebsd32_futimesat 494 #define FREEBSD32_SYS_linkat 495 #define FREEBSD32_SYS_mkdirat 496 #define FREEBSD32_SYS_mkfifoat 497 #define FREEBSD32_SYS_freebsd11_freebsd32_mknodat 498 #define FREEBSD32_SYS_openat 499 #define FREEBSD32_SYS_readlinkat 500 #define FREEBSD32_SYS_renameat 501 #define FREEBSD32_SYS_symlinkat 502 #define FREEBSD32_SYS_unlinkat 503 #define FREEBSD32_SYS_posix_openpt 504 #define FREEBSD32_SYS_freebsd32_jail_get 506 #define FREEBSD32_SYS_freebsd32_jail_set 507 #define FREEBSD32_SYS_jail_remove 508 #define FREEBSD32_SYS_closefrom 509 #define FREEBSD32_SYS_freebsd32_semctl 510 #define FREEBSD32_SYS_freebsd32_msgctl 511 #define FREEBSD32_SYS_freebsd32_shmctl 512 #define FREEBSD32_SYS_lpathconf 513 /* 514 is obsolete cap_new */ #define FREEBSD32_SYS___cap_rights_get 515 #define FREEBSD32_SYS_cap_enter 516 #define FREEBSD32_SYS_cap_getmode 517 #define FREEBSD32_SYS_pdfork 518 #define FREEBSD32_SYS_pdkill 519 #define FREEBSD32_SYS_pdgetpid 520 #define FREEBSD32_SYS_freebsd32_pselect 522 #define FREEBSD32_SYS_getloginclass 523 #define FREEBSD32_SYS_setloginclass 524 #define FREEBSD32_SYS_rctl_get_racct 525 #define FREEBSD32_SYS_rctl_get_rules 526 #define FREEBSD32_SYS_rctl_get_limits 527 #define FREEBSD32_SYS_rctl_add_rule 528 #define FREEBSD32_SYS_rctl_remove_rule 529 #define FREEBSD32_SYS_freebsd32_posix_fallocate 530 #define FREEBSD32_SYS_freebsd32_posix_fadvise 531 #define FREEBSD32_SYS_freebsd32_wait6 532 #define FREEBSD32_SYS_freebsd32_posix_fallocate 530 #define FREEBSD32_SYS_freebsd32_posix_fadvise 531 #define FREEBSD32_SYS_freebsd32_wait6 532 #define FREEBSD32_SYS_cap_rights_limit 533 #define FREEBSD32_SYS_freebsd32_cap_ioctls_limit 534 #define FREEBSD32_SYS_freebsd32_cap_ioctls_get 535 #define FREEBSD32_SYS_cap_fcntls_limit 536 #define FREEBSD32_SYS_cap_fcntls_get 537 #define FREEBSD32_SYS_bindat 538 #define FREEBSD32_SYS_connectat 539 #define FREEBSD32_SYS_chflagsat 540 #define FREEBSD32_SYS_accept4 541 #define FREEBSD32_SYS_pipe2 542 #define FREEBSD32_SYS_freebsd32_aio_mlock 543 #define FREEBSD32_SYS_freebsd32_procctl 544 #define FREEBSD32_SYS_freebsd32_procctl 544 #define FREEBSD32_SYS_freebsd32_ppoll 545 #define FREEBSD32_SYS_freebsd32_futimens 546 #define FREEBSD32_SYS_freebsd32_utimensat 547 + /* 548 is obsolete numa_getaffinity */ + /* 549 is obsolete numa_setaffinity */ #define FREEBSD32_SYS_fdatasync 550 #define FREEBSD32_SYS_freebsd32_fstat 551 #define FREEBSD32_SYS_freebsd32_fstatat 552 #define FREEBSD32_SYS_freebsd32_fhstat 553 #define FREEBSD32_SYS_getdirentries 554 #define FREEBSD32_SYS_statfs 555 #define FREEBSD32_SYS_fstatfs 556 #define FREEBSD32_SYS_getfsstat 557 #define FREEBSD32_SYS_fhstatfs 558 #define FREEBSD32_SYS_mknodat 559 #define FREEBSD32_SYS_freebsd32_kevent 560 #define FREEBSD32_SYS_freebsd32_cpuset_getdomain 561 #define FREEBSD32_SYS_freebsd32_cpuset_setdomain 562 #define FREEBSD32_SYS_getrandom 563 #define FREEBSD32_SYS_MAXSYSCALL 564 Index: projects/clang700-import/sys/compat/freebsd32/freebsd32_syscalls.c =================================================================== --- projects/clang700-import/sys/compat/freebsd32/freebsd32_syscalls.c (revision 339014) +++ projects/clang700-import/sys/compat/freebsd32/freebsd32_syscalls.c (revision 339015) @@ -1,599 +1,599 @@ /* * System call names. * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ */ const char *freebsd32_syscallnames[] = { #if !defined(PAD64_REQUIRED) && (defined(__powerpc__) || defined(__mips__)) #define PAD64_REQUIRED #endif "syscall", /* 0 = syscall */ "exit", /* 1 = exit */ "fork", /* 2 = fork */ "read", /* 3 = read */ "write", /* 4 = write */ "open", /* 5 = open */ "close", /* 6 = close */ "freebsd32_wait4", /* 7 = freebsd32_wait4 */ "obs_old", /* 8 = obsolete old creat */ "link", /* 9 = link */ "unlink", /* 10 = unlink */ "obs_execv", /* 11 = obsolete execv */ "chdir", /* 12 = chdir */ "fchdir", /* 13 = fchdir */ "compat11.freebsd32_mknod", /* 14 = freebsd11 freebsd32_mknod */ "chmod", /* 15 = chmod */ "chown", /* 16 = chown */ "break", /* 17 = break */ "compat4.freebsd32_getfsstat", /* 18 = freebsd4 freebsd32_getfsstat */ "compat.freebsd32_lseek", /* 19 = old freebsd32_lseek */ "getpid", /* 20 = getpid */ "mount", /* 21 = mount */ "unmount", /* 22 = unmount */ "setuid", /* 23 = setuid */ "getuid", /* 24 = getuid */ "geteuid", /* 25 = geteuid */ "ptrace", /* 26 = ptrace */ "freebsd32_recvmsg", /* 27 = freebsd32_recvmsg */ "freebsd32_sendmsg", /* 28 = freebsd32_sendmsg */ "freebsd32_recvfrom", /* 29 = freebsd32_recvfrom */ "accept", /* 30 = accept */ "getpeername", /* 31 = getpeername */ "getsockname", /* 32 = getsockname */ "access", /* 33 = access */ "chflags", /* 34 = chflags */ "fchflags", /* 35 = fchflags */ "sync", /* 36 = sync */ "kill", /* 37 = kill */ "compat.freebsd32_stat", /* 38 = old freebsd32_stat */ "getppid", /* 39 = getppid */ "compat.freebsd32_lstat", /* 40 = old freebsd32_lstat */ "dup", /* 41 = dup */ "compat10.freebsd32_pipe", /* 42 = freebsd10 freebsd32_pipe */ "getegid", /* 43 = getegid */ "profil", /* 44 = profil */ "ktrace", /* 45 = ktrace */ "compat.freebsd32_sigaction", /* 46 = old freebsd32_sigaction */ "getgid", /* 47 = getgid */ "compat.freebsd32_sigprocmask", /* 48 = old freebsd32_sigprocmask */ "getlogin", /* 49 = getlogin */ "setlogin", /* 50 = setlogin */ "acct", /* 51 = acct */ "compat.freebsd32_sigpending", /* 52 = old freebsd32_sigpending */ "freebsd32_sigaltstack", /* 53 = freebsd32_sigaltstack */ "freebsd32_ioctl", /* 54 = freebsd32_ioctl */ "reboot", /* 55 = reboot */ "revoke", /* 56 = revoke */ "symlink", /* 57 = symlink */ "readlink", /* 58 = readlink */ "freebsd32_execve", /* 59 = freebsd32_execve */ "umask", /* 60 = umask */ "chroot", /* 61 = chroot */ "compat.freebsd32_fstat", /* 62 = old freebsd32_fstat */ "obs_ogetkerninfo", /* 63 = obsolete ogetkerninfo */ "compat.freebsd32_getpagesize", /* 64 = old freebsd32_getpagesize */ "msync", /* 65 = msync */ "vfork", /* 66 = vfork */ "obs_vread", /* 67 = obsolete vread */ "obs_vwrite", /* 68 = obsolete vwrite */ "sbrk", /* 69 = sbrk */ "sstk", /* 70 = sstk */ "compat.mmap", /* 71 = old mmap */ "compat11.vadvise", /* 72 = freebsd11 vadvise */ "munmap", /* 73 = munmap */ "freebsd32_mprotect", /* 74 = freebsd32_mprotect */ "madvise", /* 75 = madvise */ "obs_vhangup", /* 76 = obsolete vhangup */ "obs_vlimit", /* 77 = obsolete vlimit */ "mincore", /* 78 = mincore */ "getgroups", /* 79 = getgroups */ "setgroups", /* 80 = setgroups */ "getpgrp", /* 81 = getpgrp */ "setpgid", /* 82 = setpgid */ "freebsd32_setitimer", /* 83 = freebsd32_setitimer */ "obs_owait", /* 84 = obsolete owait */ "swapon", /* 85 = swapon */ "freebsd32_getitimer", /* 86 = freebsd32_getitimer */ "obs_ogethostname", /* 87 = obsolete ogethostname */ "obs_osethostname", /* 88 = obsolete osethostname */ "getdtablesize", /* 89 = getdtablesize */ "dup2", /* 90 = dup2 */ "#91", /* 91 = getdopt */ "freebsd32_fcntl", /* 92 = freebsd32_fcntl */ "freebsd32_select", /* 93 = freebsd32_select */ "#94", /* 94 = setdopt */ "fsync", /* 95 = fsync */ "setpriority", /* 96 = setpriority */ "socket", /* 97 = socket */ "connect", /* 98 = connect */ "obs_oaccept", /* 99 = obsolete oaccept */ "getpriority", /* 100 = getpriority */ "obs_osend", /* 101 = obsolete osend */ "obs_orecv", /* 102 = obsolete orecv */ "compat.freebsd32_sigreturn", /* 103 = old freebsd32_sigreturn */ "bind", /* 104 = bind */ "setsockopt", /* 105 = setsockopt */ "listen", /* 106 = listen */ "obs_vtimes", /* 107 = obsolete vtimes */ "compat.freebsd32_sigvec", /* 108 = old freebsd32_sigvec */ "compat.freebsd32_sigblock", /* 109 = old freebsd32_sigblock */ "compat.freebsd32_sigsetmask", /* 110 = old freebsd32_sigsetmask */ "compat.freebsd32_sigsuspend", /* 111 = old freebsd32_sigsuspend */ "compat.freebsd32_sigstack", /* 112 = old freebsd32_sigstack */ "obs_orecvmsg", /* 113 = obsolete orecvmsg */ "obs_osendmsg", /* 114 = obsolete osendmsg */ "obs_vtrace", /* 115 = obsolete vtrace */ "freebsd32_gettimeofday", /* 116 = freebsd32_gettimeofday */ "freebsd32_getrusage", /* 117 = freebsd32_getrusage */ "getsockopt", /* 118 = getsockopt */ "#119", /* 119 = resuba */ "freebsd32_readv", /* 120 = freebsd32_readv */ "freebsd32_writev", /* 121 = freebsd32_writev */ "freebsd32_settimeofday", /* 122 = freebsd32_settimeofday */ "fchown", /* 123 = fchown */ "fchmod", /* 124 = fchmod */ "obs_orecvfrom", /* 125 = obsolete orecvfrom */ "setreuid", /* 126 = setreuid */ "setregid", /* 127 = setregid */ "rename", /* 128 = rename */ "compat.truncate", /* 129 = old truncate */ "compat.ftruncate", /* 130 = old ftruncate */ "flock", /* 131 = flock */ "mkfifo", /* 132 = mkfifo */ "sendto", /* 133 = sendto */ "shutdown", /* 134 = shutdown */ "socketpair", /* 135 = socketpair */ "mkdir", /* 136 = mkdir */ "rmdir", /* 137 = rmdir */ "freebsd32_utimes", /* 138 = freebsd32_utimes */ "obs_4.2", /* 139 = obsolete 4.2 sigreturn */ "freebsd32_adjtime", /* 140 = freebsd32_adjtime */ "obs_ogetpeername", /* 141 = obsolete ogetpeername */ "obs_ogethostid", /* 142 = obsolete ogethostid */ "obs_sethostid", /* 143 = obsolete sethostid */ "obs_getrlimit", /* 144 = obsolete getrlimit */ "obs_setrlimit", /* 145 = obsolete setrlimit */ "obs_killpg", /* 146 = obsolete killpg */ "setsid", /* 147 = setsid */ "quotactl", /* 148 = quotactl */ "obs_oquota", /* 149 = obsolete oquota */ "obs_ogetsockname", /* 150 = obsolete ogetsockname */ "#151", /* 151 = sem_lock */ "#152", /* 152 = sem_wakeup */ "#153", /* 153 = asyncdaemon */ "#154", /* 154 = nlm_syscall */ "#155", /* 155 = nfssvc */ "compat.freebsd32_getdirentries", /* 156 = old freebsd32_getdirentries */ "compat4.freebsd32_statfs", /* 157 = freebsd4 freebsd32_statfs */ "compat4.freebsd32_fstatfs", /* 158 = freebsd4 freebsd32_fstatfs */ "#159", /* 159 = nosys */ "#160", /* 160 = lgetfh */ "getfh", /* 161 = getfh */ "obs_getdomainname", /* 162 = obsolete getdomainname */ "obs_setdomainname", /* 163 = obsolete setdomainname */ "obs_uname", /* 164 = obsolete uname */ "freebsd32_sysarch", /* 165 = freebsd32_sysarch */ "rtprio", /* 166 = rtprio */ "#167", /* 167 = nosys */ "#168", /* 168 = nosys */ "freebsd32_semsys", /* 169 = freebsd32_semsys */ "freebsd32_msgsys", /* 170 = freebsd32_msgsys */ "freebsd32_shmsys", /* 171 = freebsd32_shmsys */ "#172", /* 172 = nosys */ "compat6.freebsd32_pread", /* 173 = freebsd6 freebsd32_pread */ "compat6.freebsd32_pwrite", /* 174 = freebsd6 freebsd32_pwrite */ "#175", /* 175 = nosys */ "ntp_adjtime", /* 176 = ntp_adjtime */ "#177", /* 177 = sfork */ "#178", /* 178 = getdescriptor */ "#179", /* 179 = setdescriptor */ "#180", /* 180 = nosys */ "setgid", /* 181 = setgid */ "setegid", /* 182 = setegid */ "seteuid", /* 183 = seteuid */ - "#184", /* 184 = lfs_bmapv */ - "#185", /* 185 = lfs_markv */ - "#186", /* 186 = lfs_segclean */ - "#187", /* 187 = lfs_segwait */ + "obs_lfs_bmapv", /* 184 = obsolete lfs_bmapv */ + "obs_lfs_markv", /* 185 = obsolete lfs_markv */ + "obs_lfs_segclean", /* 186 = obsolete lfs_segclean */ + "obs_lfs_segwait", /* 187 = obsolete lfs_segwait */ "compat11.freebsd32_stat", /* 188 = freebsd11 freebsd32_stat */ "compat11.freebsd32_fstat", /* 189 = freebsd11 freebsd32_fstat */ "compat11.freebsd32_lstat", /* 190 = freebsd11 freebsd32_lstat */ "pathconf", /* 191 = pathconf */ "fpathconf", /* 192 = fpathconf */ "#193", /* 193 = nosys */ "getrlimit", /* 194 = getrlimit */ "setrlimit", /* 195 = setrlimit */ "compat11.freebsd32_getdirentries", /* 196 = freebsd11 freebsd32_getdirentries */ "compat6.freebsd32_mmap", /* 197 = freebsd6 freebsd32_mmap */ "__syscall", /* 198 = __syscall */ "compat6.freebsd32_lseek", /* 199 = freebsd6 freebsd32_lseek */ "compat6.freebsd32_truncate", /* 200 = freebsd6 freebsd32_truncate */ "compat6.freebsd32_ftruncate", /* 201 = freebsd6 freebsd32_ftruncate */ "freebsd32_sysctl", /* 202 = freebsd32_sysctl */ "mlock", /* 203 = mlock */ "munlock", /* 204 = munlock */ "undelete", /* 205 = undelete */ "freebsd32_futimes", /* 206 = freebsd32_futimes */ "getpgid", /* 207 = getpgid */ "#208", /* 208 = nosys */ "poll", /* 209 = poll */ "lkmnosys", /* 210 = lkmnosys */ "lkmnosys", /* 211 = lkmnosys */ "lkmnosys", /* 212 = lkmnosys */ "lkmnosys", /* 213 = lkmnosys */ "lkmnosys", /* 214 = lkmnosys */ "lkmnosys", /* 215 = lkmnosys */ "lkmnosys", /* 216 = lkmnosys */ "lkmnosys", /* 217 = lkmnosys */ "lkmnosys", /* 218 = lkmnosys */ "lkmnosys", /* 219 = lkmnosys */ "compat7.freebsd32_semctl", /* 220 = freebsd7 freebsd32_semctl */ "semget", /* 221 = semget */ "semop", /* 222 = semop */ - "#223", /* 223 = semconfig */ + "obs_semconfig", /* 223 = obsolete semconfig */ "compat7.freebsd32_msgctl", /* 224 = freebsd7 freebsd32_msgctl */ "msgget", /* 225 = msgget */ "freebsd32_msgsnd", /* 226 = freebsd32_msgsnd */ "freebsd32_msgrcv", /* 227 = freebsd32_msgrcv */ "shmat", /* 228 = shmat */ "compat7.freebsd32_shmctl", /* 229 = freebsd7 freebsd32_shmctl */ "shmdt", /* 230 = shmdt */ "shmget", /* 231 = shmget */ "freebsd32_clock_gettime", /* 232 = freebsd32_clock_gettime */ "freebsd32_clock_settime", /* 233 = freebsd32_clock_settime */ "freebsd32_clock_getres", /* 234 = freebsd32_clock_getres */ "freebsd32_ktimer_create", /* 235 = freebsd32_ktimer_create */ "ktimer_delete", /* 236 = ktimer_delete */ "freebsd32_ktimer_settime", /* 237 = freebsd32_ktimer_settime */ "freebsd32_ktimer_gettime", /* 238 = freebsd32_ktimer_gettime */ "ktimer_getoverrun", /* 239 = ktimer_getoverrun */ "freebsd32_nanosleep", /* 240 = freebsd32_nanosleep */ "ffclock_getcounter", /* 241 = ffclock_getcounter */ "ffclock_setestimate", /* 242 = ffclock_setestimate */ "ffclock_getestimate", /* 243 = ffclock_getestimate */ "freebsd32_clock_nanosleep", /* 244 = freebsd32_clock_nanosleep */ "#245", /* 245 = nosys */ "#246", /* 246 = nosys */ "freebsd32_clock_getcpuclockid2", /* 247 = freebsd32_clock_getcpuclockid2 */ "#248", /* 248 = ntp_gettime */ "#249", /* 249 = nosys */ "minherit", /* 250 = minherit */ "rfork", /* 251 = rfork */ "obs_openbsd_poll", /* 252 = obsolete openbsd_poll */ "issetugid", /* 253 = issetugid */ "lchown", /* 254 = lchown */ "freebsd32_aio_read", /* 255 = freebsd32_aio_read */ "freebsd32_aio_write", /* 256 = freebsd32_aio_write */ "freebsd32_lio_listio", /* 257 = freebsd32_lio_listio */ "#258", /* 258 = nosys */ "#259", /* 259 = nosys */ "#260", /* 260 = nosys */ "#261", /* 261 = nosys */ "#262", /* 262 = nosys */ "#263", /* 263 = nosys */ "#264", /* 264 = nosys */ "#265", /* 265 = nosys */ "#266", /* 266 = nosys */ "#267", /* 267 = nosys */ "#268", /* 268 = nosys */ "#269", /* 269 = nosys */ "#270", /* 270 = nosys */ "#271", /* 271 = nosys */ "compat11.freebsd32_getdents", /* 272 = freebsd11 freebsd32_getdents */ "#273", /* 273 = nosys */ "lchmod", /* 274 = lchmod */ "obs_netbsd_lchown", /* 275 = obsolete netbsd_lchown */ "freebsd32_lutimes", /* 276 = freebsd32_lutimes */ "obs_netbsd_msync", /* 277 = obsolete netbsd_msync */ "compat11.nstat", /* 278 = freebsd11 nstat */ "compat11.nfstat", /* 279 = freebsd11 nfstat */ "compat11.nlstat", /* 280 = freebsd11 nlstat */ "#281", /* 281 = nosys */ "#282", /* 282 = nosys */ "#283", /* 283 = nosys */ "#284", /* 284 = nosys */ "#285", /* 285 = nosys */ "#286", /* 286 = nosys */ "#287", /* 287 = nosys */ "#288", /* 288 = nosys */ "freebsd32_preadv", /* 289 = freebsd32_preadv */ "freebsd32_pwritev", /* 290 = freebsd32_pwritev */ "#291", /* 291 = nosys */ "#292", /* 292 = nosys */ "#293", /* 293 = nosys */ "#294", /* 294 = nosys */ "#295", /* 295 = nosys */ "#296", /* 296 = nosys */ "compat4.freebsd32_fhstatfs", /* 297 = freebsd4 freebsd32_fhstatfs */ "fhopen", /* 298 = fhopen */ "compat11.freebsd32_fhstat", /* 299 = freebsd11 freebsd32_fhstat */ "modnext", /* 300 = modnext */ "freebsd32_modstat", /* 301 = freebsd32_modstat */ "modfnext", /* 302 = modfnext */ "modfind", /* 303 = modfind */ "kldload", /* 304 = kldload */ "kldunload", /* 305 = kldunload */ "kldfind", /* 306 = kldfind */ "kldnext", /* 307 = kldnext */ "freebsd32_kldstat", /* 308 = freebsd32_kldstat */ "kldfirstmod", /* 309 = kldfirstmod */ "getsid", /* 310 = getsid */ "setresuid", /* 311 = setresuid */ "setresgid", /* 312 = setresgid */ "obs_signanosleep", /* 313 = obsolete signanosleep */ "freebsd32_aio_return", /* 314 = freebsd32_aio_return */ "freebsd32_aio_suspend", /* 315 = freebsd32_aio_suspend */ "aio_cancel", /* 316 = aio_cancel */ "freebsd32_aio_error", /* 317 = freebsd32_aio_error */ "compat6.freebsd32_aio_read", /* 318 = freebsd6 freebsd32_aio_read */ "compat6.freebsd32_aio_write", /* 319 = freebsd6 freebsd32_aio_write */ "compat6.freebsd32_lio_listio", /* 320 = freebsd6 freebsd32_lio_listio */ "yield", /* 321 = yield */ "obs_thr_sleep", /* 322 = obsolete thr_sleep */ "obs_thr_wakeup", /* 323 = obsolete thr_wakeup */ "mlockall", /* 324 = mlockall */ "munlockall", /* 325 = munlockall */ "__getcwd", /* 326 = __getcwd */ "sched_setparam", /* 327 = sched_setparam */ "sched_getparam", /* 328 = sched_getparam */ "sched_setscheduler", /* 329 = sched_setscheduler */ "sched_getscheduler", /* 330 = sched_getscheduler */ "sched_yield", /* 331 = sched_yield */ "sched_get_priority_max", /* 332 = sched_get_priority_max */ "sched_get_priority_min", /* 333 = sched_get_priority_min */ "freebsd32_sched_rr_get_interval", /* 334 = freebsd32_sched_rr_get_interval */ "utrace", /* 335 = utrace */ "compat4.freebsd32_sendfile", /* 336 = freebsd4 freebsd32_sendfile */ "kldsym", /* 337 = kldsym */ "freebsd32_jail", /* 338 = freebsd32_jail */ "#339", /* 339 = pioctl */ "sigprocmask", /* 340 = sigprocmask */ "sigsuspend", /* 341 = sigsuspend */ "compat4.freebsd32_sigaction", /* 342 = freebsd4 freebsd32_sigaction */ "sigpending", /* 343 = sigpending */ "compat4.freebsd32_sigreturn", /* 344 = freebsd4 freebsd32_sigreturn */ "freebsd32_sigtimedwait", /* 345 = freebsd32_sigtimedwait */ "freebsd32_sigwaitinfo", /* 346 = freebsd32_sigwaitinfo */ "__acl_get_file", /* 347 = __acl_get_file */ "__acl_set_file", /* 348 = __acl_set_file */ "__acl_get_fd", /* 349 = __acl_get_fd */ "__acl_set_fd", /* 350 = __acl_set_fd */ "__acl_delete_file", /* 351 = __acl_delete_file */ "__acl_delete_fd", /* 352 = __acl_delete_fd */ "__acl_aclcheck_file", /* 353 = __acl_aclcheck_file */ "__acl_aclcheck_fd", /* 354 = __acl_aclcheck_fd */ "extattrctl", /* 355 = extattrctl */ "extattr_set_file", /* 356 = extattr_set_file */ "extattr_get_file", /* 357 = extattr_get_file */ "extattr_delete_file", /* 358 = extattr_delete_file */ "freebsd32_aio_waitcomplete", /* 359 = freebsd32_aio_waitcomplete */ "getresuid", /* 360 = getresuid */ "getresgid", /* 361 = getresgid */ "kqueue", /* 362 = kqueue */ "compat11.freebsd32_kevent", /* 363 = freebsd11 freebsd32_kevent */ - "#364", /* 364 = __cap_get_proc */ - "#365", /* 365 = __cap_set_proc */ - "#366", /* 366 = __cap_get_fd */ - "#367", /* 367 = __cap_get_file */ - "#368", /* 368 = __cap_set_fd */ - "#369", /* 369 = __cap_set_file */ + "obs___cap_get_proc", /* 364 = obsolete __cap_get_proc */ + "obs___cap_set_proc", /* 365 = obsolete __cap_set_proc */ + "obs___cap_get_fd", /* 366 = obsolete __cap_get_fd */ + "obs___cap_get_file", /* 367 = obsolete __cap_get_file */ + "obs___cap_set_fd", /* 368 = obsolete __cap_set_fd */ + "obs___cap_set_file", /* 369 = obsolete __cap_set_file */ "#370", /* 370 = nosys */ "extattr_set_fd", /* 371 = extattr_set_fd */ "extattr_get_fd", /* 372 = extattr_get_fd */ "extattr_delete_fd", /* 373 = extattr_delete_fd */ "__setugid", /* 374 = __setugid */ - "#375", /* 375 = nfsclnt */ + "obs_nfsclnt", /* 375 = obsolete nfsclnt */ "eaccess", /* 376 = eaccess */ "#377", /* 377 = afs_syscall */ "freebsd32_nmount", /* 378 = freebsd32_nmount */ - "#379", /* 379 = kse_exit */ - "#380", /* 380 = kse_wakeup */ - "#381", /* 381 = kse_create */ - "#382", /* 382 = kse_thr_interrupt */ - "#383", /* 383 = kse_release */ + "obs_kse_exit", /* 379 = obsolete kse_exit */ + "obs_kse_wakeup", /* 380 = obsolete kse_wakeup */ + "obs_kse_create", /* 381 = obsolete kse_create */ + "obs_kse_thr_interrupt", /* 382 = obsolete kse_thr_interrupt */ + "obs_kse_release", /* 383 = obsolete kse_release */ "#384", /* 384 = __mac_get_proc */ "#385", /* 385 = __mac_set_proc */ "#386", /* 386 = __mac_get_fd */ "#387", /* 387 = __mac_get_file */ "#388", /* 388 = __mac_set_fd */ "#389", /* 389 = __mac_set_file */ "kenv", /* 390 = kenv */ "lchflags", /* 391 = lchflags */ "uuidgen", /* 392 = uuidgen */ "freebsd32_sendfile", /* 393 = freebsd32_sendfile */ "#394", /* 394 = mac_syscall */ "compat11.getfsstat", /* 395 = freebsd11 getfsstat */ "compat11.statfs", /* 396 = freebsd11 statfs */ "compat11.fstatfs", /* 397 = freebsd11 fstatfs */ "compat11.fhstatfs", /* 398 = freebsd11 fhstatfs */ "#399", /* 399 = nosys */ "ksem_close", /* 400 = ksem_close */ "ksem_post", /* 401 = ksem_post */ "ksem_wait", /* 402 = ksem_wait */ "ksem_trywait", /* 403 = ksem_trywait */ "freebsd32_ksem_init", /* 404 = freebsd32_ksem_init */ "freebsd32_ksem_open", /* 405 = freebsd32_ksem_open */ "ksem_unlink", /* 406 = ksem_unlink */ "ksem_getvalue", /* 407 = ksem_getvalue */ "ksem_destroy", /* 408 = ksem_destroy */ "#409", /* 409 = __mac_get_pid */ "#410", /* 410 = __mac_get_link */ "#411", /* 411 = __mac_set_link */ "extattr_set_link", /* 412 = extattr_set_link */ "extattr_get_link", /* 413 = extattr_get_link */ "extattr_delete_link", /* 414 = extattr_delete_link */ "#415", /* 415 = __mac_execve */ "freebsd32_sigaction", /* 416 = freebsd32_sigaction */ "freebsd32_sigreturn", /* 417 = freebsd32_sigreturn */ "#418", /* 418 = __xstat */ "#419", /* 419 = __xfstat */ "#420", /* 420 = __xlstat */ "freebsd32_getcontext", /* 421 = freebsd32_getcontext */ "freebsd32_setcontext", /* 422 = freebsd32_setcontext */ "freebsd32_swapcontext", /* 423 = freebsd32_swapcontext */ "#424", /* 424 = swapoff */ "__acl_get_link", /* 425 = __acl_get_link */ "__acl_set_link", /* 426 = __acl_set_link */ "__acl_delete_link", /* 427 = __acl_delete_link */ "__acl_aclcheck_link", /* 428 = __acl_aclcheck_link */ "sigwait", /* 429 = sigwait */ "#430", /* 430 = thr_create; */ "thr_exit", /* 431 = thr_exit */ "thr_self", /* 432 = thr_self */ "thr_kill", /* 433 = thr_kill */ "#434", /* 434 = nosys */ "#435", /* 435 = nosys */ "jail_attach", /* 436 = jail_attach */ "extattr_list_fd", /* 437 = extattr_list_fd */ "extattr_list_file", /* 438 = extattr_list_file */ "extattr_list_link", /* 439 = extattr_list_link */ - "#440", /* 440 = kse_switchin */ + "obs_kse_switchin", /* 440 = obsolete kse_switchin */ "freebsd32_ksem_timedwait", /* 441 = freebsd32_ksem_timedwait */ "freebsd32_thr_suspend", /* 442 = freebsd32_thr_suspend */ "thr_wake", /* 443 = thr_wake */ "kldunloadf", /* 444 = kldunloadf */ "audit", /* 445 = audit */ "auditon", /* 446 = auditon */ "getauid", /* 447 = getauid */ "setauid", /* 448 = setauid */ "getaudit", /* 449 = getaudit */ "setaudit", /* 450 = setaudit */ "getaudit_addr", /* 451 = getaudit_addr */ "setaudit_addr", /* 452 = setaudit_addr */ "auditctl", /* 453 = auditctl */ "freebsd32_umtx_op", /* 454 = freebsd32_umtx_op */ "freebsd32_thr_new", /* 455 = freebsd32_thr_new */ "freebsd32_sigqueue", /* 456 = freebsd32_sigqueue */ "freebsd32_kmq_open", /* 457 = freebsd32_kmq_open */ "freebsd32_kmq_setattr", /* 458 = freebsd32_kmq_setattr */ "freebsd32_kmq_timedreceive", /* 459 = freebsd32_kmq_timedreceive */ "freebsd32_kmq_timedsend", /* 460 = freebsd32_kmq_timedsend */ "freebsd32_kmq_notify", /* 461 = freebsd32_kmq_notify */ "kmq_unlink", /* 462 = kmq_unlink */ "abort2", /* 463 = abort2 */ "thr_set_name", /* 464 = thr_set_name */ "freebsd32_aio_fsync", /* 465 = freebsd32_aio_fsync */ "rtprio_thread", /* 466 = rtprio_thread */ "#467", /* 467 = nosys */ "#468", /* 468 = nosys */ "#469", /* 469 = __getpath_fromfd */ "#470", /* 470 = __getpath_fromaddr */ "sctp_peeloff", /* 471 = sctp_peeloff */ "sctp_generic_sendmsg", /* 472 = sctp_generic_sendmsg */ "sctp_generic_sendmsg_iov", /* 473 = sctp_generic_sendmsg_iov */ "sctp_generic_recvmsg", /* 474 = sctp_generic_recvmsg */ #ifdef PAD64_REQUIRED "freebsd32_pread", /* 475 = freebsd32_pread */ "freebsd32_pwrite", /* 476 = freebsd32_pwrite */ "freebsd32_mmap", /* 477 = freebsd32_mmap */ "freebsd32_lseek", /* 478 = freebsd32_lseek */ "freebsd32_truncate", /* 479 = freebsd32_truncate */ "freebsd32_ftruncate", /* 480 = freebsd32_ftruncate */ #else "freebsd32_pread", /* 475 = freebsd32_pread */ "freebsd32_pwrite", /* 476 = freebsd32_pwrite */ "freebsd32_mmap", /* 477 = freebsd32_mmap */ "freebsd32_lseek", /* 478 = freebsd32_lseek */ "freebsd32_truncate", /* 479 = freebsd32_truncate */ "freebsd32_ftruncate", /* 480 = freebsd32_ftruncate */ #endif "thr_kill2", /* 481 = thr_kill2 */ "shm_open", /* 482 = shm_open */ "shm_unlink", /* 483 = shm_unlink */ "cpuset", /* 484 = cpuset */ #ifdef PAD64_REQUIRED "freebsd32_cpuset_setid", /* 485 = freebsd32_cpuset_setid */ #else "freebsd32_cpuset_setid", /* 485 = freebsd32_cpuset_setid */ #endif "freebsd32_cpuset_getid", /* 486 = freebsd32_cpuset_getid */ "freebsd32_cpuset_getaffinity", /* 487 = freebsd32_cpuset_getaffinity */ "freebsd32_cpuset_setaffinity", /* 488 = freebsd32_cpuset_setaffinity */ "faccessat", /* 489 = faccessat */ "fchmodat", /* 490 = fchmodat */ "fchownat", /* 491 = fchownat */ "freebsd32_fexecve", /* 492 = freebsd32_fexecve */ "compat11.freebsd32_fstatat", /* 493 = freebsd11 freebsd32_fstatat */ "freebsd32_futimesat", /* 494 = freebsd32_futimesat */ "linkat", /* 495 = linkat */ "mkdirat", /* 496 = mkdirat */ "mkfifoat", /* 497 = mkfifoat */ "compat11.freebsd32_mknodat", /* 498 = freebsd11 freebsd32_mknodat */ "openat", /* 499 = openat */ "readlinkat", /* 500 = readlinkat */ "renameat", /* 501 = renameat */ "symlinkat", /* 502 = symlinkat */ "unlinkat", /* 503 = unlinkat */ "posix_openpt", /* 504 = posix_openpt */ "#505", /* 505 = gssd_syscall */ "freebsd32_jail_get", /* 506 = freebsd32_jail_get */ "freebsd32_jail_set", /* 507 = freebsd32_jail_set */ "jail_remove", /* 508 = jail_remove */ "closefrom", /* 509 = closefrom */ "freebsd32_semctl", /* 510 = freebsd32_semctl */ "freebsd32_msgctl", /* 511 = freebsd32_msgctl */ "freebsd32_shmctl", /* 512 = freebsd32_shmctl */ "lpathconf", /* 513 = lpathconf */ "obs_cap_new", /* 514 = obsolete cap_new */ "__cap_rights_get", /* 515 = __cap_rights_get */ "cap_enter", /* 516 = cap_enter */ "cap_getmode", /* 517 = cap_getmode */ "pdfork", /* 518 = pdfork */ "pdkill", /* 519 = pdkill */ "pdgetpid", /* 520 = pdgetpid */ "#521", /* 521 = pdwait4 */ "freebsd32_pselect", /* 522 = freebsd32_pselect */ "getloginclass", /* 523 = getloginclass */ "setloginclass", /* 524 = setloginclass */ "rctl_get_racct", /* 525 = rctl_get_racct */ "rctl_get_rules", /* 526 = rctl_get_rules */ "rctl_get_limits", /* 527 = rctl_get_limits */ "rctl_add_rule", /* 528 = rctl_add_rule */ "rctl_remove_rule", /* 529 = rctl_remove_rule */ #ifdef PAD64_REQUIRED "freebsd32_posix_fallocate", /* 530 = freebsd32_posix_fallocate */ "freebsd32_posix_fadvise", /* 531 = freebsd32_posix_fadvise */ "freebsd32_wait6", /* 532 = freebsd32_wait6 */ #else "freebsd32_posix_fallocate", /* 530 = freebsd32_posix_fallocate */ "freebsd32_posix_fadvise", /* 531 = freebsd32_posix_fadvise */ "freebsd32_wait6", /* 532 = freebsd32_wait6 */ #endif "cap_rights_limit", /* 533 = cap_rights_limit */ "freebsd32_cap_ioctls_limit", /* 534 = freebsd32_cap_ioctls_limit */ "freebsd32_cap_ioctls_get", /* 535 = freebsd32_cap_ioctls_get */ "cap_fcntls_limit", /* 536 = cap_fcntls_limit */ "cap_fcntls_get", /* 537 = cap_fcntls_get */ "bindat", /* 538 = bindat */ "connectat", /* 539 = connectat */ "chflagsat", /* 540 = chflagsat */ "accept4", /* 541 = accept4 */ "pipe2", /* 542 = pipe2 */ "freebsd32_aio_mlock", /* 543 = freebsd32_aio_mlock */ #ifdef PAD64_REQUIRED "freebsd32_procctl", /* 544 = freebsd32_procctl */ #else "freebsd32_procctl", /* 544 = freebsd32_procctl */ #endif "freebsd32_ppoll", /* 545 = freebsd32_ppoll */ "freebsd32_futimens", /* 546 = freebsd32_futimens */ "freebsd32_utimensat", /* 547 = freebsd32_utimensat */ - "#548", /* 548 = numa_getaffinity */ - "#549", /* 549 = numa_setaffinity */ + "obs_numa_getaffinity", /* 548 = obsolete numa_getaffinity */ + "obs_numa_setaffinity", /* 549 = obsolete numa_setaffinity */ "fdatasync", /* 550 = fdatasync */ "freebsd32_fstat", /* 551 = freebsd32_fstat */ "freebsd32_fstatat", /* 552 = freebsd32_fstatat */ "freebsd32_fhstat", /* 553 = freebsd32_fhstat */ "getdirentries", /* 554 = getdirentries */ "statfs", /* 555 = statfs */ "fstatfs", /* 556 = fstatfs */ "getfsstat", /* 557 = getfsstat */ "fhstatfs", /* 558 = fhstatfs */ "mknodat", /* 559 = mknodat */ "freebsd32_kevent", /* 560 = freebsd32_kevent */ "freebsd32_cpuset_getdomain", /* 561 = freebsd32_cpuset_getdomain */ "freebsd32_cpuset_setdomain", /* 562 = freebsd32_cpuset_setdomain */ "getrandom", /* 563 = getrandom */ }; Index: projects/clang700-import/sys/compat/freebsd32/freebsd32_sysent.c =================================================================== --- projects/clang700-import/sys/compat/freebsd32/freebsd32_sysent.c (revision 339014) +++ projects/clang700-import/sys/compat/freebsd32/freebsd32_sysent.c (revision 339015) @@ -1,646 +1,646 @@ /* * System call switch table. * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ */ #include #include #include #include #include #include #include #define AS(name) (sizeof(struct name) / sizeof(register_t)) #ifdef COMPAT_43 #define compat(n, name) n, (sy_call_t *)__CONCAT(o,name) #else #define compat(n, name) 0, (sy_call_t *)nosys #endif #ifdef COMPAT_FREEBSD4 #define compat4(n, name) n, (sy_call_t *)__CONCAT(freebsd4_,name) #else #define compat4(n, name) 0, (sy_call_t *)nosys #endif #ifdef COMPAT_FREEBSD6 #define compat6(n, name) n, (sy_call_t *)__CONCAT(freebsd6_,name) #else #define compat6(n, name) 0, (sy_call_t *)nosys #endif #ifdef COMPAT_FREEBSD7 #define compat7(n, name) n, (sy_call_t *)__CONCAT(freebsd7_,name) #else #define compat7(n, name) 0, (sy_call_t *)nosys #endif #ifdef COMPAT_FREEBSD10 #define compat10(n, name) n, (sy_call_t *)__CONCAT(freebsd10_,name) #else #define compat10(n, name) 0, (sy_call_t *)nosys #endif #ifdef COMPAT_FREEBSD11 #define compat11(n, name) n, (sy_call_t *)__CONCAT(freebsd11_,name) #else #define compat11(n, name) 0, (sy_call_t *)nosys #endif /* The casts are bogus but will do for now. */ struct sysent freebsd32_sysent[] = { #if !defined(PAD64_REQUIRED) && (defined(__powerpc__) || defined(__mips__)) #define PAD64_REQUIRED #endif { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 0 = syscall */ { AS(sys_exit_args), (sy_call_t *)sys_sys_exit, AUE_EXIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 1 = exit */ { 0, (sy_call_t *)sys_fork, AUE_FORK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 2 = fork */ { AS(read_args), (sy_call_t *)sys_read, AUE_READ, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 3 = read */ { AS(write_args), (sy_call_t *)sys_write, AUE_WRITE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 4 = write */ { AS(open_args), (sy_call_t *)sys_open, AUE_OPEN_RWTC, NULL, 0, 0, 0, SY_THR_STATIC }, /* 5 = open */ { AS(close_args), (sy_call_t *)sys_close, AUE_CLOSE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 6 = close */ { AS(freebsd32_wait4_args), (sy_call_t *)freebsd32_wait4, AUE_WAIT4, NULL, 0, 0, 0, SY_THR_STATIC }, /* 7 = freebsd32_wait4 */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 8 = obsolete old creat */ { AS(link_args), (sy_call_t *)sys_link, AUE_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 9 = link */ { AS(unlink_args), (sy_call_t *)sys_unlink, AUE_UNLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 10 = unlink */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 11 = obsolete execv */ { AS(chdir_args), (sy_call_t *)sys_chdir, AUE_CHDIR, NULL, 0, 0, 0, SY_THR_STATIC }, /* 12 = chdir */ { AS(fchdir_args), (sy_call_t *)sys_fchdir, AUE_FCHDIR, NULL, 0, 0, 0, SY_THR_STATIC }, /* 13 = fchdir */ { compat11(AS(freebsd11_freebsd32_mknod_args),freebsd32_mknod), AUE_MKNOD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 14 = freebsd11 freebsd32_mknod */ { AS(chmod_args), (sy_call_t *)sys_chmod, AUE_CHMOD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 15 = chmod */ { AS(chown_args), (sy_call_t *)sys_chown, AUE_CHOWN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 16 = chown */ { AS(break_args), (sy_call_t *)sys_break, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 17 = break */ { compat4(AS(freebsd4_freebsd32_getfsstat_args),freebsd32_getfsstat), AUE_GETFSSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 18 = freebsd4 freebsd32_getfsstat */ { compat(AS(ofreebsd32_lseek_args),freebsd32_lseek), AUE_LSEEK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 19 = old freebsd32_lseek */ { 0, (sy_call_t *)sys_getpid, AUE_GETPID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 20 = getpid */ { AS(mount_args), (sy_call_t *)sys_mount, AUE_MOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 21 = mount */ { AS(unmount_args), (sy_call_t *)sys_unmount, AUE_UMOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 22 = unmount */ { AS(setuid_args), (sy_call_t *)sys_setuid, AUE_SETUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 23 = setuid */ { 0, (sy_call_t *)sys_getuid, AUE_GETUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 24 = getuid */ { 0, (sy_call_t *)sys_geteuid, AUE_GETEUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 25 = geteuid */ { AS(ptrace_args), (sy_call_t *)sys_ptrace, AUE_PTRACE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 26 = ptrace */ { AS(freebsd32_recvmsg_args), (sy_call_t *)freebsd32_recvmsg, AUE_RECVMSG, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 27 = freebsd32_recvmsg */ { AS(freebsd32_sendmsg_args), (sy_call_t *)freebsd32_sendmsg, AUE_SENDMSG, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 28 = freebsd32_sendmsg */ { AS(freebsd32_recvfrom_args), (sy_call_t *)freebsd32_recvfrom, AUE_RECVFROM, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 29 = freebsd32_recvfrom */ { AS(accept_args), (sy_call_t *)sys_accept, AUE_ACCEPT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 30 = accept */ { AS(getpeername_args), (sy_call_t *)sys_getpeername, AUE_GETPEERNAME, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 31 = getpeername */ { AS(getsockname_args), (sy_call_t *)sys_getsockname, AUE_GETSOCKNAME, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 32 = getsockname */ { AS(access_args), (sy_call_t *)sys_access, AUE_ACCESS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 33 = access */ { AS(chflags_args), (sy_call_t *)sys_chflags, AUE_CHFLAGS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 34 = chflags */ { AS(fchflags_args), (sy_call_t *)sys_fchflags, AUE_FCHFLAGS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 35 = fchflags */ { 0, (sy_call_t *)sys_sync, AUE_SYNC, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 36 = sync */ { AS(kill_args), (sy_call_t *)sys_kill, AUE_KILL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 37 = kill */ { compat(AS(ofreebsd32_stat_args),freebsd32_stat), AUE_STAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 38 = old freebsd32_stat */ { 0, (sy_call_t *)sys_getppid, AUE_GETPPID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 39 = getppid */ { compat(AS(ofreebsd32_lstat_args),freebsd32_lstat), AUE_LSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 40 = old freebsd32_lstat */ { AS(dup_args), (sy_call_t *)sys_dup, AUE_DUP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 41 = dup */ { compat10(0,freebsd32_pipe), AUE_PIPE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 42 = freebsd10 freebsd32_pipe */ { 0, (sy_call_t *)sys_getegid, AUE_GETEGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 43 = getegid */ { AS(profil_args), (sy_call_t *)sys_profil, AUE_PROFILE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 44 = profil */ { AS(ktrace_args), (sy_call_t *)sys_ktrace, AUE_KTRACE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 45 = ktrace */ { compat(AS(ofreebsd32_sigaction_args),freebsd32_sigaction), AUE_SIGACTION, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 46 = old freebsd32_sigaction */ { 0, (sy_call_t *)sys_getgid, AUE_GETGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 47 = getgid */ { compat(AS(ofreebsd32_sigprocmask_args),freebsd32_sigprocmask), AUE_SIGPROCMASK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 48 = old freebsd32_sigprocmask */ { AS(getlogin_args), (sy_call_t *)sys_getlogin, AUE_GETLOGIN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 49 = getlogin */ { AS(setlogin_args), (sy_call_t *)sys_setlogin, AUE_SETLOGIN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 50 = setlogin */ { AS(acct_args), (sy_call_t *)sys_acct, AUE_ACCT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 51 = acct */ { compat(0,freebsd32_sigpending), AUE_SIGPENDING, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 52 = old freebsd32_sigpending */ { AS(freebsd32_sigaltstack_args), (sy_call_t *)freebsd32_sigaltstack, AUE_SIGALTSTACK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 53 = freebsd32_sigaltstack */ { AS(freebsd32_ioctl_args), (sy_call_t *)freebsd32_ioctl, AUE_IOCTL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 54 = freebsd32_ioctl */ { AS(reboot_args), (sy_call_t *)sys_reboot, AUE_REBOOT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 55 = reboot */ { AS(revoke_args), (sy_call_t *)sys_revoke, AUE_REVOKE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 56 = revoke */ { AS(symlink_args), (sy_call_t *)sys_symlink, AUE_SYMLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 57 = symlink */ { AS(readlink_args), (sy_call_t *)sys_readlink, AUE_READLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 58 = readlink */ { AS(freebsd32_execve_args), (sy_call_t *)freebsd32_execve, AUE_EXECVE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 59 = freebsd32_execve */ { AS(umask_args), (sy_call_t *)sys_umask, AUE_UMASK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 60 = umask */ { AS(chroot_args), (sy_call_t *)sys_chroot, AUE_CHROOT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 61 = chroot */ { compat(AS(ofreebsd32_fstat_args),freebsd32_fstat), AUE_FSTAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 62 = old freebsd32_fstat */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 63 = obsolete ogetkerninfo */ { compat(AS(ofreebsd32_getpagesize_args),freebsd32_getpagesize), AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 64 = old freebsd32_getpagesize */ { AS(msync_args), (sy_call_t *)sys_msync, AUE_MSYNC, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 65 = msync */ { 0, (sy_call_t *)sys_vfork, AUE_VFORK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 66 = vfork */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 67 = obsolete vread */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 68 = obsolete vwrite */ { AS(sbrk_args), (sy_call_t *)sys_sbrk, AUE_SBRK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 69 = sbrk */ { AS(sstk_args), (sy_call_t *)sys_sstk, AUE_SSTK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 70 = sstk */ { compat(AS(ommap_args),mmap), AUE_MMAP, NULL, 0, 0, 0, SY_THR_STATIC }, /* 71 = old mmap */ { compat11(AS(freebsd11_vadvise_args),vadvise), AUE_O_VADVISE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 72 = freebsd11 vadvise */ { AS(munmap_args), (sy_call_t *)sys_munmap, AUE_MUNMAP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 73 = munmap */ { AS(freebsd32_mprotect_args), (sy_call_t *)freebsd32_mprotect, AUE_MPROTECT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 74 = freebsd32_mprotect */ { AS(madvise_args), (sy_call_t *)sys_madvise, AUE_MADVISE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 75 = madvise */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 76 = obsolete vhangup */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 77 = obsolete vlimit */ { AS(mincore_args), (sy_call_t *)sys_mincore, AUE_MINCORE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 78 = mincore */ { AS(getgroups_args), (sy_call_t *)sys_getgroups, AUE_GETGROUPS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 79 = getgroups */ { AS(setgroups_args), (sy_call_t *)sys_setgroups, AUE_SETGROUPS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 80 = setgroups */ { 0, (sy_call_t *)sys_getpgrp, AUE_GETPGRP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 81 = getpgrp */ { AS(setpgid_args), (sy_call_t *)sys_setpgid, AUE_SETPGRP, NULL, 0, 0, 0, SY_THR_STATIC }, /* 82 = setpgid */ { AS(freebsd32_setitimer_args), (sy_call_t *)freebsd32_setitimer, AUE_SETITIMER, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 83 = freebsd32_setitimer */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 84 = obsolete owait */ { AS(swapon_args), (sy_call_t *)sys_swapon, AUE_SWAPON, NULL, 0, 0, 0, SY_THR_STATIC }, /* 85 = swapon */ { AS(freebsd32_getitimer_args), (sy_call_t *)freebsd32_getitimer, AUE_GETITIMER, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 86 = freebsd32_getitimer */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 87 = obsolete ogethostname */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 88 = obsolete osethostname */ { 0, (sy_call_t *)sys_getdtablesize, AUE_GETDTABLESIZE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 89 = getdtablesize */ { AS(dup2_args), (sy_call_t *)sys_dup2, AUE_DUP2, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 90 = dup2 */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 91 = getdopt */ { AS(freebsd32_fcntl_args), (sy_call_t *)freebsd32_fcntl, AUE_FCNTL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 92 = freebsd32_fcntl */ { AS(freebsd32_select_args), (sy_call_t *)freebsd32_select, AUE_SELECT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 93 = freebsd32_select */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 94 = setdopt */ { AS(fsync_args), (sy_call_t *)sys_fsync, AUE_FSYNC, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 95 = fsync */ { AS(setpriority_args), (sy_call_t *)sys_setpriority, AUE_SETPRIORITY, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 96 = setpriority */ { AS(socket_args), (sy_call_t *)sys_socket, AUE_SOCKET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 97 = socket */ { AS(connect_args), (sy_call_t *)sys_connect, AUE_CONNECT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 98 = connect */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 99 = obsolete oaccept */ { AS(getpriority_args), (sy_call_t *)sys_getpriority, AUE_GETPRIORITY, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 100 = getpriority */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 101 = obsolete osend */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 102 = obsolete orecv */ { compat(AS(ofreebsd32_sigreturn_args),freebsd32_sigreturn), AUE_SIGRETURN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 103 = old freebsd32_sigreturn */ { AS(bind_args), (sy_call_t *)sys_bind, AUE_BIND, NULL, 0, 0, 0, SY_THR_STATIC }, /* 104 = bind */ { AS(setsockopt_args), (sy_call_t *)sys_setsockopt, AUE_SETSOCKOPT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 105 = setsockopt */ { AS(listen_args), (sy_call_t *)sys_listen, AUE_LISTEN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 106 = listen */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 107 = obsolete vtimes */ { compat(AS(ofreebsd32_sigvec_args),freebsd32_sigvec), AUE_O_SIGVEC, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 108 = old freebsd32_sigvec */ { compat(AS(ofreebsd32_sigblock_args),freebsd32_sigblock), AUE_O_SIGBLOCK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 109 = old freebsd32_sigblock */ { compat(AS(ofreebsd32_sigsetmask_args),freebsd32_sigsetmask), AUE_O_SIGSETMASK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 110 = old freebsd32_sigsetmask */ { compat(AS(ofreebsd32_sigsuspend_args),freebsd32_sigsuspend), AUE_SIGSUSPEND, NULL, 0, 0, 0, SY_THR_STATIC }, /* 111 = old freebsd32_sigsuspend */ { compat(AS(ofreebsd32_sigstack_args),freebsd32_sigstack), AUE_O_SIGSTACK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 112 = old freebsd32_sigstack */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 113 = obsolete orecvmsg */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 114 = obsolete osendmsg */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 115 = obsolete vtrace */ { AS(freebsd32_gettimeofday_args), (sy_call_t *)freebsd32_gettimeofday, AUE_GETTIMEOFDAY, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 116 = freebsd32_gettimeofday */ { AS(freebsd32_getrusage_args), (sy_call_t *)freebsd32_getrusage, AUE_GETRUSAGE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 117 = freebsd32_getrusage */ { AS(getsockopt_args), (sy_call_t *)sys_getsockopt, AUE_GETSOCKOPT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 118 = getsockopt */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 119 = resuba */ { AS(freebsd32_readv_args), (sy_call_t *)freebsd32_readv, AUE_READV, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 120 = freebsd32_readv */ { AS(freebsd32_writev_args), (sy_call_t *)freebsd32_writev, AUE_WRITEV, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 121 = freebsd32_writev */ { AS(freebsd32_settimeofday_args), (sy_call_t *)freebsd32_settimeofday, AUE_SETTIMEOFDAY, NULL, 0, 0, 0, SY_THR_STATIC }, /* 122 = freebsd32_settimeofday */ { AS(fchown_args), (sy_call_t *)sys_fchown, AUE_FCHOWN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 123 = fchown */ { AS(fchmod_args), (sy_call_t *)sys_fchmod, AUE_FCHMOD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 124 = fchmod */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 125 = obsolete orecvfrom */ { AS(setreuid_args), (sy_call_t *)sys_setreuid, AUE_SETREUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 126 = setreuid */ { AS(setregid_args), (sy_call_t *)sys_setregid, AUE_SETREGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 127 = setregid */ { AS(rename_args), (sy_call_t *)sys_rename, AUE_RENAME, NULL, 0, 0, 0, SY_THR_STATIC }, /* 128 = rename */ { compat(AS(otruncate_args),truncate), AUE_TRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 129 = old truncate */ { compat(AS(oftruncate_args),ftruncate), AUE_FTRUNCATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 130 = old ftruncate */ { AS(flock_args), (sy_call_t *)sys_flock, AUE_FLOCK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 131 = flock */ { AS(mkfifo_args), (sy_call_t *)sys_mkfifo, AUE_MKFIFO, NULL, 0, 0, 0, SY_THR_STATIC }, /* 132 = mkfifo */ { AS(sendto_args), (sy_call_t *)sys_sendto, AUE_SENDTO, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 133 = sendto */ { AS(shutdown_args), (sy_call_t *)sys_shutdown, AUE_SHUTDOWN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 134 = shutdown */ { AS(socketpair_args), (sy_call_t *)sys_socketpair, AUE_SOCKETPAIR, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 135 = socketpair */ { AS(mkdir_args), (sy_call_t *)sys_mkdir, AUE_MKDIR, NULL, 0, 0, 0, SY_THR_STATIC }, /* 136 = mkdir */ { AS(rmdir_args), (sy_call_t *)sys_rmdir, AUE_RMDIR, NULL, 0, 0, 0, SY_THR_STATIC }, /* 137 = rmdir */ { AS(freebsd32_utimes_args), (sy_call_t *)freebsd32_utimes, AUE_UTIMES, NULL, 0, 0, 0, SY_THR_STATIC }, /* 138 = freebsd32_utimes */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 139 = obsolete 4.2 sigreturn */ { AS(freebsd32_adjtime_args), (sy_call_t *)freebsd32_adjtime, AUE_ADJTIME, NULL, 0, 0, 0, SY_THR_STATIC }, /* 140 = freebsd32_adjtime */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 141 = obsolete ogetpeername */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 142 = obsolete ogethostid */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 143 = obsolete sethostid */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 144 = obsolete getrlimit */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 145 = obsolete setrlimit */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 146 = obsolete killpg */ { 0, (sy_call_t *)sys_setsid, AUE_SETSID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 147 = setsid */ { AS(quotactl_args), (sy_call_t *)sys_quotactl, AUE_QUOTACTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 148 = quotactl */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 149 = obsolete oquota */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 150 = obsolete ogetsockname */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 151 = sem_lock */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 152 = sem_wakeup */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 153 = asyncdaemon */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 154 = nlm_syscall */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 155 = nfssvc */ { compat(AS(ofreebsd32_getdirentries_args),freebsd32_getdirentries), AUE_GETDIRENTRIES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 156 = old freebsd32_getdirentries */ { compat4(AS(freebsd4_freebsd32_statfs_args),freebsd32_statfs), AUE_STATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 157 = freebsd4 freebsd32_statfs */ { compat4(AS(freebsd4_freebsd32_fstatfs_args),freebsd32_fstatfs), AUE_FSTATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 158 = freebsd4 freebsd32_fstatfs */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 159 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 160 = lgetfh */ { AS(getfh_args), (sy_call_t *)sys_getfh, AUE_NFS_GETFH, NULL, 0, 0, 0, SY_THR_STATIC }, /* 161 = getfh */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 162 = obsolete getdomainname */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 163 = obsolete setdomainname */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 164 = obsolete uname */ { AS(freebsd32_sysarch_args), (sy_call_t *)freebsd32_sysarch, AUE_SYSARCH, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 165 = freebsd32_sysarch */ { AS(rtprio_args), (sy_call_t *)sys_rtprio, AUE_RTPRIO, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 166 = rtprio */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 167 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 168 = nosys */ { AS(freebsd32_semsys_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 169 = freebsd32_semsys */ { AS(freebsd32_msgsys_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 170 = freebsd32_msgsys */ { AS(freebsd32_shmsys_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 171 = freebsd32_shmsys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 172 = nosys */ { compat6(AS(freebsd6_freebsd32_pread_args),freebsd32_pread), AUE_PREAD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 173 = freebsd6 freebsd32_pread */ { compat6(AS(freebsd6_freebsd32_pwrite_args),freebsd32_pwrite), AUE_PWRITE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 174 = freebsd6 freebsd32_pwrite */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 175 = nosys */ { AS(ntp_adjtime_args), (sy_call_t *)sys_ntp_adjtime, AUE_NTP_ADJTIME, NULL, 0, 0, 0, SY_THR_STATIC }, /* 176 = ntp_adjtime */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 177 = sfork */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 178 = getdescriptor */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 179 = setdescriptor */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 180 = nosys */ { AS(setgid_args), (sy_call_t *)sys_setgid, AUE_SETGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 181 = setgid */ { AS(setegid_args), (sy_call_t *)sys_setegid, AUE_SETEGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 182 = setegid */ { AS(seteuid_args), (sy_call_t *)sys_seteuid, AUE_SETEUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 183 = seteuid */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 184 = lfs_bmapv */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 185 = lfs_markv */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 186 = lfs_segclean */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 187 = lfs_segwait */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 184 = obsolete lfs_bmapv */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 185 = obsolete lfs_markv */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 186 = obsolete lfs_segclean */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 187 = obsolete lfs_segwait */ { compat11(AS(freebsd11_freebsd32_stat_args),freebsd32_stat), AUE_STAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 188 = freebsd11 freebsd32_stat */ { compat11(AS(freebsd11_freebsd32_fstat_args),freebsd32_fstat), AUE_FSTAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 189 = freebsd11 freebsd32_fstat */ { compat11(AS(freebsd11_freebsd32_lstat_args),freebsd32_lstat), AUE_LSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 190 = freebsd11 freebsd32_lstat */ { AS(pathconf_args), (sy_call_t *)sys_pathconf, AUE_PATHCONF, NULL, 0, 0, 0, SY_THR_STATIC }, /* 191 = pathconf */ { AS(fpathconf_args), (sy_call_t *)sys_fpathconf, AUE_FPATHCONF, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 192 = fpathconf */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 193 = nosys */ { AS(__getrlimit_args), (sy_call_t *)sys_getrlimit, AUE_GETRLIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 194 = getrlimit */ { AS(__setrlimit_args), (sy_call_t *)sys_setrlimit, AUE_SETRLIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 195 = setrlimit */ { compat11(AS(freebsd11_freebsd32_getdirentries_args),freebsd32_getdirentries), AUE_GETDIRENTRIES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 196 = freebsd11 freebsd32_getdirentries */ { compat6(AS(freebsd6_freebsd32_mmap_args),freebsd32_mmap), AUE_MMAP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 197 = freebsd6 freebsd32_mmap */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 198 = __syscall */ { compat6(AS(freebsd6_freebsd32_lseek_args),freebsd32_lseek), AUE_LSEEK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 199 = freebsd6 freebsd32_lseek */ { compat6(AS(freebsd6_freebsd32_truncate_args),freebsd32_truncate), AUE_TRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 200 = freebsd6 freebsd32_truncate */ { compat6(AS(freebsd6_freebsd32_ftruncate_args),freebsd32_ftruncate), AUE_FTRUNCATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 201 = freebsd6 freebsd32_ftruncate */ { AS(freebsd32_sysctl_args), (sy_call_t *)freebsd32_sysctl, AUE_SYSCTL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 202 = freebsd32_sysctl */ { AS(mlock_args), (sy_call_t *)sys_mlock, AUE_MLOCK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 203 = mlock */ { AS(munlock_args), (sy_call_t *)sys_munlock, AUE_MUNLOCK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 204 = munlock */ { AS(undelete_args), (sy_call_t *)sys_undelete, AUE_UNDELETE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 205 = undelete */ { AS(freebsd32_futimes_args), (sy_call_t *)freebsd32_futimes, AUE_FUTIMES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 206 = freebsd32_futimes */ { AS(getpgid_args), (sy_call_t *)sys_getpgid, AUE_GETPGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 207 = getpgid */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 208 = nosys */ { AS(poll_args), (sy_call_t *)sys_poll, AUE_POLL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 209 = poll */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 210 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 211 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 212 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 213 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 214 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 215 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 216 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 217 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 218 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 219 = lkmnosys */ { 0, (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 220 = freebsd7 freebsd32_semctl */ { AS(semget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 221 = semget */ { AS(semop_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 222 = semop */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 223 = semconfig */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 223 = obsolete semconfig */ { 0, (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 224 = freebsd7 freebsd32_msgctl */ { AS(msgget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 225 = msgget */ { AS(freebsd32_msgsnd_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 226 = freebsd32_msgsnd */ { AS(freebsd32_msgrcv_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 227 = freebsd32_msgrcv */ { AS(shmat_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 228 = shmat */ { 0, (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 229 = freebsd7 freebsd32_shmctl */ { AS(shmdt_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 230 = shmdt */ { AS(shmget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 231 = shmget */ { AS(freebsd32_clock_gettime_args), (sy_call_t *)freebsd32_clock_gettime, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 232 = freebsd32_clock_gettime */ { AS(freebsd32_clock_settime_args), (sy_call_t *)freebsd32_clock_settime, AUE_CLOCK_SETTIME, NULL, 0, 0, 0, SY_THR_STATIC }, /* 233 = freebsd32_clock_settime */ { AS(freebsd32_clock_getres_args), (sy_call_t *)freebsd32_clock_getres, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 234 = freebsd32_clock_getres */ { AS(freebsd32_ktimer_create_args), (sy_call_t *)freebsd32_ktimer_create, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 235 = freebsd32_ktimer_create */ { AS(ktimer_delete_args), (sy_call_t *)sys_ktimer_delete, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 236 = ktimer_delete */ { AS(freebsd32_ktimer_settime_args), (sy_call_t *)freebsd32_ktimer_settime, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 237 = freebsd32_ktimer_settime */ { AS(freebsd32_ktimer_gettime_args), (sy_call_t *)freebsd32_ktimer_gettime, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 238 = freebsd32_ktimer_gettime */ { AS(ktimer_getoverrun_args), (sy_call_t *)sys_ktimer_getoverrun, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 239 = ktimer_getoverrun */ { AS(freebsd32_nanosleep_args), (sy_call_t *)freebsd32_nanosleep, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 240 = freebsd32_nanosleep */ { AS(ffclock_getcounter_args), (sy_call_t *)sys_ffclock_getcounter, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 241 = ffclock_getcounter */ { AS(ffclock_setestimate_args), (sy_call_t *)sys_ffclock_setestimate, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 242 = ffclock_setestimate */ { AS(ffclock_getestimate_args), (sy_call_t *)sys_ffclock_getestimate, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 243 = ffclock_getestimate */ { AS(freebsd32_clock_nanosleep_args), (sy_call_t *)freebsd32_clock_nanosleep, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 244 = freebsd32_clock_nanosleep */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 245 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 246 = nosys */ { AS(freebsd32_clock_getcpuclockid2_args), (sy_call_t *)freebsd32_clock_getcpuclockid2, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 247 = freebsd32_clock_getcpuclockid2 */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 248 = ntp_gettime */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 249 = nosys */ { AS(minherit_args), (sy_call_t *)sys_minherit, AUE_MINHERIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 250 = minherit */ { AS(rfork_args), (sy_call_t *)sys_rfork, AUE_RFORK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 251 = rfork */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 252 = obsolete openbsd_poll */ { 0, (sy_call_t *)sys_issetugid, AUE_ISSETUGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 253 = issetugid */ { AS(lchown_args), (sy_call_t *)sys_lchown, AUE_LCHOWN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 254 = lchown */ { AS(freebsd32_aio_read_args), (sy_call_t *)freebsd32_aio_read, AUE_AIO_READ, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 255 = freebsd32_aio_read */ { AS(freebsd32_aio_write_args), (sy_call_t *)freebsd32_aio_write, AUE_AIO_WRITE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 256 = freebsd32_aio_write */ { AS(freebsd32_lio_listio_args), (sy_call_t *)freebsd32_lio_listio, AUE_LIO_LISTIO, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 257 = freebsd32_lio_listio */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 258 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 259 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 260 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 261 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 262 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 263 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 264 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 265 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 266 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 267 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 268 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 269 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 270 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 271 = nosys */ { compat11(AS(freebsd11_freebsd32_getdents_args),freebsd32_getdents), AUE_O_GETDENTS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 272 = freebsd11 freebsd32_getdents */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 273 = nosys */ { AS(lchmod_args), (sy_call_t *)sys_lchmod, AUE_LCHMOD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 274 = lchmod */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 275 = obsolete netbsd_lchown */ { AS(freebsd32_lutimes_args), (sy_call_t *)freebsd32_lutimes, AUE_LUTIMES, NULL, 0, 0, 0, SY_THR_STATIC }, /* 276 = freebsd32_lutimes */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 277 = obsolete netbsd_msync */ { compat11(AS(freebsd11_nstat_args),nstat), AUE_STAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 278 = freebsd11 nstat */ { compat11(AS(freebsd11_nfstat_args),nfstat), AUE_FSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 279 = freebsd11 nfstat */ { compat11(AS(freebsd11_nlstat_args),nlstat), AUE_LSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 280 = freebsd11 nlstat */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 281 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 282 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 283 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 284 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 285 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 286 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 287 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 288 = nosys */ { AS(freebsd32_preadv_args), (sy_call_t *)freebsd32_preadv, AUE_PREADV, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 289 = freebsd32_preadv */ { AS(freebsd32_pwritev_args), (sy_call_t *)freebsd32_pwritev, AUE_PWRITEV, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 290 = freebsd32_pwritev */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 291 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 292 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 293 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 294 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 295 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 296 = nosys */ { compat4(AS(freebsd4_freebsd32_fhstatfs_args),freebsd32_fhstatfs), AUE_FHSTATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 297 = freebsd4 freebsd32_fhstatfs */ { AS(fhopen_args), (sy_call_t *)sys_fhopen, AUE_FHOPEN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 298 = fhopen */ { compat11(AS(freebsd11_freebsd32_fhstat_args),freebsd32_fhstat), AUE_FHSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 299 = freebsd11 freebsd32_fhstat */ { AS(modnext_args), (sy_call_t *)sys_modnext, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 300 = modnext */ { AS(freebsd32_modstat_args), (sy_call_t *)freebsd32_modstat, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 301 = freebsd32_modstat */ { AS(modfnext_args), (sy_call_t *)sys_modfnext, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 302 = modfnext */ { AS(modfind_args), (sy_call_t *)sys_modfind, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 303 = modfind */ { AS(kldload_args), (sy_call_t *)sys_kldload, AUE_MODLOAD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 304 = kldload */ { AS(kldunload_args), (sy_call_t *)sys_kldunload, AUE_MODUNLOAD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 305 = kldunload */ { AS(kldfind_args), (sy_call_t *)sys_kldfind, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 306 = kldfind */ { AS(kldnext_args), (sy_call_t *)sys_kldnext, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 307 = kldnext */ { AS(freebsd32_kldstat_args), (sy_call_t *)freebsd32_kldstat, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 308 = freebsd32_kldstat */ { AS(kldfirstmod_args), (sy_call_t *)sys_kldfirstmod, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 309 = kldfirstmod */ { AS(getsid_args), (sy_call_t *)sys_getsid, AUE_GETSID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 310 = getsid */ { AS(setresuid_args), (sy_call_t *)sys_setresuid, AUE_SETRESUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 311 = setresuid */ { AS(setresgid_args), (sy_call_t *)sys_setresgid, AUE_SETRESGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 312 = setresgid */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 313 = obsolete signanosleep */ { AS(freebsd32_aio_return_args), (sy_call_t *)freebsd32_aio_return, AUE_AIO_RETURN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 314 = freebsd32_aio_return */ { AS(freebsd32_aio_suspend_args), (sy_call_t *)freebsd32_aio_suspend, AUE_AIO_SUSPEND, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 315 = freebsd32_aio_suspend */ { AS(aio_cancel_args), (sy_call_t *)sys_aio_cancel, AUE_AIO_CANCEL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 316 = aio_cancel */ { AS(freebsd32_aio_error_args), (sy_call_t *)freebsd32_aio_error, AUE_AIO_ERROR, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 317 = freebsd32_aio_error */ { compat6(AS(freebsd6_freebsd32_aio_read_args),freebsd32_aio_read), AUE_AIO_READ, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 318 = freebsd6 freebsd32_aio_read */ { compat6(AS(freebsd6_freebsd32_aio_write_args),freebsd32_aio_write), AUE_AIO_WRITE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 319 = freebsd6 freebsd32_aio_write */ { compat6(AS(freebsd6_freebsd32_lio_listio_args),freebsd32_lio_listio), AUE_LIO_LISTIO, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 320 = freebsd6 freebsd32_lio_listio */ { 0, (sy_call_t *)sys_yield, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 321 = yield */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 322 = obsolete thr_sleep */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 323 = obsolete thr_wakeup */ { AS(mlockall_args), (sy_call_t *)sys_mlockall, AUE_MLOCKALL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 324 = mlockall */ { 0, (sy_call_t *)sys_munlockall, AUE_MUNLOCKALL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 325 = munlockall */ { AS(__getcwd_args), (sy_call_t *)sys___getcwd, AUE_GETCWD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 326 = __getcwd */ { AS(sched_setparam_args), (sy_call_t *)sys_sched_setparam, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 327 = sched_setparam */ { AS(sched_getparam_args), (sy_call_t *)sys_sched_getparam, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 328 = sched_getparam */ { AS(sched_setscheduler_args), (sy_call_t *)sys_sched_setscheduler, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 329 = sched_setscheduler */ { AS(sched_getscheduler_args), (sy_call_t *)sys_sched_getscheduler, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 330 = sched_getscheduler */ { 0, (sy_call_t *)sys_sched_yield, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 331 = sched_yield */ { AS(sched_get_priority_max_args), (sy_call_t *)sys_sched_get_priority_max, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 332 = sched_get_priority_max */ { AS(sched_get_priority_min_args), (sy_call_t *)sys_sched_get_priority_min, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 333 = sched_get_priority_min */ { AS(freebsd32_sched_rr_get_interval_args), (sy_call_t *)freebsd32_sched_rr_get_interval, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 334 = freebsd32_sched_rr_get_interval */ { AS(utrace_args), (sy_call_t *)sys_utrace, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 335 = utrace */ { compat4(AS(freebsd4_freebsd32_sendfile_args),freebsd32_sendfile), AUE_SENDFILE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 336 = freebsd4 freebsd32_sendfile */ { AS(kldsym_args), (sy_call_t *)sys_kldsym, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 337 = kldsym */ { AS(freebsd32_jail_args), (sy_call_t *)freebsd32_jail, AUE_JAIL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 338 = freebsd32_jail */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 339 = pioctl */ { AS(sigprocmask_args), (sy_call_t *)sys_sigprocmask, AUE_SIGPROCMASK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 340 = sigprocmask */ { AS(sigsuspend_args), (sy_call_t *)sys_sigsuspend, AUE_SIGSUSPEND, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 341 = sigsuspend */ { compat4(AS(freebsd4_freebsd32_sigaction_args),freebsd32_sigaction), AUE_SIGACTION, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 342 = freebsd4 freebsd32_sigaction */ { AS(sigpending_args), (sy_call_t *)sys_sigpending, AUE_SIGPENDING, NULL, 0, 0, 0, SY_THR_STATIC }, /* 343 = sigpending */ { compat4(AS(freebsd4_freebsd32_sigreturn_args),freebsd32_sigreturn), AUE_SIGRETURN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 344 = freebsd4 freebsd32_sigreturn */ { AS(freebsd32_sigtimedwait_args), (sy_call_t *)freebsd32_sigtimedwait, AUE_SIGWAIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 345 = freebsd32_sigtimedwait */ { AS(freebsd32_sigwaitinfo_args), (sy_call_t *)freebsd32_sigwaitinfo, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 346 = freebsd32_sigwaitinfo */ { AS(__acl_get_file_args), (sy_call_t *)sys___acl_get_file, AUE_ACL_GET_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 347 = __acl_get_file */ { AS(__acl_set_file_args), (sy_call_t *)sys___acl_set_file, AUE_ACL_SET_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 348 = __acl_set_file */ { AS(__acl_get_fd_args), (sy_call_t *)sys___acl_get_fd, AUE_ACL_GET_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 349 = __acl_get_fd */ { AS(__acl_set_fd_args), (sy_call_t *)sys___acl_set_fd, AUE_ACL_SET_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 350 = __acl_set_fd */ { AS(__acl_delete_file_args), (sy_call_t *)sys___acl_delete_file, AUE_ACL_DELETE_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 351 = __acl_delete_file */ { AS(__acl_delete_fd_args), (sy_call_t *)sys___acl_delete_fd, AUE_ACL_DELETE_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 352 = __acl_delete_fd */ { AS(__acl_aclcheck_file_args), (sy_call_t *)sys___acl_aclcheck_file, AUE_ACL_CHECK_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 353 = __acl_aclcheck_file */ { AS(__acl_aclcheck_fd_args), (sy_call_t *)sys___acl_aclcheck_fd, AUE_ACL_CHECK_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 354 = __acl_aclcheck_fd */ { AS(extattrctl_args), (sy_call_t *)sys_extattrctl, AUE_EXTATTRCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 355 = extattrctl */ { AS(extattr_set_file_args), (sy_call_t *)sys_extattr_set_file, AUE_EXTATTR_SET_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 356 = extattr_set_file */ { AS(extattr_get_file_args), (sy_call_t *)sys_extattr_get_file, AUE_EXTATTR_GET_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 357 = extattr_get_file */ { AS(extattr_delete_file_args), (sy_call_t *)sys_extattr_delete_file, AUE_EXTATTR_DELETE_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 358 = extattr_delete_file */ { AS(freebsd32_aio_waitcomplete_args), (sy_call_t *)freebsd32_aio_waitcomplete, AUE_AIO_WAITCOMPLETE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 359 = freebsd32_aio_waitcomplete */ { AS(getresuid_args), (sy_call_t *)sys_getresuid, AUE_GETRESUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 360 = getresuid */ { AS(getresgid_args), (sy_call_t *)sys_getresgid, AUE_GETRESGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 361 = getresgid */ { 0, (sy_call_t *)sys_kqueue, AUE_KQUEUE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 362 = kqueue */ { compat11(AS(freebsd11_freebsd32_kevent_args),freebsd32_kevent), AUE_KEVENT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 363 = freebsd11 freebsd32_kevent */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 364 = __cap_get_proc */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 365 = __cap_set_proc */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 366 = __cap_get_fd */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 367 = __cap_get_file */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 368 = __cap_set_fd */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 369 = __cap_set_file */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 364 = obsolete __cap_get_proc */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 365 = obsolete __cap_set_proc */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 366 = obsolete __cap_get_fd */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 367 = obsolete __cap_get_file */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 368 = obsolete __cap_set_fd */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 369 = obsolete __cap_set_file */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 370 = nosys */ { AS(extattr_set_fd_args), (sy_call_t *)sys_extattr_set_fd, AUE_EXTATTR_SET_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 371 = extattr_set_fd */ { AS(extattr_get_fd_args), (sy_call_t *)sys_extattr_get_fd, AUE_EXTATTR_GET_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 372 = extattr_get_fd */ { AS(extattr_delete_fd_args), (sy_call_t *)sys_extattr_delete_fd, AUE_EXTATTR_DELETE_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 373 = extattr_delete_fd */ { AS(__setugid_args), (sy_call_t *)sys___setugid, AUE_SETUGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 374 = __setugid */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 375 = nfsclnt */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 375 = obsolete nfsclnt */ { AS(eaccess_args), (sy_call_t *)sys_eaccess, AUE_EACCESS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 376 = eaccess */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 377 = afs_syscall */ { AS(freebsd32_nmount_args), (sy_call_t *)freebsd32_nmount, AUE_NMOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 378 = freebsd32_nmount */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 379 = kse_exit */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 380 = kse_wakeup */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 381 = kse_create */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 382 = kse_thr_interrupt */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 383 = kse_release */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 379 = obsolete kse_exit */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 380 = obsolete kse_wakeup */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 381 = obsolete kse_create */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 382 = obsolete kse_thr_interrupt */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 383 = obsolete kse_release */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 384 = __mac_get_proc */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 385 = __mac_set_proc */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 386 = __mac_get_fd */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 387 = __mac_get_file */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 388 = __mac_set_fd */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 389 = __mac_set_file */ { AS(kenv_args), (sy_call_t *)sys_kenv, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 390 = kenv */ { AS(lchflags_args), (sy_call_t *)sys_lchflags, AUE_LCHFLAGS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 391 = lchflags */ { AS(uuidgen_args), (sy_call_t *)sys_uuidgen, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 392 = uuidgen */ { AS(freebsd32_sendfile_args), (sy_call_t *)freebsd32_sendfile, AUE_SENDFILE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 393 = freebsd32_sendfile */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 394 = mac_syscall */ { compat11(AS(freebsd11_getfsstat_args),getfsstat), AUE_GETFSSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 395 = freebsd11 getfsstat */ { compat11(AS(freebsd11_statfs_args),statfs), AUE_STATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 396 = freebsd11 statfs */ { compat11(AS(freebsd11_fstatfs_args),fstatfs), AUE_FSTATFS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 397 = freebsd11 fstatfs */ { compat11(AS(freebsd11_fhstatfs_args),fhstatfs), AUE_FHSTATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 398 = freebsd11 fhstatfs */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 399 = nosys */ { AS(ksem_close_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 400 = ksem_close */ { AS(ksem_post_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 401 = ksem_post */ { AS(ksem_wait_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 402 = ksem_wait */ { AS(ksem_trywait_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 403 = ksem_trywait */ { AS(freebsd32_ksem_init_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 404 = freebsd32_ksem_init */ { AS(freebsd32_ksem_open_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 405 = freebsd32_ksem_open */ { AS(ksem_unlink_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 406 = ksem_unlink */ { AS(ksem_getvalue_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 407 = ksem_getvalue */ { AS(ksem_destroy_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 408 = ksem_destroy */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 409 = __mac_get_pid */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 410 = __mac_get_link */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 411 = __mac_set_link */ { AS(extattr_set_link_args), (sy_call_t *)sys_extattr_set_link, AUE_EXTATTR_SET_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 412 = extattr_set_link */ { AS(extattr_get_link_args), (sy_call_t *)sys_extattr_get_link, AUE_EXTATTR_GET_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 413 = extattr_get_link */ { AS(extattr_delete_link_args), (sy_call_t *)sys_extattr_delete_link, AUE_EXTATTR_DELETE_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 414 = extattr_delete_link */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 415 = __mac_execve */ { AS(freebsd32_sigaction_args), (sy_call_t *)freebsd32_sigaction, AUE_SIGACTION, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 416 = freebsd32_sigaction */ { AS(freebsd32_sigreturn_args), (sy_call_t *)freebsd32_sigreturn, AUE_SIGRETURN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 417 = freebsd32_sigreturn */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 418 = __xstat */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 419 = __xfstat */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 420 = __xlstat */ { AS(freebsd32_getcontext_args), (sy_call_t *)freebsd32_getcontext, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 421 = freebsd32_getcontext */ { AS(freebsd32_setcontext_args), (sy_call_t *)freebsd32_setcontext, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 422 = freebsd32_setcontext */ { AS(freebsd32_swapcontext_args), (sy_call_t *)freebsd32_swapcontext, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 423 = freebsd32_swapcontext */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 424 = swapoff */ { AS(__acl_get_link_args), (sy_call_t *)sys___acl_get_link, AUE_ACL_GET_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 425 = __acl_get_link */ { AS(__acl_set_link_args), (sy_call_t *)sys___acl_set_link, AUE_ACL_SET_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 426 = __acl_set_link */ { AS(__acl_delete_link_args), (sy_call_t *)sys___acl_delete_link, AUE_ACL_DELETE_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 427 = __acl_delete_link */ { AS(__acl_aclcheck_link_args), (sy_call_t *)sys___acl_aclcheck_link, AUE_ACL_CHECK_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 428 = __acl_aclcheck_link */ { AS(sigwait_args), (sy_call_t *)sys_sigwait, AUE_SIGWAIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 429 = sigwait */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 430 = thr_create; */ { AS(thr_exit_args), (sy_call_t *)sys_thr_exit, AUE_THR_EXIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 431 = thr_exit */ { AS(thr_self_args), (sy_call_t *)sys_thr_self, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 432 = thr_self */ { AS(thr_kill_args), (sy_call_t *)sys_thr_kill, AUE_THR_KILL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 433 = thr_kill */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 434 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 435 = nosys */ { AS(jail_attach_args), (sy_call_t *)sys_jail_attach, AUE_JAIL_ATTACH, NULL, 0, 0, 0, SY_THR_STATIC }, /* 436 = jail_attach */ { AS(extattr_list_fd_args), (sy_call_t *)sys_extattr_list_fd, AUE_EXTATTR_LIST_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 437 = extattr_list_fd */ { AS(extattr_list_file_args), (sy_call_t *)sys_extattr_list_file, AUE_EXTATTR_LIST_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 438 = extattr_list_file */ { AS(extattr_list_link_args), (sy_call_t *)sys_extattr_list_link, AUE_EXTATTR_LIST_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 439 = extattr_list_link */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 440 = kse_switchin */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 440 = obsolete kse_switchin */ { AS(freebsd32_ksem_timedwait_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 441 = freebsd32_ksem_timedwait */ { AS(freebsd32_thr_suspend_args), (sy_call_t *)freebsd32_thr_suspend, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 442 = freebsd32_thr_suspend */ { AS(thr_wake_args), (sy_call_t *)sys_thr_wake, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 443 = thr_wake */ { AS(kldunloadf_args), (sy_call_t *)sys_kldunloadf, AUE_MODUNLOAD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 444 = kldunloadf */ { AS(audit_args), (sy_call_t *)sys_audit, AUE_AUDIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 445 = audit */ { AS(auditon_args), (sy_call_t *)sys_auditon, AUE_AUDITON, NULL, 0, 0, 0, SY_THR_STATIC }, /* 446 = auditon */ { AS(getauid_args), (sy_call_t *)sys_getauid, AUE_GETAUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 447 = getauid */ { AS(setauid_args), (sy_call_t *)sys_setauid, AUE_SETAUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 448 = setauid */ { AS(getaudit_args), (sy_call_t *)sys_getaudit, AUE_GETAUDIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 449 = getaudit */ { AS(setaudit_args), (sy_call_t *)sys_setaudit, AUE_SETAUDIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 450 = setaudit */ { AS(getaudit_addr_args), (sy_call_t *)sys_getaudit_addr, AUE_GETAUDIT_ADDR, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 451 = getaudit_addr */ { AS(setaudit_addr_args), (sy_call_t *)sys_setaudit_addr, AUE_SETAUDIT_ADDR, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 452 = setaudit_addr */ { AS(auditctl_args), (sy_call_t *)sys_auditctl, AUE_AUDITCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 453 = auditctl */ { AS(freebsd32_umtx_op_args), (sy_call_t *)freebsd32_umtx_op, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 454 = freebsd32_umtx_op */ { AS(freebsd32_thr_new_args), (sy_call_t *)freebsd32_thr_new, AUE_THR_NEW, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 455 = freebsd32_thr_new */ { AS(freebsd32_sigqueue_args), (sy_call_t *)freebsd32_sigqueue, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 456 = freebsd32_sigqueue */ { AS(freebsd32_kmq_open_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 457 = freebsd32_kmq_open */ { AS(freebsd32_kmq_setattr_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 458 = freebsd32_kmq_setattr */ { AS(freebsd32_kmq_timedreceive_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 459 = freebsd32_kmq_timedreceive */ { AS(freebsd32_kmq_timedsend_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 460 = freebsd32_kmq_timedsend */ { AS(freebsd32_kmq_notify_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 461 = freebsd32_kmq_notify */ { AS(kmq_unlink_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 462 = kmq_unlink */ { AS(abort2_args), (sy_call_t *)sys_abort2, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 463 = abort2 */ { AS(thr_set_name_args), (sy_call_t *)sys_thr_set_name, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 464 = thr_set_name */ { AS(freebsd32_aio_fsync_args), (sy_call_t *)freebsd32_aio_fsync, AUE_AIO_FSYNC, NULL, 0, 0, 0, SY_THR_STATIC }, /* 465 = freebsd32_aio_fsync */ { AS(rtprio_thread_args), (sy_call_t *)sys_rtprio_thread, AUE_RTPRIO, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 466 = rtprio_thread */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 467 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 468 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 469 = __getpath_fromfd */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 470 = __getpath_fromaddr */ { AS(sctp_peeloff_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 471 = sctp_peeloff */ { AS(sctp_generic_sendmsg_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 472 = sctp_generic_sendmsg */ { AS(sctp_generic_sendmsg_iov_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 473 = sctp_generic_sendmsg_iov */ { AS(sctp_generic_recvmsg_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 474 = sctp_generic_recvmsg */ #ifdef PAD64_REQUIRED { AS(freebsd32_pread_args), (sy_call_t *)freebsd32_pread, AUE_PREAD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 475 = freebsd32_pread */ { AS(freebsd32_pwrite_args), (sy_call_t *)freebsd32_pwrite, AUE_PWRITE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 476 = freebsd32_pwrite */ { AS(freebsd32_mmap_args), (sy_call_t *)freebsd32_mmap, AUE_MMAP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 477 = freebsd32_mmap */ { AS(freebsd32_lseek_args), (sy_call_t *)freebsd32_lseek, AUE_LSEEK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 478 = freebsd32_lseek */ { AS(freebsd32_truncate_args), (sy_call_t *)freebsd32_truncate, AUE_TRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 479 = freebsd32_truncate */ { AS(freebsd32_ftruncate_args), (sy_call_t *)freebsd32_ftruncate, AUE_FTRUNCATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 480 = freebsd32_ftruncate */ #else { AS(freebsd32_pread_args), (sy_call_t *)freebsd32_pread, AUE_PREAD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 475 = freebsd32_pread */ { AS(freebsd32_pwrite_args), (sy_call_t *)freebsd32_pwrite, AUE_PWRITE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 476 = freebsd32_pwrite */ { AS(freebsd32_mmap_args), (sy_call_t *)freebsd32_mmap, AUE_MMAP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 477 = freebsd32_mmap */ { AS(freebsd32_lseek_args), (sy_call_t *)freebsd32_lseek, AUE_LSEEK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 478 = freebsd32_lseek */ { AS(freebsd32_truncate_args), (sy_call_t *)freebsd32_truncate, AUE_TRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 479 = freebsd32_truncate */ { AS(freebsd32_ftruncate_args), (sy_call_t *)freebsd32_ftruncate, AUE_FTRUNCATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 480 = freebsd32_ftruncate */ #endif { AS(thr_kill2_args), (sy_call_t *)sys_thr_kill2, AUE_THR_KILL2, NULL, 0, 0, 0, SY_THR_STATIC }, /* 481 = thr_kill2 */ { AS(shm_open_args), (sy_call_t *)sys_shm_open, AUE_SHMOPEN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 482 = shm_open */ { AS(shm_unlink_args), (sy_call_t *)sys_shm_unlink, AUE_SHMUNLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 483 = shm_unlink */ { AS(cpuset_args), (sy_call_t *)sys_cpuset, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 484 = cpuset */ #ifdef PAD64_REQUIRED { AS(freebsd32_cpuset_setid_args), (sy_call_t *)freebsd32_cpuset_setid, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 485 = freebsd32_cpuset_setid */ #else { AS(freebsd32_cpuset_setid_args), (sy_call_t *)freebsd32_cpuset_setid, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 485 = freebsd32_cpuset_setid */ #endif { AS(freebsd32_cpuset_getid_args), (sy_call_t *)freebsd32_cpuset_getid, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 486 = freebsd32_cpuset_getid */ { AS(freebsd32_cpuset_getaffinity_args), (sy_call_t *)freebsd32_cpuset_getaffinity, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 487 = freebsd32_cpuset_getaffinity */ { AS(freebsd32_cpuset_setaffinity_args), (sy_call_t *)freebsd32_cpuset_setaffinity, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 488 = freebsd32_cpuset_setaffinity */ { AS(faccessat_args), (sy_call_t *)sys_faccessat, AUE_FACCESSAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 489 = faccessat */ { AS(fchmodat_args), (sy_call_t *)sys_fchmodat, AUE_FCHMODAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 490 = fchmodat */ { AS(fchownat_args), (sy_call_t *)sys_fchownat, AUE_FCHOWNAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 491 = fchownat */ { AS(freebsd32_fexecve_args), (sy_call_t *)freebsd32_fexecve, AUE_FEXECVE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 492 = freebsd32_fexecve */ { compat11(AS(freebsd11_freebsd32_fstatat_args),freebsd32_fstatat), AUE_FSTATAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 493 = freebsd11 freebsd32_fstatat */ { AS(freebsd32_futimesat_args), (sy_call_t *)freebsd32_futimesat, AUE_FUTIMESAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 494 = freebsd32_futimesat */ { AS(linkat_args), (sy_call_t *)sys_linkat, AUE_LINKAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 495 = linkat */ { AS(mkdirat_args), (sy_call_t *)sys_mkdirat, AUE_MKDIRAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 496 = mkdirat */ { AS(mkfifoat_args), (sy_call_t *)sys_mkfifoat, AUE_MKFIFOAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 497 = mkfifoat */ { compat11(AS(freebsd11_freebsd32_mknodat_args),freebsd32_mknodat), AUE_MKNODAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 498 = freebsd11 freebsd32_mknodat */ { AS(openat_args), (sy_call_t *)sys_openat, AUE_OPENAT_RWTC, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 499 = openat */ { AS(readlinkat_args), (sy_call_t *)sys_readlinkat, AUE_READLINKAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 500 = readlinkat */ { AS(renameat_args), (sy_call_t *)sys_renameat, AUE_RENAMEAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 501 = renameat */ { AS(symlinkat_args), (sy_call_t *)sys_symlinkat, AUE_SYMLINKAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 502 = symlinkat */ { AS(unlinkat_args), (sy_call_t *)sys_unlinkat, AUE_UNLINKAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 503 = unlinkat */ { AS(posix_openpt_args), (sy_call_t *)sys_posix_openpt, AUE_POSIX_OPENPT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 504 = posix_openpt */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 505 = gssd_syscall */ { AS(freebsd32_jail_get_args), (sy_call_t *)freebsd32_jail_get, AUE_JAIL_GET, NULL, 0, 0, 0, SY_THR_STATIC }, /* 506 = freebsd32_jail_get */ { AS(freebsd32_jail_set_args), (sy_call_t *)freebsd32_jail_set, AUE_JAIL_SET, NULL, 0, 0, 0, SY_THR_STATIC }, /* 507 = freebsd32_jail_set */ { AS(jail_remove_args), (sy_call_t *)sys_jail_remove, AUE_JAIL_REMOVE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 508 = jail_remove */ { AS(closefrom_args), (sy_call_t *)sys_closefrom, AUE_CLOSEFROM, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 509 = closefrom */ { AS(freebsd32_semctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 510 = freebsd32_semctl */ { AS(freebsd32_msgctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 511 = freebsd32_msgctl */ { AS(freebsd32_shmctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 512 = freebsd32_shmctl */ { AS(lpathconf_args), (sy_call_t *)sys_lpathconf, AUE_LPATHCONF, NULL, 0, 0, 0, SY_THR_STATIC }, /* 513 = lpathconf */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 514 = obsolete cap_new */ { AS(__cap_rights_get_args), (sy_call_t *)sys___cap_rights_get, AUE_CAP_RIGHTS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 515 = __cap_rights_get */ { 0, (sy_call_t *)sys_cap_enter, AUE_CAP_ENTER, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 516 = cap_enter */ { AS(cap_getmode_args), (sy_call_t *)sys_cap_getmode, AUE_CAP_GETMODE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 517 = cap_getmode */ { AS(pdfork_args), (sy_call_t *)sys_pdfork, AUE_PDFORK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 518 = pdfork */ { AS(pdkill_args), (sy_call_t *)sys_pdkill, AUE_PDKILL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 519 = pdkill */ { AS(pdgetpid_args), (sy_call_t *)sys_pdgetpid, AUE_PDGETPID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 520 = pdgetpid */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 521 = pdwait4 */ { AS(freebsd32_pselect_args), (sy_call_t *)freebsd32_pselect, AUE_SELECT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 522 = freebsd32_pselect */ { AS(getloginclass_args), (sy_call_t *)sys_getloginclass, AUE_GETLOGINCLASS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 523 = getloginclass */ { AS(setloginclass_args), (sy_call_t *)sys_setloginclass, AUE_SETLOGINCLASS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 524 = setloginclass */ { AS(rctl_get_racct_args), (sy_call_t *)sys_rctl_get_racct, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 525 = rctl_get_racct */ { AS(rctl_get_rules_args), (sy_call_t *)sys_rctl_get_rules, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 526 = rctl_get_rules */ { AS(rctl_get_limits_args), (sy_call_t *)sys_rctl_get_limits, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 527 = rctl_get_limits */ { AS(rctl_add_rule_args), (sy_call_t *)sys_rctl_add_rule, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 528 = rctl_add_rule */ { AS(rctl_remove_rule_args), (sy_call_t *)sys_rctl_remove_rule, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 529 = rctl_remove_rule */ #ifdef PAD64_REQUIRED { AS(freebsd32_posix_fallocate_args), (sy_call_t *)freebsd32_posix_fallocate, AUE_POSIX_FALLOCATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 530 = freebsd32_posix_fallocate */ { AS(freebsd32_posix_fadvise_args), (sy_call_t *)freebsd32_posix_fadvise, AUE_POSIX_FADVISE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 531 = freebsd32_posix_fadvise */ { AS(freebsd32_wait6_args), (sy_call_t *)freebsd32_wait6, AUE_WAIT6, NULL, 0, 0, 0, SY_THR_STATIC }, /* 532 = freebsd32_wait6 */ #else { AS(freebsd32_posix_fallocate_args), (sy_call_t *)freebsd32_posix_fallocate, AUE_POSIX_FALLOCATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 530 = freebsd32_posix_fallocate */ { AS(freebsd32_posix_fadvise_args), (sy_call_t *)freebsd32_posix_fadvise, AUE_POSIX_FADVISE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 531 = freebsd32_posix_fadvise */ { AS(freebsd32_wait6_args), (sy_call_t *)freebsd32_wait6, AUE_WAIT6, NULL, 0, 0, 0, SY_THR_STATIC }, /* 532 = freebsd32_wait6 */ #endif { AS(cap_rights_limit_args), (sy_call_t *)sys_cap_rights_limit, AUE_CAP_RIGHTS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 533 = cap_rights_limit */ { AS(freebsd32_cap_ioctls_limit_args), (sy_call_t *)freebsd32_cap_ioctls_limit, AUE_CAP_IOCTLS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 534 = freebsd32_cap_ioctls_limit */ { AS(freebsd32_cap_ioctls_get_args), (sy_call_t *)freebsd32_cap_ioctls_get, AUE_CAP_IOCTLS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 535 = freebsd32_cap_ioctls_get */ { AS(cap_fcntls_limit_args), (sy_call_t *)sys_cap_fcntls_limit, AUE_CAP_FCNTLS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 536 = cap_fcntls_limit */ { AS(cap_fcntls_get_args), (sy_call_t *)sys_cap_fcntls_get, AUE_CAP_FCNTLS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 537 = cap_fcntls_get */ { AS(bindat_args), (sy_call_t *)sys_bindat, AUE_BINDAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 538 = bindat */ { AS(connectat_args), (sy_call_t *)sys_connectat, AUE_CONNECTAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 539 = connectat */ { AS(chflagsat_args), (sy_call_t *)sys_chflagsat, AUE_CHFLAGSAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 540 = chflagsat */ { AS(accept4_args), (sy_call_t *)sys_accept4, AUE_ACCEPT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 541 = accept4 */ { AS(pipe2_args), (sy_call_t *)sys_pipe2, AUE_PIPE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 542 = pipe2 */ { AS(freebsd32_aio_mlock_args), (sy_call_t *)freebsd32_aio_mlock, AUE_AIO_MLOCK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 543 = freebsd32_aio_mlock */ #ifdef PAD64_REQUIRED { AS(freebsd32_procctl_args), (sy_call_t *)freebsd32_procctl, AUE_PROCCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 544 = freebsd32_procctl */ #else { AS(freebsd32_procctl_args), (sy_call_t *)freebsd32_procctl, AUE_PROCCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 544 = freebsd32_procctl */ #endif { AS(freebsd32_ppoll_args), (sy_call_t *)freebsd32_ppoll, AUE_POLL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 545 = freebsd32_ppoll */ { AS(freebsd32_futimens_args), (sy_call_t *)freebsd32_futimens, AUE_FUTIMES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 546 = freebsd32_futimens */ { AS(freebsd32_utimensat_args), (sy_call_t *)freebsd32_utimensat, AUE_FUTIMESAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 547 = freebsd32_utimensat */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 548 = numa_getaffinity */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 549 = numa_setaffinity */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 548 = obsolete numa_getaffinity */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 549 = obsolete numa_setaffinity */ { AS(fdatasync_args), (sy_call_t *)sys_fdatasync, AUE_FSYNC, NULL, 0, 0, 0, SY_THR_STATIC }, /* 550 = fdatasync */ { AS(freebsd32_fstat_args), (sy_call_t *)freebsd32_fstat, AUE_FSTAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 551 = freebsd32_fstat */ { AS(freebsd32_fstatat_args), (sy_call_t *)freebsd32_fstatat, AUE_FSTATAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 552 = freebsd32_fstatat */ { AS(freebsd32_fhstat_args), (sy_call_t *)freebsd32_fhstat, AUE_FHSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 553 = freebsd32_fhstat */ { AS(getdirentries_args), (sy_call_t *)sys_getdirentries, AUE_GETDIRENTRIES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 554 = getdirentries */ { AS(statfs_args), (sy_call_t *)sys_statfs, AUE_STATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 555 = statfs */ { AS(fstatfs_args), (sy_call_t *)sys_fstatfs, AUE_FSTATFS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 556 = fstatfs */ { AS(getfsstat_args), (sy_call_t *)sys_getfsstat, AUE_GETFSSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 557 = getfsstat */ { AS(fhstatfs_args), (sy_call_t *)sys_fhstatfs, AUE_FHSTATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 558 = fhstatfs */ { AS(mknodat_args), (sy_call_t *)sys_mknodat, AUE_MKNODAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 559 = mknodat */ { AS(freebsd32_kevent_args), (sy_call_t *)freebsd32_kevent, AUE_KEVENT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 560 = freebsd32_kevent */ { AS(freebsd32_cpuset_getdomain_args), (sy_call_t *)freebsd32_cpuset_getdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 561 = freebsd32_cpuset_getdomain */ { AS(freebsd32_cpuset_setdomain_args), (sy_call_t *)freebsd32_cpuset_setdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 562 = freebsd32_cpuset_setdomain */ { AS(getrandom_args), (sy_call_t *)sys_getrandom, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 563 = getrandom */ }; Index: projects/clang700-import/sys/compat/freebsd32/syscalls.master =================================================================== --- projects/clang700-import/sys/compat/freebsd32/syscalls.master (revision 339014) +++ projects/clang700-import/sys/compat/freebsd32/syscalls.master (revision 339015) @@ -1,1117 +1,1117 @@ $FreeBSD$ ; from: @(#)syscalls.master 8.2 (Berkeley) 1/13/94 ; from: src/sys/kern/syscalls.master 1.107 ; ; System call name/number master file. ; Processed to created init_sysent.c, syscalls.c and syscall.h. ; Columns: number audit type name alt{name,tag,rtyp}/comments ; number system call number, must be in order ; audit the audit event associated with the system call ; A value of AUE_NULL means no auditing, but it also means that ; there is no audit event for the call at this time. For the ; case where the event exists, but we don't want auditing, the ; event should be #defined to AUE_NULL in audit_kevents.h. ; type one of STD, OBSOL, UNIMPL, COMPAT, COMPAT4, COMPAT6, ; COMPAT7, COMPAT11, NODEF, NOARGS, NOPROTO, NOSTD ; The COMPAT* options may be combined with one or more NO* ; options separated by '|' with no spaces (e.g. COMPAT|NOARGS) ; name pseudo-prototype of syscall routine ; If one of the following alts is different, then all appear: ; altname name of system call if different ; alttag name of args struct tag if different from [o]`name'"_args" ; altrtyp return type if not int (bogus - syscalls always return int) ; for UNIMPL/OBSOL, name continues with comments ; types: ; STD always included ; COMPAT included on COMPAT #ifdef ; COMPAT4 included on COMPAT4 #ifdef (FreeBSD 4 compat) ; COMPAT6 included on COMPAT6 #ifdef (FreeBSD 6 compat) ; COMPAT7 included on COMPAT7 #ifdef (FreeBSD 7 compat) ; COMPAT10 included on COMPAT10 #ifdef (FreeBSD 10 compat) ; COMPAT11 included on COMPAT11 #ifdef (FreeBSD 11 compat) ; OBSOL obsolete, not included in system, only specifies name ; UNIMPL not implemented, placeholder only ; NOSTD implemented but as a lkm that can be statically ; compiled in; sysent entry will be filled with lkmressys ; so the SYSCALL_MODULE macro works ; NOARGS same as STD except do not create structure in sys/sysproto.h ; NODEF same as STD except only have the entry in the syscall table ; added. Meaning - do not create structure or function ; prototype in sys/sysproto.h ; NOPROTO same as STD except do not create structure or ; function prototype in sys/sysproto.h. Does add a ; definition to syscall.h besides adding a sysent. ; #ifdef's, etc. may be included, and are copied to the output files. #include #include #include #include #include #include #include #if !defined(PAD64_REQUIRED) && (defined(__powerpc__) || defined(__mips__)) #define PAD64_REQUIRED #endif ; Reserved/unimplemented system calls in the range 0-150 inclusive ; are reserved for use in future Berkeley releases. ; Additional system calls implemented in vendor and other ; redistributions should be placed in the reserved range at the end ; of the current calls. 0 AUE_NULL NOPROTO { int nosys(void); } syscall nosys_args int 1 AUE_EXIT NOPROTO { void sys_exit(int rval); } exit \ sys_exit_args void 2 AUE_FORK NOPROTO { int fork(void); } 3 AUE_READ NOPROTO { ssize_t read(int fd, void *buf, \ size_t nbyte); } 4 AUE_WRITE NOPROTO { ssize_t write(int fd, const void *buf, \ size_t nbyte); } 5 AUE_OPEN_RWTC NOPROTO { int open(char *path, int flags, \ int mode); } 6 AUE_CLOSE NOPROTO { int close(int fd); } 7 AUE_WAIT4 STD { int freebsd32_wait4(int pid, int *status, \ int options, struct rusage32 *rusage); } 8 AUE_CREAT OBSOL old creat 9 AUE_LINK NOPROTO { int link(char *path, char *link); } 10 AUE_UNLINK NOPROTO { int unlink(char *path); } 11 AUE_NULL OBSOL execv 12 AUE_CHDIR NOPROTO { int chdir(char *path); } 13 AUE_FCHDIR NOPROTO { int fchdir(int fd); } 14 AUE_MKNOD COMPAT11 { int freebsd32_mknod(char *path, \ int mode, int dev); } 15 AUE_CHMOD NOPROTO { int chmod(char *path, int mode); } 16 AUE_CHOWN NOPROTO { int chown(char *path, int uid, int gid); } 17 AUE_NULL NOPROTO { caddr_t break(char *nsize); } 18 AUE_GETFSSTAT COMPAT4 { int freebsd32_getfsstat( \ struct statfs32 *buf, long bufsize, \ int mode); } 19 AUE_LSEEK COMPAT { int freebsd32_lseek(int fd, int offset, \ int whence); } 20 AUE_GETPID NOPROTO { pid_t getpid(void); } 21 AUE_MOUNT NOPROTO { int mount(char *type, char *path, \ int flags, caddr_t data); } 22 AUE_UMOUNT NOPROTO { int unmount(char *path, int flags); } 23 AUE_SETUID NOPROTO { int setuid(uid_t uid); } 24 AUE_GETUID NOPROTO { uid_t getuid(void); } 25 AUE_GETEUID NOPROTO { uid_t geteuid(void); } 26 AUE_PTRACE NOPROTO { int ptrace(int req, pid_t pid, \ caddr_t addr, int data); } 27 AUE_RECVMSG STD { int freebsd32_recvmsg(int s, struct msghdr32 *msg, \ int flags); } 28 AUE_SENDMSG STD { int freebsd32_sendmsg(int s, struct msghdr32 *msg, \ int flags); } 29 AUE_RECVFROM STD { int freebsd32_recvfrom(int s, uint32_t buf, \ uint32_t len, int flags, uint32_t from, \ uint32_t fromlenaddr); } 30 AUE_ACCEPT NOPROTO { int accept(int s, caddr_t name, \ int *anamelen); } 31 AUE_GETPEERNAME NOPROTO { int getpeername(int fdes, caddr_t asa, \ int *alen); } 32 AUE_GETSOCKNAME NOPROTO { int getsockname(int fdes, caddr_t asa, \ int *alen); } 33 AUE_ACCESS NOPROTO { int access(char *path, int amode); } 34 AUE_CHFLAGS NOPROTO { int chflags(const char *path, u_long flags); } 35 AUE_FCHFLAGS NOPROTO { int fchflags(int fd, u_long flags); } 36 AUE_SYNC NOPROTO { int sync(void); } 37 AUE_KILL NOPROTO { int kill(int pid, int signum); } 38 AUE_STAT COMPAT { int freebsd32_stat(char *path, \ struct ostat32 *ub); } 39 AUE_GETPPID NOPROTO { pid_t getppid(void); } 40 AUE_LSTAT COMPAT { int freebsd32_lstat(char *path, \ struct ostat *ub); } 41 AUE_DUP NOPROTO { int dup(u_int fd); } 42 AUE_PIPE COMPAT10 { int freebsd32_pipe(void); } 43 AUE_GETEGID NOPROTO { gid_t getegid(void); } 44 AUE_PROFILE NOPROTO { int profil(caddr_t samples, size_t size, \ size_t offset, u_int scale); } 45 AUE_KTRACE NOPROTO { int ktrace(const char *fname, int ops, \ int facs, int pid); } 46 AUE_SIGACTION COMPAT { int freebsd32_sigaction( int signum, \ struct osigaction32 *nsa, \ struct osigaction32 *osa); } 47 AUE_GETGID NOPROTO { gid_t getgid(void); } 48 AUE_SIGPROCMASK COMPAT { int freebsd32_sigprocmask(int how, \ osigset_t mask); } 49 AUE_GETLOGIN NOPROTO { int getlogin(char *namebuf, \ u_int namelen); } 50 AUE_SETLOGIN NOPROTO { int setlogin(char *namebuf); } 51 AUE_ACCT NOPROTO { int acct(char *path); } 52 AUE_SIGPENDING COMPAT { int freebsd32_sigpending(void); } 53 AUE_SIGALTSTACK STD { int freebsd32_sigaltstack( \ struct sigaltstack32 *ss, \ struct sigaltstack32 *oss); } 54 AUE_IOCTL STD { int freebsd32_ioctl(int fd, uint32_t com, \ struct md_ioctl32 *data); } 55 AUE_REBOOT NOPROTO { int reboot(int opt); } 56 AUE_REVOKE NOPROTO { int revoke(char *path); } 57 AUE_SYMLINK NOPROTO { int symlink(char *path, char *link); } 58 AUE_READLINK NOPROTO { ssize_t readlink(char *path, char *buf, \ size_t count); } 59 AUE_EXECVE STD { int freebsd32_execve(char *fname, \ uint32_t *argv, uint32_t *envv); } 60 AUE_UMASK NOPROTO { int umask(int newmask); } 61 AUE_CHROOT NOPROTO { int chroot(char *path); } 62 AUE_FSTAT COMPAT { int freebsd32_fstat(int fd, \ struct ostat32 *ub); } 63 AUE_NULL OBSOL ogetkerninfo 64 AUE_NULL COMPAT { int freebsd32_getpagesize( \ int32_t dummy); } 65 AUE_MSYNC NOPROTO { int msync(void *addr, size_t len, \ int flags); } 66 AUE_VFORK NOPROTO { int vfork(void); } 67 AUE_NULL OBSOL vread 68 AUE_NULL OBSOL vwrite 69 AUE_SBRK NOPROTO { int sbrk(int incr); } 70 AUE_SSTK NOPROTO { int sstk(int incr); } 71 AUE_MMAP COMPAT|NOPROTO { int mmap(void *addr, int len, \ int prot, int flags, int fd, int pos); } 72 AUE_O_VADVISE COMPAT11|NOPROTO { int vadvise(int anom); } 73 AUE_MUNMAP NOPROTO { int munmap(void *addr, size_t len); } 74 AUE_MPROTECT STD { int freebsd32_mprotect(void *addr, \ size_t len, int prot); } 75 AUE_MADVISE NOPROTO { int madvise(void *addr, size_t len, \ int behav); } 76 AUE_NULL OBSOL vhangup 77 AUE_NULL OBSOL vlimit 78 AUE_MINCORE NOPROTO { int mincore(const void *addr, size_t len, \ char *vec); } 79 AUE_GETGROUPS NOPROTO { int getgroups(u_int gidsetsize, \ gid_t *gidset); } 80 AUE_SETGROUPS NOPROTO { int setgroups(u_int gidsetsize, \ gid_t *gidset); } 81 AUE_GETPGRP NOPROTO { int getpgrp(void); } 82 AUE_SETPGRP NOPROTO { int setpgid(int pid, int pgid); } 83 AUE_SETITIMER STD { int freebsd32_setitimer(u_int which, \ struct itimerval32 *itv, \ struct itimerval32 *oitv); } 84 AUE_NULL OBSOL owait ; XXX implement 85 AUE_SWAPON NOPROTO { int swapon(char *name); } 86 AUE_GETITIMER STD { int freebsd32_getitimer(u_int which, \ struct itimerval32 *itv); } 87 AUE_O_GETHOSTNAME OBSOL ogethostname 88 AUE_O_SETHOSTNAME OBSOL osethostname 89 AUE_GETDTABLESIZE NOPROTO { int getdtablesize(void); } 90 AUE_DUP2 NOPROTO { int dup2(u_int from, u_int to); } 91 AUE_NULL UNIMPL getdopt 92 AUE_FCNTL STD { int freebsd32_fcntl(int fd, int cmd, \ int arg); } 93 AUE_SELECT STD { int freebsd32_select(int nd, fd_set *in, \ fd_set *ou, fd_set *ex, \ struct timeval32 *tv); } 94 AUE_NULL UNIMPL setdopt 95 AUE_FSYNC NOPROTO { int fsync(int fd); } 96 AUE_SETPRIORITY NOPROTO { int setpriority(int which, int who, \ int prio); } 97 AUE_SOCKET NOPROTO { int socket(int domain, int type, \ int protocol); } 98 AUE_CONNECT NOPROTO { int connect(int s, caddr_t name, \ int namelen); } 99 AUE_NULL OBSOL oaccept 100 AUE_GETPRIORITY NOPROTO { int getpriority(int which, int who); } 101 AUE_NULL OBSOL osend 102 AUE_NULL OBSOL orecv 103 AUE_SIGRETURN COMPAT { int freebsd32_sigreturn( \ struct ia32_sigcontext3 *sigcntxp); } 104 AUE_BIND NOPROTO { int bind(int s, caddr_t name, \ int namelen); } 105 AUE_SETSOCKOPT NOPROTO { int setsockopt(int s, int level, \ int name, caddr_t val, int valsize); } 106 AUE_LISTEN NOPROTO { int listen(int s, int backlog); } 107 AUE_NULL OBSOL vtimes 108 AUE_O_SIGVEC COMPAT { int freebsd32_sigvec(int signum, \ struct sigvec32 *nsv, \ struct sigvec32 *osv); } 109 AUE_O_SIGBLOCK COMPAT { int freebsd32_sigblock(int mask); } 110 AUE_O_SIGSETMASK COMPAT { int freebsd32_sigsetmask( int mask); } 111 AUE_SIGSUSPEND COMPAT { int freebsd32_sigsuspend( int mask); } 112 AUE_O_SIGSTACK COMPAT { int freebsd32_sigstack( \ struct sigstack32 *nss, \ struct sigstack32 *oss); } 113 AUE_NULL OBSOL orecvmsg 114 AUE_NULL OBSOL osendmsg 115 AUE_NULL OBSOL vtrace 116 AUE_GETTIMEOFDAY STD { int freebsd32_gettimeofday( \ struct timeval32 *tp, \ struct timezone *tzp); } 117 AUE_GETRUSAGE STD { int freebsd32_getrusage(int who, \ struct rusage32 *rusage); } 118 AUE_GETSOCKOPT NOPROTO { int getsockopt(int s, int level, \ int name, caddr_t val, int *avalsize); } 119 AUE_NULL UNIMPL resuba (BSD/OS 2.x) 120 AUE_READV STD { int freebsd32_readv(int fd, \ struct iovec32 *iovp, u_int iovcnt); } 121 AUE_WRITEV STD { int freebsd32_writev(int fd, \ struct iovec32 *iovp, u_int iovcnt); } 122 AUE_SETTIMEOFDAY STD { int freebsd32_settimeofday( \ struct timeval32 *tv, \ struct timezone *tzp); } 123 AUE_FCHOWN NOPROTO { int fchown(int fd, int uid, int gid); } 124 AUE_FCHMOD NOPROTO { int fchmod(int fd, int mode); } 125 AUE_RECVFROM OBSOL orecvfrom 126 AUE_SETREUID NOPROTO { int setreuid(int ruid, int euid); } 127 AUE_SETREGID NOPROTO { int setregid(int rgid, int egid); } 128 AUE_RENAME NOPROTO { int rename(char *from, char *to); } 129 AUE_TRUNCATE COMPAT|NOPROTO { int truncate(char *path, \ int length); } 130 AUE_FTRUNCATE COMPAT|NOPROTO { int ftruncate(int fd, int length); } 131 AUE_FLOCK NOPROTO { int flock(int fd, int how); } 132 AUE_MKFIFO NOPROTO { int mkfifo(char *path, int mode); } 133 AUE_SENDTO NOPROTO { int sendto(int s, caddr_t buf, \ size_t len, int flags, caddr_t to, \ int tolen); } 134 AUE_SHUTDOWN NOPROTO { int shutdown(int s, int how); } 135 AUE_SOCKETPAIR NOPROTO { int socketpair(int domain, int type, \ int protocol, int *rsv); } 136 AUE_MKDIR NOPROTO { int mkdir(char *path, int mode); } 137 AUE_RMDIR NOPROTO { int rmdir(char *path); } 138 AUE_UTIMES STD { int freebsd32_utimes(char *path, \ struct timeval32 *tptr); } 139 AUE_NULL OBSOL 4.2 sigreturn 140 AUE_ADJTIME STD { int freebsd32_adjtime( \ struct timeval32 *delta, \ struct timeval32 *olddelta); } 141 AUE_GETPEERNAME OBSOL ogetpeername 142 AUE_SYSCTL OBSOL ogethostid 143 AUE_SYSCTL OBSOL sethostid 144 AUE_GETRLIMIT OBSOL getrlimit 145 AUE_SETRLIMIT OBSOL setrlimit 146 AUE_KILLPG OBSOL killpg 147 AUE_SETSID NOPROTO { int setsid(void); } 148 AUE_QUOTACTL NOPROTO { int quotactl(char *path, int cmd, int uid, \ caddr_t arg); } 149 AUE_O_QUOTA OBSOL oquota 150 AUE_GETSOCKNAME OBSOL ogetsockname ; Syscalls 151-180 inclusive are reserved for vendor-specific ; system calls. (This includes various calls added for compatibity ; with other Unix variants.) ; Some of these calls are now supported by BSD... 151 AUE_NULL UNIMPL sem_lock (BSD/OS 2.x) 152 AUE_NULL UNIMPL sem_wakeup (BSD/OS 2.x) 153 AUE_NULL UNIMPL asyncdaemon (BSD/OS 2.x) ; 154 is initialised by the NLM code, if present. 154 AUE_NULL UNIMPL nlm_syscall ; 155 is initialized by the NFS code, if present. ; XXX this is a problem!!! 155 AUE_NFS_SVC UNIMPL nfssvc 156 AUE_GETDIRENTRIES COMPAT { int freebsd32_getdirentries(int fd, \ char *buf, u_int count, uint32_t *basep); } 157 AUE_STATFS COMPAT4 { int freebsd32_statfs(char *path, \ struct statfs32 *buf); } 158 AUE_FSTATFS COMPAT4 { int freebsd32_fstatfs(int fd, \ struct statfs32 *buf); } 159 AUE_NULL UNIMPL nosys 160 AUE_LGETFH UNIMPL lgetfh 161 AUE_NFS_GETFH NOPROTO { int getfh(char *fname, \ struct fhandle *fhp); } 162 AUE_SYSCTL OBSOL getdomainname 163 AUE_SYSCTL OBSOL setdomainname 164 AUE_NULL OBSOL uname 165 AUE_SYSARCH STD { int freebsd32_sysarch(int op, char *parms); } 166 AUE_RTPRIO NOPROTO { int rtprio(int function, pid_t pid, \ struct rtprio *rtp); } 167 AUE_NULL UNIMPL nosys 168 AUE_NULL UNIMPL nosys 169 AUE_SEMSYS NOSTD { int freebsd32_semsys(int which, int a2, \ int a3, int a4, int a5); } 170 AUE_MSGSYS NOSTD { int freebsd32_msgsys(int which, int a2, \ int a3, int a4, int a5, int a6); } 171 AUE_SHMSYS NOSTD { int freebsd32_shmsys(uint32_t which, uint32_t a2, \ uint32_t a3, uint32_t a4); } 172 AUE_NULL UNIMPL nosys 173 AUE_PREAD COMPAT6 { ssize_t freebsd32_pread(int fd, void *buf, \ size_t nbyte, int pad, \ uint32_t offset1, uint32_t offset2); } 174 AUE_PWRITE COMPAT6 { ssize_t freebsd32_pwrite(int fd, \ const void *buf, size_t nbyte, int pad, \ uint32_t offset1, uint32_t offset2); } 175 AUE_NULL UNIMPL nosys 176 AUE_NTP_ADJTIME NOPROTO { int ntp_adjtime(struct timex *tp); } 177 AUE_NULL UNIMPL sfork (BSD/OS 2.x) 178 AUE_NULL UNIMPL getdescriptor (BSD/OS 2.x) 179 AUE_NULL UNIMPL setdescriptor (BSD/OS 2.x) 180 AUE_NULL UNIMPL nosys ; Syscalls 181-199 are used by/reserved for BSD 181 AUE_SETGID NOPROTO { int setgid(gid_t gid); } 182 AUE_SETEGID NOPROTO { int setegid(gid_t egid); } 183 AUE_SETEUID NOPROTO { int seteuid(uid_t euid); } -184 AUE_NULL UNIMPL lfs_bmapv -185 AUE_NULL UNIMPL lfs_markv -186 AUE_NULL UNIMPL lfs_segclean -187 AUE_NULL UNIMPL lfs_segwait +184 AUE_NULL OBSOL lfs_bmapv +185 AUE_NULL OBSOL lfs_markv +186 AUE_NULL OBSOL lfs_segclean +187 AUE_NULL OBSOL lfs_segwait 188 AUE_STAT COMPAT11 { int freebsd32_stat(char *path, \ struct freebsd11_stat32 *ub); } 189 AUE_FSTAT COMPAT11 { int freebsd32_fstat(int fd, \ struct freebsd11_stat32 *ub); } 190 AUE_LSTAT COMPAT11 { int freebsd32_lstat(char *path, \ struct freebsd11_stat32 *ub); } 191 AUE_PATHCONF NOPROTO { int pathconf(char *path, int name); } 192 AUE_FPATHCONF NOPROTO { int fpathconf(int fd, int name); } 193 AUE_NULL UNIMPL nosys 194 AUE_GETRLIMIT NOPROTO { int getrlimit(u_int which, \ struct rlimit *rlp); } getrlimit \ __getrlimit_args int 195 AUE_SETRLIMIT NOPROTO { int setrlimit(u_int which, \ struct rlimit *rlp); } setrlimit \ __setrlimit_args int 196 AUE_GETDIRENTRIES COMPAT11 { int freebsd32_getdirentries(int fd, \ char *buf, u_int count, int32_t *basep); } 197 AUE_MMAP COMPAT6 { caddr_t freebsd32_mmap(caddr_t addr, \ size_t len, int prot, int flags, int fd, \ int pad, uint32_t pos1, uint32_t pos2); } 198 AUE_NULL NOPROTO { int nosys(void); } __syscall \ __syscall_args int 199 AUE_LSEEK COMPAT6 { off_t freebsd32_lseek(int fd, int pad, \ uint32_t offset1, uint32_t offset2, \ int whence); } 200 AUE_TRUNCATE COMPAT6 { int freebsd32_truncate(char *path, \ int pad, uint32_t length1, \ uint32_t length2); } 201 AUE_FTRUNCATE COMPAT6 { int freebsd32_ftruncate(int fd, int pad, \ uint32_t length1, uint32_t length2); } 202 AUE_SYSCTL STD { int freebsd32_sysctl(int *name, \ u_int namelen, void *old, \ uint32_t *oldlenp, void *new, \ uint32_t newlen); } 203 AUE_MLOCK NOPROTO { int mlock(const void *addr, \ size_t len); } 204 AUE_MUNLOCK NOPROTO { int munlock(const void *addr, \ size_t len); } 205 AUE_UNDELETE NOPROTO { int undelete(char *path); } 206 AUE_FUTIMES STD { int freebsd32_futimes(int fd, \ struct timeval32 *tptr); } 207 AUE_GETPGID NOPROTO { int getpgid(pid_t pid); } 208 AUE_NULL UNIMPL nosys 209 AUE_POLL NOPROTO { int poll(struct pollfd *fds, u_int nfds, \ int timeout); } ; ; The following are reserved for loadable syscalls ; 210 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 211 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 212 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 213 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 214 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 215 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 216 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 217 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 218 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 219 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 220 AUE_SEMCTL COMPAT7|NOSTD { int freebsd32_semctl( \ int semid, int semnum, \ int cmd, union semun32 *arg); } 221 AUE_SEMGET NOSTD|NOPROTO { int semget(key_t key, int nsems, \ int semflg); } 222 AUE_SEMOP NOSTD|NOPROTO { int semop(int semid, \ struct sembuf *sops, u_int nsops); } -223 AUE_NULL UNIMPL semconfig +223 AUE_NULL OBSOL semconfig 224 AUE_MSGCTL COMPAT7|NOSTD { int freebsd32_msgctl( \ int msqid, int cmd, \ struct msqid_ds32_old *buf); } 225 AUE_MSGGET NOSTD|NOPROTO { int msgget(key_t key, int msgflg); } 226 AUE_MSGSND NOSTD { int freebsd32_msgsnd(int msqid, void *msgp, \ size_t msgsz, int msgflg); } 227 AUE_MSGRCV NOSTD { int freebsd32_msgrcv(int msqid, void *msgp, \ size_t msgsz, long msgtyp, int msgflg); } 228 AUE_SHMAT NOSTD|NOPROTO { int shmat(int shmid, void *shmaddr, \ int shmflg); } 229 AUE_SHMCTL COMPAT7|NOSTD { int freebsd32_shmctl( \ int shmid, int cmd, \ struct shmid_ds32_old *buf); } 230 AUE_SHMDT NOSTD|NOPROTO { int shmdt(void *shmaddr); } 231 AUE_SHMGET NOSTD|NOPROTO { int shmget(key_t key, int size, \ int shmflg); } ; 232 AUE_NULL STD { int freebsd32_clock_gettime(clockid_t clock_id, \ struct timespec32 *tp); } 233 AUE_CLOCK_SETTIME STD { int freebsd32_clock_settime(clockid_t clock_id, \ const struct timespec32 *tp); } 234 AUE_NULL STD { int freebsd32_clock_getres(clockid_t clock_id, \ struct timespec32 *tp); } 235 AUE_NULL STD { int freebsd32_ktimer_create(\ clockid_t clock_id, \ struct sigevent32 *evp, int *timerid); } 236 AUE_NULL NOPROTO { int ktimer_delete(int timerid); } 237 AUE_NULL STD { int freebsd32_ktimer_settime(int timerid,\ int flags, \ const struct itimerspec32 *value, \ struct itimerspec32 *ovalue); } 238 AUE_NULL STD { int freebsd32_ktimer_gettime(int timerid,\ struct itimerspec32 *value); } 239 AUE_NULL NOPROTO { int ktimer_getoverrun(int timerid); } 240 AUE_NULL STD { int freebsd32_nanosleep( \ const struct timespec32 *rqtp, \ struct timespec32 *rmtp); } 241 AUE_NULL NOPROTO { int ffclock_getcounter(ffcounter *ffcount); } 242 AUE_NULL NOPROTO { int ffclock_setestimate( \ struct ffclock_estimate *cest); } 243 AUE_NULL NOPROTO { int ffclock_getestimate( \ struct ffclock_estimate *cest); } 244 AUE_NULL STD { int freebsd32_clock_nanosleep( \ clockid_t clock_id, int flags, \ const struct timespec32 *rqtp, \ struct timespec32 *rmtp); } 245 AUE_NULL UNIMPL nosys 246 AUE_NULL UNIMPL nosys 247 AUE_NULL STD { int freebsd32_clock_getcpuclockid2(\ uint32_t id1, uint32_t id2,\ int which, clockid_t *clock_id); } 248 AUE_NULL UNIMPL ntp_gettime 249 AUE_NULL UNIMPL nosys 250 AUE_MINHERIT NOPROTO { int minherit(void *addr, size_t len, \ int inherit); } 251 AUE_RFORK NOPROTO { int rfork(int flags); } 252 AUE_POLL OBSOL openbsd_poll 253 AUE_ISSETUGID NOPROTO { int issetugid(void); } 254 AUE_LCHOWN NOPROTO { int lchown(char *path, int uid, int gid); } 255 AUE_AIO_READ STD { int freebsd32_aio_read( \ struct aiocb32 *aiocbp); } 256 AUE_AIO_WRITE STD { int freebsd32_aio_write( \ struct aiocb32 *aiocbp); } 257 AUE_LIO_LISTIO STD { int freebsd32_lio_listio(int mode, \ struct aiocb32 * const *acb_list, \ int nent, struct sigevent32 *sig); } 258 AUE_NULL UNIMPL nosys 259 AUE_NULL UNIMPL nosys 260 AUE_NULL UNIMPL nosys 261 AUE_NULL UNIMPL nosys 262 AUE_NULL UNIMPL nosys 263 AUE_NULL UNIMPL nosys 264 AUE_NULL UNIMPL nosys 265 AUE_NULL UNIMPL nosys 266 AUE_NULL UNIMPL nosys 267 AUE_NULL UNIMPL nosys 268 AUE_NULL UNIMPL nosys 269 AUE_NULL UNIMPL nosys 270 AUE_NULL UNIMPL nosys 271 AUE_NULL UNIMPL nosys 272 AUE_O_GETDENTS COMPAT11 { int freebsd32_getdents(int fd, char *buf, \ int count); } 273 AUE_NULL UNIMPL nosys 274 AUE_LCHMOD NOPROTO { int lchmod(char *path, mode_t mode); } 275 AUE_NULL OBSOL netbsd_lchown 276 AUE_LUTIMES STD { int freebsd32_lutimes(char *path, \ struct timeval32 *tptr); } 277 AUE_NULL OBSOL netbsd_msync 278 AUE_STAT COMPAT11|NOPROTO { int nstat(char *path, struct nstat *ub); } 279 AUE_FSTAT COMPAT11|NOPROTO { int nfstat(int fd, struct nstat *sb); } 280 AUE_LSTAT COMPAT11|NOPROTO { int nlstat(char *path, struct nstat *ub); } 281 AUE_NULL UNIMPL nosys 282 AUE_NULL UNIMPL nosys 283 AUE_NULL UNIMPL nosys 284 AUE_NULL UNIMPL nosys 285 AUE_NULL UNIMPL nosys 286 AUE_NULL UNIMPL nosys 287 AUE_NULL UNIMPL nosys 288 AUE_NULL UNIMPL nosys 289 AUE_PREADV STD { ssize_t freebsd32_preadv(int fd, \ struct iovec32 *iovp, \ u_int iovcnt, \ uint32_t offset1, uint32_t offset2); } 290 AUE_PWRITEV STD { ssize_t freebsd32_pwritev(int fd, \ struct iovec32 *iovp, \ u_int iovcnt, \ uint32_t offset1, uint32_t offset2); } 291 AUE_NULL UNIMPL nosys 292 AUE_NULL UNIMPL nosys 293 AUE_NULL UNIMPL nosys 294 AUE_NULL UNIMPL nosys 295 AUE_NULL UNIMPL nosys 296 AUE_NULL UNIMPL nosys 297 AUE_FHSTATFS COMPAT4 { int freebsd32_fhstatfs( \ const struct fhandle *u_fhp, \ struct statfs32 *buf); } 298 AUE_FHOPEN NOPROTO { int fhopen(const struct fhandle *u_fhp, \ int flags); } 299 AUE_FHSTAT COMPAT11 { int freebsd32_fhstat( \ const struct fhandle *u_fhp, \ struct freebsd11_stat32 *sb); } ; syscall numbers for FreeBSD 300 AUE_NULL NOPROTO { int modnext(int modid); } 301 AUE_NULL STD { int freebsd32_modstat(int modid, \ struct module_stat32* stat); } 302 AUE_NULL NOPROTO { int modfnext(int modid); } 303 AUE_NULL NOPROTO { int modfind(const char *name); } 304 AUE_MODLOAD NOPROTO { int kldload(const char *file); } 305 AUE_MODUNLOAD NOPROTO { int kldunload(int fileid); } 306 AUE_NULL NOPROTO { int kldfind(const char *file); } 307 AUE_NULL NOPROTO { int kldnext(int fileid); } 308 AUE_NULL STD { int freebsd32_kldstat(int fileid, \ struct kld32_file_stat* stat); } 309 AUE_NULL NOPROTO { int kldfirstmod(int fileid); } 310 AUE_GETSID NOPROTO { int getsid(pid_t pid); } 311 AUE_SETRESUID NOPROTO { int setresuid(uid_t ruid, uid_t euid, \ uid_t suid); } 312 AUE_SETRESGID NOPROTO { int setresgid(gid_t rgid, gid_t egid, \ gid_t sgid); } 313 AUE_NULL OBSOL signanosleep 314 AUE_AIO_RETURN STD { int freebsd32_aio_return( \ struct aiocb32 *aiocbp); } 315 AUE_AIO_SUSPEND STD { int freebsd32_aio_suspend( \ struct aiocb32 * const * aiocbp, int nent, \ const struct timespec32 *timeout); } 316 AUE_AIO_CANCEL NOPROTO { int aio_cancel(int fd, \ struct aiocb *aiocbp); } 317 AUE_AIO_ERROR STD { int freebsd32_aio_error( \ struct aiocb32 *aiocbp); } 318 AUE_AIO_READ COMPAT6 { int freebsd32_aio_read( \ struct oaiocb32 *aiocbp); } 319 AUE_AIO_WRITE COMPAT6 { int freebsd32_aio_write( \ struct oaiocb32 *aiocbp); } 320 AUE_LIO_LISTIO COMPAT6 { int freebsd32_lio_listio(int mode, \ struct oaiocb32 * const *acb_list, \ int nent, struct osigevent32 *sig); } 321 AUE_NULL NOPROTO { int yield(void); } 322 AUE_NULL OBSOL thr_sleep 323 AUE_NULL OBSOL thr_wakeup 324 AUE_MLOCKALL NOPROTO { int mlockall(int how); } 325 AUE_MUNLOCKALL NOPROTO { int munlockall(void); } 326 AUE_GETCWD NOPROTO { int __getcwd(char *buf, size_t buflen); } 327 AUE_NULL NOPROTO { int sched_setparam (pid_t pid, \ const struct sched_param *param); } 328 AUE_NULL NOPROTO { int sched_getparam (pid_t pid, \ struct sched_param *param); } 329 AUE_NULL NOPROTO { int sched_setscheduler (pid_t pid, \ int policy, \ const struct sched_param *param); } 330 AUE_NULL NOPROTO { int sched_getscheduler (pid_t pid); } 331 AUE_NULL NOPROTO { int sched_yield (void); } 332 AUE_NULL NOPROTO { int sched_get_priority_max (int policy); } 333 AUE_NULL NOPROTO { int sched_get_priority_min (int policy); } 334 AUE_NULL STD { int freebsd32_sched_rr_get_interval ( \ pid_t pid, \ struct timespec32 *interval); } 335 AUE_NULL NOPROTO { int utrace(const void *addr, size_t len); } 336 AUE_SENDFILE COMPAT4 { int freebsd32_sendfile(int fd, int s, \ uint32_t offset1, uint32_t offset2, \ size_t nbytes, struct sf_hdtr32 *hdtr, \ off_t *sbytes, int flags); } 337 AUE_NULL NOPROTO { int kldsym(int fileid, int cmd, \ void *data); } 338 AUE_JAIL STD { int freebsd32_jail(struct jail32 *jail); } 339 AUE_NULL UNIMPL pioctl 340 AUE_SIGPROCMASK NOPROTO { int sigprocmask(int how, \ const sigset_t *set, sigset_t *oset); } 341 AUE_SIGSUSPEND NOPROTO { int sigsuspend(const sigset_t *sigmask); } 342 AUE_SIGACTION COMPAT4 { int freebsd32_sigaction(int sig, \ struct sigaction32 *act, \ struct sigaction32 *oact); } 343 AUE_SIGPENDING NOPROTO { int sigpending(sigset_t *set); } 344 AUE_SIGRETURN COMPAT4 { int freebsd32_sigreturn( \ const struct freebsd4_freebsd32_ucontext *sigcntxp); } 345 AUE_SIGWAIT STD { int freebsd32_sigtimedwait(const sigset_t *set, \ siginfo_t *info, \ const struct timespec *timeout); } 346 AUE_NULL STD { int freebsd32_sigwaitinfo(const sigset_t *set, \ siginfo_t *info); } 347 AUE_ACL_GET_FILE NOPROTO { int __acl_get_file(const char *path, \ acl_type_t type, struct acl *aclp); } 348 AUE_ACL_SET_FILE NOPROTO { int __acl_set_file(const char *path, \ acl_type_t type, struct acl *aclp); } 349 AUE_ACL_GET_FD NOPROTO { int __acl_get_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 350 AUE_ACL_SET_FD NOPROTO { int __acl_set_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 351 AUE_ACL_DELETE_FILE NOPROTO { int __acl_delete_file(const char *path, \ acl_type_t type); } 352 AUE_ACL_DELETE_FD NOPROTO { int __acl_delete_fd(int filedes, \ acl_type_t type); } 353 AUE_ACL_CHECK_FILE NOPROTO { int __acl_aclcheck_file(const char *path, \ acl_type_t type, struct acl *aclp); } 354 AUE_ACL_CHECK_FD NOPROTO { int __acl_aclcheck_fd(int filedes, \ acl_type_t type, struct acl *aclp); } 355 AUE_EXTATTRCTL NOPROTO { int extattrctl(const char *path, int cmd, \ const char *filename, int attrnamespace, \ const char *attrname); } 356 AUE_EXTATTR_SET_FILE NOPROTO { ssize_t extattr_set_file( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 357 AUE_EXTATTR_GET_FILE NOPROTO { ssize_t extattr_get_file( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 358 AUE_EXTATTR_DELETE_FILE NOPROTO { int extattr_delete_file( \ const char *path, int attrnamespace, \ const char *attrname); } 359 AUE_AIO_WAITCOMPLETE STD { int freebsd32_aio_waitcomplete( \ struct aiocb32 **aiocbp, \ struct timespec32 *timeout); } 360 AUE_GETRESUID NOPROTO { int getresuid(uid_t *ruid, uid_t *euid, \ uid_t *suid); } 361 AUE_GETRESGID NOPROTO { int getresgid(gid_t *rgid, gid_t *egid, \ gid_t *sgid); } 362 AUE_KQUEUE NOPROTO { int kqueue(void); } 363 AUE_KEVENT COMPAT11 { int freebsd32_kevent(int fd, \ const struct kevent32_freebsd11 * \ changelist, \ int nchanges, \ struct kevent32_freebsd11 *eventlist, \ int nevents, \ const struct timespec32 *timeout); } -364 AUE_NULL UNIMPL __cap_get_proc -365 AUE_NULL UNIMPL __cap_set_proc -366 AUE_NULL UNIMPL __cap_get_fd -367 AUE_NULL UNIMPL __cap_get_file -368 AUE_NULL UNIMPL __cap_set_fd -369 AUE_NULL UNIMPL __cap_set_file +364 AUE_NULL OBSOL __cap_get_proc +365 AUE_NULL OBSOL __cap_set_proc +366 AUE_NULL OBSOL __cap_get_fd +367 AUE_NULL OBSOL __cap_get_file +368 AUE_NULL OBSOL __cap_set_fd +369 AUE_NULL OBSOL __cap_set_file 370 AUE_NULL UNIMPL nosys 371 AUE_EXTATTR_SET_FD NOPROTO { ssize_t extattr_set_fd(int fd, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 372 AUE_EXTATTR_GET_FD NOPROTO { ssize_t extattr_get_fd(int fd, \ int attrnamespace, const char *attrname, \ void *data, size_t nbytes); } 373 AUE_EXTATTR_DELETE_FD NOPROTO { int extattr_delete_fd(int fd, \ int attrnamespace, \ const char *attrname); } 374 AUE_SETUGID NOPROTO { int __setugid(int flag); } -375 AUE_NULL UNIMPL nfsclnt +375 AUE_NULL OBSOL nfsclnt 376 AUE_EACCESS NOPROTO { int eaccess(char *path, int amode); } 377 AUE_NULL UNIMPL afs_syscall 378 AUE_NMOUNT STD { int freebsd32_nmount(struct iovec32 *iovp, \ unsigned int iovcnt, int flags); } -379 AUE_NULL UNIMPL kse_exit -380 AUE_NULL UNIMPL kse_wakeup -381 AUE_NULL UNIMPL kse_create -382 AUE_NULL UNIMPL kse_thr_interrupt -383 AUE_NULL UNIMPL kse_release +379 AUE_NULL OBSOL kse_exit +380 AUE_NULL OBSOL kse_wakeup +381 AUE_NULL OBSOL kse_create +382 AUE_NULL OBSOL kse_thr_interrupt +383 AUE_NULL OBSOL kse_release 384 AUE_NULL UNIMPL __mac_get_proc 385 AUE_NULL UNIMPL __mac_set_proc 386 AUE_NULL UNIMPL __mac_get_fd 387 AUE_NULL UNIMPL __mac_get_file 388 AUE_NULL UNIMPL __mac_set_fd 389 AUE_NULL UNIMPL __mac_set_file 390 AUE_NULL NOPROTO { int kenv(int what, const char *name, \ char *value, int len); } 391 AUE_LCHFLAGS NOPROTO { int lchflags(const char *path, \ u_long flags); } 392 AUE_NULL NOPROTO { int uuidgen(struct uuid *store, \ int count); } 393 AUE_SENDFILE STD { int freebsd32_sendfile(int fd, int s, \ uint32_t offset1, uint32_t offset2, \ size_t nbytes, struct sf_hdtr32 *hdtr, \ off_t *sbytes, int flags); } 394 AUE_NULL UNIMPL mac_syscall 395 AUE_GETFSSTAT COMPAT11|NOPROTO { int getfsstat( \ struct freebsd11_statfs *buf, \ long bufsize, int mode); } 396 AUE_STATFS COMPAT11|NOPROTO { int statfs(char *path, \ struct statfs *buf); } 397 AUE_FSTATFS COMPAT11|NOPROTO { int fstatfs(int fd, \ struct freebsd11_statfs *buf); } 398 AUE_FHSTATFS COMPAT11|NOPROTO { int fhstatfs( \ const struct fhandle *u_fhp, \ struct freebsd11_statfs *buf); } 399 AUE_NULL UNIMPL nosys 400 AUE_SEMCLOSE NOSTD|NOPROTO { int ksem_close(semid_t id); } 401 AUE_SEMPOST NOSTD|NOPROTO { int ksem_post(semid_t id); } 402 AUE_SEMWAIT NOSTD|NOPROTO { int ksem_wait(semid_t id); } 403 AUE_SEMTRYWAIT NOSTD|NOPROTO { int ksem_trywait(semid_t id); } 404 AUE_SEMINIT NOSTD { int freebsd32_ksem_init(semid_t *idp, \ unsigned int value); } 405 AUE_SEMOPEN NOSTD { int freebsd32_ksem_open(semid_t *idp, \ const char *name, int oflag, \ mode_t mode, unsigned int value); } 406 AUE_SEMUNLINK NOSTD|NOPROTO { int ksem_unlink(const char *name); } 407 AUE_SEMGETVALUE NOSTD|NOPROTO { int ksem_getvalue(semid_t id, \ int *val); } 408 AUE_SEMDESTROY NOSTD|NOPROTO { int ksem_destroy(semid_t id); } 409 AUE_NULL UNIMPL __mac_get_pid 410 AUE_NULL UNIMPL __mac_get_link 411 AUE_NULL UNIMPL __mac_set_link 412 AUE_EXTATTR_SET_LINK NOPROTO { ssize_t extattr_set_link( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 413 AUE_EXTATTR_GET_LINK NOPROTO { ssize_t extattr_get_link( \ const char *path, int attrnamespace, \ const char *attrname, void *data, \ size_t nbytes); } 414 AUE_EXTATTR_DELETE_LINK NOPROTO { int extattr_delete_link( \ const char *path, int attrnamespace, \ const char *attrname); } 415 AUE_NULL UNIMPL __mac_execve 416 AUE_SIGACTION STD { int freebsd32_sigaction(int sig, \ struct sigaction32 *act, \ struct sigaction32 *oact); } 417 AUE_SIGRETURN STD { int freebsd32_sigreturn( \ const struct freebsd32_ucontext *sigcntxp); } 418 AUE_NULL UNIMPL __xstat 419 AUE_NULL UNIMPL __xfstat 420 AUE_NULL UNIMPL __xlstat 421 AUE_NULL STD { int freebsd32_getcontext( \ struct freebsd32_ucontext *ucp); } 422 AUE_NULL STD { int freebsd32_setcontext( \ const struct freebsd32_ucontext *ucp); } 423 AUE_NULL STD { int freebsd32_swapcontext( \ struct freebsd32_ucontext *oucp, \ const struct freebsd32_ucontext *ucp); } 424 AUE_SWAPOFF UNIMPL swapoff 425 AUE_ACL_GET_LINK NOPROTO { int __acl_get_link(const char *path, \ acl_type_t type, struct acl *aclp); } 426 AUE_ACL_SET_LINK NOPROTO { int __acl_set_link(const char *path, \ acl_type_t type, struct acl *aclp); } 427 AUE_ACL_DELETE_LINK NOPROTO { int __acl_delete_link(const char *path, \ acl_type_t type); } 428 AUE_ACL_CHECK_LINK NOPROTO { int __acl_aclcheck_link(const char *path, \ acl_type_t type, struct acl *aclp); } 429 AUE_SIGWAIT NOPROTO { int sigwait(const sigset_t *set, \ int *sig); } 430 AUE_THR_CREATE UNIMPL thr_create; 431 AUE_THR_EXIT NOPROTO { void thr_exit(long *state); } 432 AUE_NULL NOPROTO { int thr_self(long *id); } 433 AUE_THR_KILL NOPROTO { int thr_kill(long id, int sig); } 434 AUE_NULL UNIMPL nosys 435 AUE_NULL UNIMPL nosys 436 AUE_JAIL_ATTACH NOPROTO { int jail_attach(int jid); } 437 AUE_EXTATTR_LIST_FD NOPROTO { ssize_t extattr_list_fd(int fd, \ int attrnamespace, void *data, \ size_t nbytes); } 438 AUE_EXTATTR_LIST_FILE NOPROTO { ssize_t extattr_list_file( \ const char *path, int attrnamespace, \ void *data, size_t nbytes); } 439 AUE_EXTATTR_LIST_LINK NOPROTO { ssize_t extattr_list_link( \ const char *path, int attrnamespace, \ void *data, size_t nbytes); } -440 AUE_NULL UNIMPL kse_switchin +440 AUE_NULL OBSOL kse_switchin 441 AUE_SEMWAIT NOSTD { int freebsd32_ksem_timedwait(semid_t id, \ const struct timespec32 *abstime); } 442 AUE_NULL STD { int freebsd32_thr_suspend( \ const struct timespec32 *timeout); } 443 AUE_NULL NOPROTO { int thr_wake(long id); } 444 AUE_MODUNLOAD NOPROTO { int kldunloadf(int fileid, int flags); } 445 AUE_AUDIT NOPROTO { int audit(const void *record, \ u_int length); } 446 AUE_AUDITON NOPROTO { int auditon(int cmd, void *data, \ u_int length); } 447 AUE_GETAUID NOPROTO { int getauid(uid_t *auid); } 448 AUE_SETAUID NOPROTO { int setauid(uid_t *auid); } 449 AUE_GETAUDIT NOPROTO { int getaudit(struct auditinfo *auditinfo); } 450 AUE_SETAUDIT NOPROTO { int setaudit(struct auditinfo *auditinfo); } 451 AUE_GETAUDIT_ADDR NOPROTO { int getaudit_addr( \ struct auditinfo_addr *auditinfo_addr, \ u_int length); } 452 AUE_SETAUDIT_ADDR NOPROTO { int setaudit_addr( \ struct auditinfo_addr *auditinfo_addr, \ u_int length); } 453 AUE_AUDITCTL NOPROTO { int auditctl(char *path); } 454 AUE_NULL STD { int freebsd32_umtx_op(void *obj, int op,\ u_long val, void *uaddr, \ void *uaddr2); } 455 AUE_THR_NEW STD { int freebsd32_thr_new( \ struct thr_param32 *param, \ int param_size); } 456 AUE_NULL STD { int freebsd32_sigqueue(pid_t pid, \ int signum, int value); } 457 AUE_MQ_OPEN NOSTD { int freebsd32_kmq_open( \ const char *path, int flags, mode_t mode, \ const struct mq_attr32 *attr); } 458 AUE_MQ_SETATTR NOSTD { int freebsd32_kmq_setattr(int mqd, \ const struct mq_attr32 *attr, \ struct mq_attr32 *oattr); } 459 AUE_MQ_TIMEDRECEIVE NOSTD { int freebsd32_kmq_timedreceive(int mqd, \ char *msg_ptr, size_t msg_len, \ unsigned *msg_prio, \ const struct timespec32 *abs_timeout); } 460 AUE_MQ_TIMEDSEND NOSTD { int freebsd32_kmq_timedsend(int mqd, \ const char *msg_ptr, size_t msg_len,\ unsigned msg_prio, \ const struct timespec32 *abs_timeout);} 461 AUE_MQ_NOTIFY NOSTD { int freebsd32_kmq_notify(int mqd, \ const struct sigevent32 *sigev); } 462 AUE_MQ_UNLINK NOPROTO|NOSTD { int kmq_unlink(const char *path); } 463 AUE_NULL NOPROTO { int abort2(const char *why, int nargs, void **args); } 464 AUE_NULL NOPROTO { int thr_set_name(long id, const char *name); } 465 AUE_AIO_FSYNC STD { int freebsd32_aio_fsync(int op, \ struct aiocb32 *aiocbp); } 466 AUE_RTPRIO NOPROTO { int rtprio_thread(int function, \ lwpid_t lwpid, struct rtprio *rtp); } 467 AUE_NULL UNIMPL nosys 468 AUE_NULL UNIMPL nosys 469 AUE_NULL UNIMPL __getpath_fromfd 470 AUE_NULL UNIMPL __getpath_fromaddr 471 AUE_SCTP_PEELOFF NOPROTO|NOSTD { int sctp_peeloff(int sd, uint32_t name); } 472 AUE_SCTP_GENERIC_SENDMSG NOPROTO|NOSTD { int sctp_generic_sendmsg(int sd, caddr_t msg, int mlen, \ caddr_t to, __socklen_t tolen, \ struct sctp_sndrcvinfo *sinfo, int flags); } 473 AUE_SCTP_GENERIC_SENDMSG_IOV NOPROTO|NOSTD { int sctp_generic_sendmsg_iov(int sd, struct iovec *iov, int iovlen, \ caddr_t to, __socklen_t tolen, \ struct sctp_sndrcvinfo *sinfo, int flags); } 474 AUE_SCTP_GENERIC_RECVMSG NOPROTO|NOSTD { int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \ struct sockaddr * from, __socklen_t *fromlenaddr, \ struct sctp_sndrcvinfo *sinfo, int *msg_flags); } #ifdef PAD64_REQUIRED 475 AUE_PREAD STD { ssize_t freebsd32_pread(int fd, \ void *buf,size_t nbyte, \ int pad, \ uint32_t offset1, uint32_t offset2); } 476 AUE_PWRITE STD { ssize_t freebsd32_pwrite(int fd, \ const void *buf, size_t nbyte, \ int pad, \ uint32_t offset1, uint32_t offset2); } 477 AUE_MMAP STD { caddr_t freebsd32_mmap(caddr_t addr, \ size_t len, int prot, int flags, int fd, \ int pad, \ uint32_t pos1, uint32_t pos2); } 478 AUE_LSEEK STD { off_t freebsd32_lseek(int fd, \ int pad, \ uint32_t offset1, uint32_t offset2, \ int whence); } 479 AUE_TRUNCATE STD { int freebsd32_truncate(char *path, \ int pad, \ uint32_t length1, uint32_t length2); } 480 AUE_FTRUNCATE STD { int freebsd32_ftruncate(int fd, \ int pad, \ uint32_t length1, uint32_t length2); } #else 475 AUE_PREAD STD { ssize_t freebsd32_pread(int fd, \ void *buf,size_t nbyte, \ uint32_t offset1, uint32_t offset2); } 476 AUE_PWRITE STD { ssize_t freebsd32_pwrite(int fd, \ const void *buf, size_t nbyte, \ uint32_t offset1, uint32_t offset2); } 477 AUE_MMAP STD { caddr_t freebsd32_mmap(caddr_t addr, \ size_t len, int prot, int flags, int fd, \ uint32_t pos1, uint32_t pos2); } 478 AUE_LSEEK STD { off_t freebsd32_lseek(int fd, \ uint32_t offset1, uint32_t offset2, \ int whence); } 479 AUE_TRUNCATE STD { int freebsd32_truncate(char *path, \ uint32_t length1, uint32_t length2); } 480 AUE_FTRUNCATE STD { int freebsd32_ftruncate(int fd, \ uint32_t length1, uint32_t length2); } #endif 481 AUE_THR_KILL2 NOPROTO { int thr_kill2(pid_t pid, long id, int sig); } 482 AUE_SHMOPEN NOPROTO { int shm_open(const char *path, int flags, \ mode_t mode); } 483 AUE_SHMUNLINK NOPROTO { int shm_unlink(const char *path); } 484 AUE_NULL NOPROTO { int cpuset(cpusetid_t *setid); } #ifdef PAD64_REQUIRED 485 AUE_NULL STD { int freebsd32_cpuset_setid(cpuwhich_t which, \ int pad, \ uint32_t id1, uint32_t id2, \ cpusetid_t setid); } #else 485 AUE_NULL STD { int freebsd32_cpuset_setid(cpuwhich_t which, \ uint32_t id1, uint32_t id2, \ cpusetid_t setid); } #endif 486 AUE_NULL STD { int freebsd32_cpuset_getid(cpulevel_t level, \ cpuwhich_t which, \ uint32_t id1, uint32_t id2, \ cpusetid_t *setid); } 487 AUE_NULL STD { int freebsd32_cpuset_getaffinity( \ cpulevel_t level, cpuwhich_t which, \ uint32_t id1, uint32_t id2, \ size_t cpusetsize, \ cpuset_t *mask); } 488 AUE_NULL STD { int freebsd32_cpuset_setaffinity( \ cpulevel_t level, cpuwhich_t which, \ uint32_t id1, uint32_t id2, \ size_t cpusetsize, \ const cpuset_t *mask); } 489 AUE_FACCESSAT NOPROTO { int faccessat(int fd, char *path, int amode, \ int flag); } 490 AUE_FCHMODAT NOPROTO { int fchmodat(int fd, const char *path, \ mode_t mode, int flag); } 491 AUE_FCHOWNAT NOPROTO { int fchownat(int fd, char *path, uid_t uid, \ gid_t gid, int flag); } 492 AUE_FEXECVE STD { int freebsd32_fexecve(int fd, \ uint32_t *argv, uint32_t *envv); } 493 AUE_FSTATAT COMPAT11 { int freebsd32_fstatat(int fd, \ char *path, struct freebsd11_stat32 *buf, \ int flag); } 494 AUE_FUTIMESAT STD { int freebsd32_futimesat(int fd, char *path, \ struct timeval *times); } 495 AUE_LINKAT NOPROTO { int linkat(int fd1, char *path1, int fd2, \ char *path2, int flag); } 496 AUE_MKDIRAT NOPROTO { int mkdirat(int fd, char *path, \ mode_t mode); } 497 AUE_MKFIFOAT NOPROTO { int mkfifoat(int fd, char *path, \ mode_t mode); } 498 AUE_MKNODAT COMPAT11 { int freebsd32_mknodat(int fd, char *path, \ mode_t mode, uint32_t dev); } 499 AUE_OPENAT_RWTC NOPROTO { int openat(int fd, char *path, int flag, \ mode_t mode); } 500 AUE_READLINKAT NOPROTO { int readlinkat(int fd, char *path, char *buf, \ size_t bufsize); } 501 AUE_RENAMEAT NOPROTO { int renameat(int oldfd, char *old, int newfd, \ const char *new); } 502 AUE_SYMLINKAT NOPROTO { int symlinkat(char *path1, int fd, \ char *path2); } 503 AUE_UNLINKAT NOPROTO { int unlinkat(int fd, char *path, \ int flag); } 504 AUE_POSIX_OPENPT NOPROTO { int posix_openpt(int flags); } ; 505 is initialised by the kgssapi code, if present. 505 AUE_NULL UNIMPL gssd_syscall 506 AUE_JAIL_GET STD { int freebsd32_jail_get(struct iovec32 *iovp, \ unsigned int iovcnt, int flags); } 507 AUE_JAIL_SET STD { int freebsd32_jail_set(struct iovec32 *iovp, \ unsigned int iovcnt, int flags); } 508 AUE_JAIL_REMOVE NOPROTO { int jail_remove(int jid); } 509 AUE_CLOSEFROM NOPROTO { int closefrom(int lowfd); } 510 AUE_SEMCTL NOSTD { int freebsd32_semctl(int semid, int semnum, \ int cmd, union semun32 *arg); } 511 AUE_MSGCTL NOSTD { int freebsd32_msgctl(int msqid, int cmd, \ struct msqid_ds32 *buf); } 512 AUE_SHMCTL NOSTD { int freebsd32_shmctl(int shmid, int cmd, \ struct shmid_ds32 *buf); } 513 AUE_LPATHCONF NOPROTO { int lpathconf(char *path, int name); } 514 AUE_NULL OBSOL cap_new 515 AUE_CAP_RIGHTS_GET NOPROTO { int __cap_rights_get(int version, \ int fd, cap_rights_t *rightsp); } 516 AUE_CAP_ENTER NOPROTO { int cap_enter(void); } 517 AUE_CAP_GETMODE NOPROTO { int cap_getmode(u_int *modep); } 518 AUE_PDFORK NOPROTO { int pdfork(int *fdp, int flags); } 519 AUE_PDKILL NOPROTO { int pdkill(int fd, int signum); } 520 AUE_PDGETPID NOPROTO { int pdgetpid(int fd, pid_t *pidp); } 521 AUE_PDWAIT UNIMPL pdwait4 522 AUE_SELECT STD { int freebsd32_pselect(int nd, fd_set *in, \ fd_set *ou, fd_set *ex, \ const struct timespec32 *ts, \ const sigset_t *sm); } 523 AUE_GETLOGINCLASS NOPROTO { int getloginclass(char *namebuf, \ size_t namelen); } 524 AUE_SETLOGINCLASS NOPROTO { int setloginclass(const char *namebuf); } 525 AUE_NULL NOPROTO { int rctl_get_racct(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 526 AUE_NULL NOPROTO { int rctl_get_rules(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 527 AUE_NULL NOPROTO { int rctl_get_limits(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 528 AUE_NULL NOPROTO { int rctl_add_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } 529 AUE_NULL NOPROTO { int rctl_remove_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } #ifdef PAD64_REQUIRED 530 AUE_POSIX_FALLOCATE STD { int freebsd32_posix_fallocate(int fd, \ int pad, \ uint32_t offset1, uint32_t offset2,\ uint32_t len1, uint32_t len2); } 531 AUE_POSIX_FADVISE STD { int freebsd32_posix_fadvise(int fd, \ int pad, \ uint32_t offset1, uint32_t offset2,\ uint32_t len1, uint32_t len2, \ int advice); } 532 AUE_WAIT6 STD { int freebsd32_wait6(int idtype, int pad, \ uint32_t id1, uint32_t id2, \ int *status, int options, \ struct wrusage32 *wrusage, \ siginfo_t *info); } #else 530 AUE_POSIX_FALLOCATE STD { int freebsd32_posix_fallocate(int fd,\ uint32_t offset1, uint32_t offset2,\ uint32_t len1, uint32_t len2); } 531 AUE_POSIX_FADVISE STD { int freebsd32_posix_fadvise(int fd, \ uint32_t offset1, uint32_t offset2,\ uint32_t len1, uint32_t len2, \ int advice); } 532 AUE_WAIT6 STD { int freebsd32_wait6(int idtype, \ uint32_t id1, uint32_t id2, \ int *status, int options, \ struct wrusage32 *wrusage, \ siginfo_t *info); } #endif 533 AUE_CAP_RIGHTS_LIMIT NOPROTO { \ int cap_rights_limit(int fd, \ cap_rights_t *rightsp); } 534 AUE_CAP_IOCTLS_LIMIT STD { \ int freebsd32_cap_ioctls_limit(int fd, \ const uint32_t *cmds, size_t ncmds); } 535 AUE_CAP_IOCTLS_GET STD { \ ssize_t freebsd32_cap_ioctls_get(int fd, \ uint32_t *cmds, size_t maxcmds); } 536 AUE_CAP_FCNTLS_LIMIT NOPROTO { int cap_fcntls_limit(int fd, \ uint32_t fcntlrights); } 537 AUE_CAP_FCNTLS_GET NOPROTO { int cap_fcntls_get(int fd, \ uint32_t *fcntlrightsp); } 538 AUE_BINDAT NOPROTO { int bindat(int fd, int s, caddr_t name, \ int namelen); } 539 AUE_CONNECTAT NOPROTO { int connectat(int fd, int s, caddr_t name, \ int namelen); } 540 AUE_CHFLAGSAT NOPROTO { int chflagsat(int fd, const char *path, \ u_long flags, int atflag); } 541 AUE_ACCEPT NOPROTO { int accept4(int s, \ struct sockaddr * __restrict name, \ __socklen_t * __restrict anamelen, \ int flags); } 542 AUE_PIPE NOPROTO { int pipe2(int *fildes, int flags); } 543 AUE_AIO_MLOCK STD { int freebsd32_aio_mlock( \ struct aiocb32 *aiocbp); } #ifdef PAD64_REQUIRED 544 AUE_PROCCTL STD { int freebsd32_procctl(int idtype, int pad, \ uint32_t id1, uint32_t id2, int com, \ void *data); } #else 544 AUE_PROCCTL STD { int freebsd32_procctl(int idtype, \ uint32_t id1, uint32_t id2, int com, \ void *data); } #endif 545 AUE_POLL STD { int freebsd32_ppoll(struct pollfd *fds, \ u_int nfds, const struct timespec32 *ts, \ const sigset_t *set); } 546 AUE_FUTIMES STD { int freebsd32_futimens(int fd, \ struct timespec *times); } 547 AUE_FUTIMESAT STD { int freebsd32_utimensat(int fd, \ char *path, \ struct timespec *times, int flag); } -548 AUE_NULL UNIMPL numa_getaffinity -549 AUE_NULL UNIMPL numa_setaffinity +548 AUE_NULL OBSOL numa_getaffinity +549 AUE_NULL OBSOL numa_setaffinity 550 AUE_FSYNC NOPROTO { int fdatasync(int fd); } 551 AUE_FSTAT STD { int freebsd32_fstat(int fd, \ struct stat32 *ub); } 552 AUE_FSTATAT STD { int freebsd32_fstatat(int fd, \ char *path, struct stat32 *buf, \ int flag); } 553 AUE_FHSTAT STD { int freebsd32_fhstat( \ const struct fhandle *u_fhp, \ struct stat32 *sb); } 554 AUE_GETDIRENTRIES NOPROTO { ssize_t getdirentries( \ int fd, char *buf, size_t count, \ off_t *basep); } 555 AUE_STATFS NOPROTO { int statfs(char *path, \ struct statfs32 *buf); } 556 AUE_FSTATFS NOPROTO { int fstatfs(int fd, struct statfs32 *buf); } 557 AUE_GETFSSTAT NOPROTO { int getfsstat(struct statfs32 *buf, \ long bufsize, int mode); } 558 AUE_FHSTATFS NOPROTO { int fhstatfs(const struct fhandle *u_fhp, \ struct statfs32 *buf); } 559 AUE_MKNODAT NOPROTO { int mknodat(int fd, char *path, mode_t mode, \ dev_t dev); } 560 AUE_KEVENT STD { int freebsd32_kevent(int fd, \ const struct kevent32 *changelist, \ int nchanges, \ struct kevent32 *eventlist, \ int nevents, \ const struct timespec32 *timeout); } 561 AUE_NULL STD { int freebsd32_cpuset_getdomain(cpulevel_t level, \ cpuwhich_t which, uint32_t id1, uint32_t id2, \ size_t domainsetsize, domainset_t *mask, \ int *policy); } 562 AUE_NULL STD { int freebsd32_cpuset_setdomain(cpulevel_t level, \ cpuwhich_t which, uint32_t id1, uint32_t id2, \ size_t domainsetsize, domainset_t *mask, \ int policy); } 563 AUE_NULL NOPROTO { int getrandom(void *buf, size_t buflen, \ unsigned int flags); } ; vim: syntax=off Index: projects/clang700-import/sys/conf/newvers.sh =================================================================== --- projects/clang700-import/sys/conf/newvers.sh (revision 339014) +++ projects/clang700-import/sys/conf/newvers.sh (revision 339015) @@ -1,318 +1,318 @@ #!/bin/sh - # # SPDX-License-Identifier: BSD-3-Clause # # Copyright (c) 1984, 1986, 1990, 1993 # The Regents of the University of California. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # 3. Neither the name of the University nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE # ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF # SUCH DAMAGE. # # @(#)newvers.sh 8.1 (Berkeley) 4/20/94 # $FreeBSD$ # Command line options: # # -r Reproducible build. Do not embed directory names, user # names, time stamps or other dynamic information into # the output file. This is intended to allow two builds # done at different times and even by different people on # different hosts to produce identical output. # # -R Reproducible build if the tree represents an unmodified # checkout from a version control system. Metadata is # included if the tree is modified. TYPE="FreeBSD" REVISION="12.0" -BRANCH="ALPHA7" +BRANCH="ALPHA8" if [ -n "${BRANCH_OVERRIDE}" ]; then BRANCH=${BRANCH_OVERRIDE} fi RELEASE="${REVISION}-${BRANCH}" VERSION="${TYPE} ${RELEASE}" # # findvcs dir # Looks up directory dir at world root and up the filesystem # findvcs() { local savedir savedir=$(pwd) cd ${SYSDIR}/.. while [ $(pwd) != "/" ]; do if [ -e "./$1" ]; then VCSTOP=$(pwd) VCSDIR=${VCSTOP}"/$1" cd ${savedir} return 0 fi cd .. done cd ${savedir} return 1 } if [ -z "${SYSDIR}" ]; then SYSDIR=$(dirname $0)/.. fi if [ -n "${PARAMFILE}" ]; then RELDATE=$(awk '/__FreeBSD_version.*propagated to newvers/ {print $3}' \ ${PARAMFILE}) else RELDATE=$(awk '/__FreeBSD_version.*propagated to newvers/ {print $3}' \ ${SYSDIR}/sys/param.h) fi b=share/examples/etc/bsd-style-copyright if [ -r "${SYSDIR}/../COPYRIGHT" ]; then year=$(sed -Ee '/^Copyright .* The FreeBSD Project/!d;s/^.*1992-([0-9]*) .*$/\1/g' ${SYSDIR}/../COPYRIGHT) else year=$(date +%Y) fi # look for copyright template for bsd_copyright in ../$b ../../$b ../../../$b /usr/src/$b /usr/$b do if [ -r "$bsd_copyright" ]; then COPYRIGHT=`sed \ -e "s/\[year\]/1992-$year/" \ -e 's/\[your name here\]\.* /The FreeBSD Project./' \ -e 's/\[your name\]\.*/The FreeBSD Project./' \ -e '/\[id for your version control system, if any\]/d' \ $bsd_copyright` break fi done # no copyright found, use a dummy if [ -z "$COPYRIGHT" ]; then COPYRIGHT="/*- * Copyright (c) 1992-$year The FreeBSD Project. * All rights reserved. * */" fi # add newline COPYRIGHT="$COPYRIGHT " # VARS_ONLY means no files should be generated, this is just being # included. if [ -n "$VARS_ONLY" ]; then return 0 fi LC_ALL=C; export LC_ALL if [ ! -r version ] then echo 0 > version fi touch version v=`cat version` u=${USER:-root} d=`pwd` h=${HOSTNAME:-`hostname`} if [ -n "$SOURCE_DATE_EPOCH" ]; then if ! t=`date -r $SOURCE_DATE_EPOCH 2>/dev/null`; then echo "Invalid SOURCE_DATE_EPOCH" >&2 exit 1 fi else t=`date` fi i=`${MAKE:-make} -V KERN_IDENT` compiler_v=$($(${MAKE:-make} -V CC) -v 2>&1 | grep -w 'version') for dir in /usr/bin /usr/local/bin; do if [ ! -z "${svnversion}" ] ; then break fi if [ -x "${dir}/svnversion" ] && [ -z ${svnversion} ] ; then # Run svnversion from ${dir} on this script; if return code # is not zero, the checkout might not be compatible with the # svnversion being used. ${dir}/svnversion $(realpath ${0}) >/dev/null 2>&1 if [ $? -eq 0 ]; then svnversion=${dir}/svnversion break fi fi done if [ -z "${svnversion}" ] && [ -x /usr/bin/svnliteversion ] ; then /usr/bin/svnliteversion $(realpath ${0}) >/dev/null 2>&1 if [ $? -eq 0 ]; then svnversion=/usr/bin/svnliteversion else svnversion= fi fi for dir in /usr/bin /usr/local/bin; do if [ -x "${dir}/p4" ] && [ -z ${p4_cmd} ] ; then p4_cmd=${dir}/p4 fi done if findvcs .git; then for dir in /usr/bin /usr/local/bin; do if [ -x "${dir}/git" ] ; then git_cmd="${dir}/git -c help.autocorrect=0 --git-dir=${VCSDIR}" break fi done fi if findvcs .hg; then for dir in /usr/bin /usr/local/bin; do if [ -x "${dir}/hg" ] ; then hg_cmd="${dir}/hg -R ${VCSDIR}" break fi done fi if [ -n "$svnversion" ] ; then svn=`cd ${SYSDIR} && $svnversion 2>/dev/null` case "$svn" in [0-9]*[MSP]|*:*) svn=" r${svn}" modified=true ;; [0-9]*) svn=" r${svn}" ;; *) unset svn ;; esac fi if [ -n "$git_cmd" ] ; then git=`$git_cmd rev-parse --verify --short HEAD 2>/dev/null` svn=`$git_cmd svn find-rev $git 2>/dev/null` if [ -n "$svn" ] ; then svn=" r${svn}" git="=${git}" else svn=`$git_cmd log --grep '^git-svn-id:' | \ grep '^ git-svn-id:' | head -1 | \ sed -n 's/^.*@\([0-9][0-9]*\).*$/\1/p'` if [ -z "$svn" ] ; then svn=`$git_cmd log --format='format:%N' | \ grep '^svn ' | head -1 | \ sed -n 's/^.*revision=\([0-9][0-9]*\).*$/\1/p'` fi if [ -n "$svn" ] ; then svn=" r${svn}" git="+${git}" else git=" ${git}" fi fi git_b=`$git_cmd rev-parse --abbrev-ref HEAD` if [ -n "$git_b" ] ; then git="${git}(${git_b})" fi if $git_cmd --work-tree=${VCSTOP} diff-index \ --name-only HEAD | read dummy; then git="${git}-dirty" modified=true fi fi if [ -n "$p4_cmd" ] ; then p4version=`cd ${SYSDIR} && $p4_cmd changes -m1 "./...#have" 2>&1 | \ awk '{ print $2 }'` case "$p4version" in [0-9]*) p4version=" ${p4version}" p4opened=`cd ${SYSDIR} && $p4_cmd opened ./... 2>&1` case "$p4opened" in File*) ;; //*) p4version="${p4version}+edit" modified=true ;; esac ;; *) unset p4version ;; esac fi if [ -n "$hg_cmd" ] ; then hg=`$hg_cmd id 2>/dev/null` svn=`$hg_cmd svn info 2>/dev/null | \ awk -F': ' '/Revision/ { print $2 }'` if [ -n "$svn" ] ; then svn=" r${svn}" fi if [ -n "$hg" ] ; then hg=" ${hg}" fi fi include_metadata=true while getopts rR opt; do case "$opt" in r) include_metadata= ;; R) if [ -z "${modified}" ]; then include_metadata= fi esac done shift $((OPTIND - 1)) if [ -z "${include_metadata}" ]; then VERINFO="${VERSION}${svn}${git}${hg}${p4version} ${i}" VERSTR="${VERINFO}\\n" else VERINFO="${VERSION} #${v}${svn}${git}${hg}${p4version}: ${t}" VERSTR="${VERINFO}\\n ${u}@${h}:${d}\\n" fi cat << EOF > vers.c $COPYRIGHT #define SCCSSTR "@(#)${VERINFO}" #define VERSTR "${VERSTR}" #define RELSTR "${RELEASE}" char sccs[sizeof(SCCSSTR) > 128 ? sizeof(SCCSSTR) : 128] = SCCSSTR; char version[sizeof(VERSTR) > 256 ? sizeof(VERSTR) : 256] = VERSTR; char compiler_version[] = "${compiler_v}"; char ostype[] = "${TYPE}"; char osrelease[sizeof(RELSTR) > 32 ? sizeof(RELSTR) : 32] = RELSTR; int osreldate = ${RELDATE}; char kern_ident[] = "${i}"; EOF echo $((v + 1)) > version Index: projects/clang700-import/sys/dev/pci/pci_user.c =================================================================== --- projects/clang700-import/sys/dev/pci/pci_user.c (revision 339014) +++ projects/clang700-import/sys/dev/pci/pci_user.c (revision 339015) @@ -1,1161 +1,1297 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (c) 1997, Stefan Esser * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice unmodified, this list of conditions, and the following * disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "opt_bus.h" /* XXX trim includes */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "pcib_if.h" #include "pci_if.h" +#ifdef COMPAT_FREEBSD32 +struct pci_conf32 { + struct pcisel pc_sel; /* domain+bus+slot+function */ + u_int8_t pc_hdr; /* PCI header type */ + u_int16_t pc_subvendor; /* card vendor ID */ + u_int16_t pc_subdevice; /* card device ID, assigned by + card vendor */ + u_int16_t pc_vendor; /* chip vendor ID */ + u_int16_t pc_device; /* chip device ID, assigned by + chip vendor */ + u_int8_t pc_class; /* chip PCI class */ + u_int8_t pc_subclass; /* chip PCI subclass */ + u_int8_t pc_progif; /* chip PCI programming interface */ + u_int8_t pc_revid; /* chip revision ID */ + char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ + u_int32_t pd_unit; /* device unit number */ +}; + +struct pci_match_conf32 { + struct pcisel pc_sel; /* domain+bus+slot+function */ + char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ + u_int32_t pd_unit; /* Unit number */ + u_int16_t pc_vendor; /* PCI Vendor ID */ + u_int16_t pc_device; /* PCI Device ID */ + u_int8_t pc_class; /* PCI class */ + u_int32_t flags; /* Matching expression */ +}; + +struct pci_conf_io32 { + u_int32_t pat_buf_len; /* pattern buffer length */ + u_int32_t num_patterns; /* number of patterns */ + u_int32_t patterns; /* struct pci_match_conf ptr */ + u_int32_t match_buf_len; /* match buffer length */ + u_int32_t num_matches; /* number of matches returned */ + u_int32_t matches; /* struct pci_conf ptr */ + u_int32_t offset; /* offset into device list */ + u_int32_t generation; /* device list generation */ + u_int32_t status; /* request status */ +}; + +#define PCIOCGETCONF32 _IOC_NEWTYPE(PCIOCGETCONF, struct pci_conf_io32) +#endif + /* * This is the user interface to PCI configuration space. */ static d_open_t pci_open; static d_close_t pci_close; static d_ioctl_t pci_ioctl; struct cdevsw pcicdev = { .d_version = D_VERSION, .d_flags = D_NEEDGIANT, .d_open = pci_open, .d_close = pci_close, .d_ioctl = pci_ioctl, .d_name = "pci", }; static int pci_open(struct cdev *dev, int oflags, int devtype, struct thread *td) { int error; if (oflags & FWRITE) { error = securelevel_gt(td->td_ucred, 0); if (error) return (error); } return (0); } static int pci_close(struct cdev *dev, int flag, int devtype, struct thread *td) { return 0; } /* * Match a single pci_conf structure against an array of pci_match_conf * structures. The first argument, 'matches', is an array of num_matches * pci_match_conf structures. match_buf is a pointer to the pci_conf * structure that will be compared to every entry in the matches array. * This function returns 1 on failure, 0 on success. */ static int pci_conf_match_native(struct pci_match_conf *matches, int num_matches, struct pci_conf *match_buf) { int i; if ((matches == NULL) || (match_buf == NULL) || (num_matches <= 0)) return(1); for (i = 0; i < num_matches; i++) { /* * I'm not sure why someone would do this...but... */ if (matches[i].flags == PCI_GETCONF_NO_MATCH) continue; /* * Look at each of the match flags. If it's set, do the * comparison. If the comparison fails, we don't have a * match, go on to the next item if there is one. */ if (((matches[i].flags & PCI_GETCONF_MATCH_DOMAIN) != 0) && (match_buf->pc_sel.pc_domain != matches[i].pc_sel.pc_domain)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_BUS) != 0) && (match_buf->pc_sel.pc_bus != matches[i].pc_sel.pc_bus)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_DEV) != 0) && (match_buf->pc_sel.pc_dev != matches[i].pc_sel.pc_dev)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_FUNC) != 0) && (match_buf->pc_sel.pc_func != matches[i].pc_sel.pc_func)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_VENDOR) != 0) && (match_buf->pc_vendor != matches[i].pc_vendor)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_DEVICE) != 0) && (match_buf->pc_device != matches[i].pc_device)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_CLASS) != 0) && (match_buf->pc_class != matches[i].pc_class)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_UNIT) != 0) && (match_buf->pd_unit != matches[i].pd_unit)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_NAME) != 0) && (strncmp(matches[i].pd_name, match_buf->pd_name, sizeof(match_buf->pd_name)) != 0)) continue; return(0); } return(1); } +#ifdef COMPAT_FREEBSD32 +static int +pci_conf_match32(struct pci_match_conf32 *matches, int num_matches, + struct pci_conf *match_buf) +{ + int i; + + if ((matches == NULL) || (match_buf == NULL) || (num_matches <= 0)) + return(1); + + for (i = 0; i < num_matches; i++) { + /* + * I'm not sure why someone would do this...but... + */ + if (matches[i].flags == PCI_GETCONF_NO_MATCH) + continue; + + /* + * Look at each of the match flags. If it's set, do the + * comparison. If the comparison fails, we don't have a + * match, go on to the next item if there is one. + */ + if (((matches[i].flags & PCI_GETCONF_MATCH_DOMAIN) != 0) + && (match_buf->pc_sel.pc_domain != + matches[i].pc_sel.pc_domain)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_BUS) != 0) + && (match_buf->pc_sel.pc_bus != matches[i].pc_sel.pc_bus)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_DEV) != 0) + && (match_buf->pc_sel.pc_dev != matches[i].pc_sel.pc_dev)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_FUNC) != 0) + && (match_buf->pc_sel.pc_func != matches[i].pc_sel.pc_func)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_VENDOR) != 0) + && (match_buf->pc_vendor != matches[i].pc_vendor)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_DEVICE) != 0) + && (match_buf->pc_device != matches[i].pc_device)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_CLASS) != 0) + && (match_buf->pc_class != matches[i].pc_class)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_UNIT) != 0) + && (match_buf->pd_unit != matches[i].pd_unit)) + continue; + + if (((matches[i].flags & PCI_GETCONF_MATCH_NAME) != 0) + && (strncmp(matches[i].pd_name, match_buf->pd_name, + sizeof(match_buf->pd_name)) != 0)) + continue; + + return(0); + } + + return(1); +} +#endif /* COMPAT_FREEBSD32 */ + #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) #define PRE7_COMPAT typedef enum { PCI_GETCONF_NO_MATCH_OLD = 0x00, PCI_GETCONF_MATCH_BUS_OLD = 0x01, PCI_GETCONF_MATCH_DEV_OLD = 0x02, PCI_GETCONF_MATCH_FUNC_OLD = 0x04, PCI_GETCONF_MATCH_NAME_OLD = 0x08, PCI_GETCONF_MATCH_UNIT_OLD = 0x10, PCI_GETCONF_MATCH_VENDOR_OLD = 0x20, PCI_GETCONF_MATCH_DEVICE_OLD = 0x40, PCI_GETCONF_MATCH_CLASS_OLD = 0x80 } pci_getconf_flags_old; struct pcisel_old { u_int8_t pc_bus; /* bus number */ u_int8_t pc_dev; /* device on this bus */ u_int8_t pc_func; /* function on this device */ }; struct pci_conf_old { struct pcisel_old pc_sel; /* bus+slot+function */ u_int8_t pc_hdr; /* PCI header type */ u_int16_t pc_subvendor; /* card vendor ID */ u_int16_t pc_subdevice; /* card device ID, assigned by card vendor */ u_int16_t pc_vendor; /* chip vendor ID */ u_int16_t pc_device; /* chip device ID, assigned by chip vendor */ u_int8_t pc_class; /* chip PCI class */ u_int8_t pc_subclass; /* chip PCI subclass */ u_int8_t pc_progif; /* chip PCI programming interface */ u_int8_t pc_revid; /* chip revision ID */ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ u_long pd_unit; /* device unit number */ }; struct pci_match_conf_old { struct pcisel_old pc_sel; /* bus+slot+function */ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ u_long pd_unit; /* Unit number */ u_int16_t pc_vendor; /* PCI Vendor ID */ u_int16_t pc_device; /* PCI Device ID */ u_int8_t pc_class; /* PCI class */ pci_getconf_flags_old flags; /* Matching expression */ }; struct pci_io_old { struct pcisel_old pi_sel; /* device to operate on */ int pi_reg; /* configuration register to examine */ int pi_width; /* width (in bytes) of read or write */ u_int32_t pi_data; /* data to write or result of read */ }; #ifdef COMPAT_FREEBSD32 struct pci_conf_old32 { struct pcisel_old pc_sel; /* bus+slot+function */ uint8_t pc_hdr; /* PCI header type */ uint16_t pc_subvendor; /* card vendor ID */ uint16_t pc_subdevice; /* card device ID, assigned by card vendor */ uint16_t pc_vendor; /* chip vendor ID */ uint16_t pc_device; /* chip device ID, assigned by chip vendor */ uint8_t pc_class; /* chip PCI class */ uint8_t pc_subclass; /* chip PCI subclass */ uint8_t pc_progif; /* chip PCI programming interface */ uint8_t pc_revid; /* chip revision ID */ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ uint32_t pd_unit; /* device unit number (u_long) */ }; struct pci_match_conf_old32 { struct pcisel_old pc_sel; /* bus+slot+function */ char pd_name[PCI_MAXNAMELEN + 1]; /* device name */ uint32_t pd_unit; /* Unit number (u_long) */ uint16_t pc_vendor; /* PCI Vendor ID */ uint16_t pc_device; /* PCI Device ID */ uint8_t pc_class; /* PCI class */ pci_getconf_flags_old flags; /* Matching expression */ }; -struct pci_conf_io32 { - uint32_t pat_buf_len; /* pattern buffer length */ - uint32_t num_patterns; /* number of patterns */ - uint32_t patterns; /* pattern buffer - (struct pci_match_conf_old32 *) */ - uint32_t match_buf_len; /* match buffer length */ - uint32_t num_matches; /* number of matches returned */ - uint32_t matches; /* match buffer - (struct pci_conf_old32 *) */ - uint32_t offset; /* offset into device list */ - uint32_t generation; /* device list generation */ - pci_getconf_status status; /* request status */ -}; - #define PCIOCGETCONF_OLD32 _IOWR('p', 1, struct pci_conf_io32) #endif /* COMPAT_FREEBSD32 */ #define PCIOCGETCONF_OLD _IOWR('p', 1, struct pci_conf_io) #define PCIOCREAD_OLD _IOWR('p', 2, struct pci_io_old) #define PCIOCWRITE_OLD _IOWR('p', 3, struct pci_io_old) static int pci_conf_match_old(struct pci_match_conf_old *matches, int num_matches, struct pci_conf *match_buf) { int i; if ((matches == NULL) || (match_buf == NULL) || (num_matches <= 0)) return(1); for (i = 0; i < num_matches; i++) { if (match_buf->pc_sel.pc_domain != 0) continue; /* * I'm not sure why someone would do this...but... */ if (matches[i].flags == PCI_GETCONF_NO_MATCH_OLD) continue; /* * Look at each of the match flags. If it's set, do the * comparison. If the comparison fails, we don't have a * match, go on to the next item if there is one. */ if (((matches[i].flags & PCI_GETCONF_MATCH_BUS_OLD) != 0) && (match_buf->pc_sel.pc_bus != matches[i].pc_sel.pc_bus)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_DEV_OLD) != 0) && (match_buf->pc_sel.pc_dev != matches[i].pc_sel.pc_dev)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_FUNC_OLD) != 0) && (match_buf->pc_sel.pc_func != matches[i].pc_sel.pc_func)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_VENDOR_OLD) != 0) && (match_buf->pc_vendor != matches[i].pc_vendor)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_DEVICE_OLD) != 0) && (match_buf->pc_device != matches[i].pc_device)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_CLASS_OLD) != 0) && (match_buf->pc_class != matches[i].pc_class)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_UNIT_OLD) != 0) && (match_buf->pd_unit != matches[i].pd_unit)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_NAME_OLD) != 0) && (strncmp(matches[i].pd_name, match_buf->pd_name, sizeof(match_buf->pd_name)) != 0)) continue; return(0); } return(1); } #ifdef COMPAT_FREEBSD32 static int pci_conf_match_old32(struct pci_match_conf_old32 *matches, int num_matches, struct pci_conf *match_buf) { int i; if ((matches == NULL) || (match_buf == NULL) || (num_matches <= 0)) return(1); for (i = 0; i < num_matches; i++) { if (match_buf->pc_sel.pc_domain != 0) continue; /* * I'm not sure why someone would do this...but... */ if (matches[i].flags == PCI_GETCONF_NO_MATCH_OLD) continue; /* * Look at each of the match flags. If it's set, do the * comparison. If the comparison fails, we don't have a * match, go on to the next item if there is one. */ if (((matches[i].flags & PCI_GETCONF_MATCH_BUS_OLD) != 0) && (match_buf->pc_sel.pc_bus != matches[i].pc_sel.pc_bus)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_DEV_OLD) != 0) && (match_buf->pc_sel.pc_dev != matches[i].pc_sel.pc_dev)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_FUNC_OLD) != 0) && (match_buf->pc_sel.pc_func != matches[i].pc_sel.pc_func)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_VENDOR_OLD) != 0) && (match_buf->pc_vendor != matches[i].pc_vendor)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_DEVICE_OLD) != 0) && (match_buf->pc_device != matches[i].pc_device)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_CLASS_OLD) != 0) && (match_buf->pc_class != matches[i].pc_class)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_UNIT_OLD) != 0) && ((u_int32_t)match_buf->pd_unit != matches[i].pd_unit)) continue; if (((matches[i].flags & PCI_GETCONF_MATCH_NAME_OLD) != 0) && (strncmp(matches[i].pd_name, match_buf->pd_name, sizeof(match_buf->pd_name)) != 0)) continue; return (0); } return (1); } #endif /* COMPAT_FREEBSD32 */ #endif /* !PRE7_COMPAT */ union pci_conf_union { struct pci_conf pc; +#ifdef COMPAT_FREEBSD32 + struct pci_conf32 pc32; +#endif #ifdef PRE7_COMPAT struct pci_conf_old pco; #ifdef COMPAT_FREEBSD32 struct pci_conf_old32 pco32; #endif #endif }; static int pci_conf_match(u_long cmd, struct pci_match_conf *matches, int num_matches, struct pci_conf *match_buf) { switch (cmd) { case PCIOCGETCONF: return (pci_conf_match_native( (struct pci_match_conf *)matches, num_matches, match_buf)); +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF32: + return (pci_conf_match32((struct pci_match_conf32 *)matches, + num_matches, match_buf)); +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_OLD: return (pci_conf_match_old( (struct pci_match_conf_old *)matches, num_matches, match_buf)); #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF_OLD32: return (pci_conf_match_old32( (struct pci_match_conf_old32 *)matches, num_matches, match_buf)); #endif #endif default: /* programmer error */ return (0); } } /* * Like PVE_NEXT but takes an explicit length since 'pve' is a user * pointer that cannot be dereferenced. */ #define PVE_NEXT_LEN(pve, datalen) \ ((struct pci_vpd_element *)((char *)(pve) + \ sizeof(struct pci_vpd_element) + (datalen))) static int pci_list_vpd(device_t dev, struct pci_list_vpd_io *lvio) { struct pci_vpd_element vpd_element, *vpd_user; struct pcicfg_vpd *vpd; size_t len; int error, i; vpd = pci_fetch_vpd_list(dev); if (vpd->vpd_reg == 0 || vpd->vpd_ident == NULL) return (ENXIO); /* * Calculate the amount of space needed in the data buffer. An * identifier element is always present followed by the read-only * and read-write keywords. */ len = sizeof(struct pci_vpd_element) + strlen(vpd->vpd_ident); for (i = 0; i < vpd->vpd_rocnt; i++) len += sizeof(struct pci_vpd_element) + vpd->vpd_ros[i].len; for (i = 0; i < vpd->vpd_wcnt; i++) len += sizeof(struct pci_vpd_element) + vpd->vpd_w[i].len; if (lvio->plvi_len == 0) { lvio->plvi_len = len; return (0); } if (lvio->plvi_len < len) { lvio->plvi_len = len; return (ENOMEM); } /* * Copyout the identifier string followed by each keyword and * value. */ vpd_user = lvio->plvi_data; vpd_element.pve_keyword[0] = '\0'; vpd_element.pve_keyword[1] = '\0'; vpd_element.pve_flags = PVE_FLAG_IDENT; vpd_element.pve_datalen = strlen(vpd->vpd_ident); error = copyout(&vpd_element, vpd_user, sizeof(vpd_element)); if (error) return (error); error = copyout(vpd->vpd_ident, vpd_user->pve_data, strlen(vpd->vpd_ident)); if (error) return (error); vpd_user = PVE_NEXT_LEN(vpd_user, vpd_element.pve_datalen); vpd_element.pve_flags = 0; for (i = 0; i < vpd->vpd_rocnt; i++) { vpd_element.pve_keyword[0] = vpd->vpd_ros[i].keyword[0]; vpd_element.pve_keyword[1] = vpd->vpd_ros[i].keyword[1]; vpd_element.pve_datalen = vpd->vpd_ros[i].len; error = copyout(&vpd_element, vpd_user, sizeof(vpd_element)); if (error) return (error); error = copyout(vpd->vpd_ros[i].value, vpd_user->pve_data, vpd->vpd_ros[i].len); if (error) return (error); vpd_user = PVE_NEXT_LEN(vpd_user, vpd_element.pve_datalen); } vpd_element.pve_flags = PVE_FLAG_RW; for (i = 0; i < vpd->vpd_wcnt; i++) { vpd_element.pve_keyword[0] = vpd->vpd_w[i].keyword[0]; vpd_element.pve_keyword[1] = vpd->vpd_w[i].keyword[1]; vpd_element.pve_datalen = vpd->vpd_w[i].len; error = copyout(&vpd_element, vpd_user, sizeof(vpd_element)); if (error) return (error); error = copyout(vpd->vpd_w[i].value, vpd_user->pve_data, vpd->vpd_w[i].len); if (error) return (error); vpd_user = PVE_NEXT_LEN(vpd_user, vpd_element.pve_datalen); } KASSERT((char *)vpd_user - (char *)lvio->plvi_data == len, ("length mismatch")); lvio->plvi_len = len; return (0); } static size_t pci_match_conf_size(u_long cmd) { switch (cmd) { case PCIOCGETCONF: return (sizeof(struct pci_match_conf)); +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF32: + return (sizeof(struct pci_match_conf32)); +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_OLD: return (sizeof(struct pci_match_conf_old)); #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF_OLD32: return (sizeof(struct pci_match_conf_old32)); #endif #endif default: /* programmer error */ return (0); } } static size_t pci_conf_size(u_long cmd) { switch (cmd) { case PCIOCGETCONF: return (sizeof(struct pci_conf)); +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF32: + return (sizeof(struct pci_conf32)); +#endif #ifdef PRE7_COMPAT case PCIOCGETCONF_OLD: return (sizeof(struct pci_conf_old)); #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF_OLD32: return (sizeof(struct pci_conf_old32)); #endif #endif default: /* programmer error */ return (0); } } static void pci_conf_io_init(struct pci_conf_io *cio, caddr_t data, u_long cmd) { -#if defined(PRE7_COMPAT) && defined(COMPAT_FREEBSD32) +#if defined(COMPAT_FREEBSD32) struct pci_conf_io32 *cio32; #endif switch (cmd) { case PCIOCGETCONF: #ifdef PRE7_COMPAT case PCIOCGETCONF_OLD: #endif *cio = *(struct pci_conf_io *)data; return; -#if defined(PRE7_COMPAT) && defined(COMPAT_FREEBSD32) +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF32: +#ifdef PRE7_COMPAT case PCIOCGETCONF_OLD32: +#endif cio32 = (struct pci_conf_io32 *)data; cio->pat_buf_len = cio32->pat_buf_len; cio->num_patterns = cio32->num_patterns; cio->patterns = (void *)(uintptr_t)cio32->patterns; cio->match_buf_len = cio32->match_buf_len; cio->num_matches = cio32->num_matches; cio->matches = (void *)(uintptr_t)cio32->matches; cio->offset = cio32->offset; cio->generation = cio32->generation; cio->status = cio32->status; return; #endif default: /* programmer error */ return; } } static void pci_conf_io_update_data(const struct pci_conf_io *cio, caddr_t data, u_long cmd) { struct pci_conf_io *d_cio; -#if defined(PRE7_COMPAT) && defined(COMPAT_FREEBSD32) +#if defined(COMPAT_FREEBSD32) struct pci_conf_io32 *cio32; #endif switch (cmd) { case PCIOCGETCONF: #ifdef PRE7_COMPAT case PCIOCGETCONF_OLD: #endif d_cio = (struct pci_conf_io *)data; d_cio->status = cio->status; d_cio->generation = cio->generation; d_cio->offset = cio->offset; d_cio->num_matches = cio->num_matches; return; -#if defined(PRE7_COMPAT) && defined(COMPAT_FREEBSD32) +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF32: +#ifdef PRE7_COMPAT case PCIOCGETCONF_OLD32: +#endif cio32 = (struct pci_conf_io32 *)data; cio32->status = cio->status; cio32->generation = cio->generation; cio32->offset = cio->offset; cio32->num_matches = cio->num_matches; return; #endif default: /* programmer error */ return; } } static void pci_conf_for_copyout(const struct pci_conf *pcp, union pci_conf_union *pcup, u_long cmd) { memset(pcup, 0, sizeof(*pcup)); switch (cmd) { case PCIOCGETCONF: pcup->pc = *pcp; return; + +#ifdef COMPAT_FREEBSD32 + case PCIOCGETCONF32: + pcup->pc32.pc_sel = pcp->pc_sel; + pcup->pc32.pc_hdr = pcp->pc_hdr; + pcup->pc32.pc_subvendor = pcp->pc_subvendor; + pcup->pc32.pc_subdevice = pcp->pc_subdevice; + pcup->pc32.pc_vendor = pcp->pc_vendor; + pcup->pc32.pc_device = pcp->pc_device; + pcup->pc32.pc_class = pcp->pc_class; + pcup->pc32.pc_subclass = pcp->pc_subclass; + pcup->pc32.pc_progif = pcp->pc_progif; + pcup->pc32.pc_revid = pcp->pc_revid; + strlcpy(pcup->pc32.pd_name, pcp->pd_name, + sizeof(pcup->pc32.pd_name)); + pcup->pc32.pd_unit = (uint32_t)pcp->pd_unit; + return; +#endif #ifdef PRE7_COMPAT #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF_OLD32: pcup->pco32.pc_sel.pc_bus = pcp->pc_sel.pc_bus; pcup->pco32.pc_sel.pc_dev = pcp->pc_sel.pc_dev; pcup->pco32.pc_sel.pc_func = pcp->pc_sel.pc_func; pcup->pco32.pc_hdr = pcp->pc_hdr; pcup->pco32.pc_subvendor = pcp->pc_subvendor; pcup->pco32.pc_subdevice = pcp->pc_subdevice; pcup->pco32.pc_vendor = pcp->pc_vendor; pcup->pco32.pc_device = pcp->pc_device; pcup->pco32.pc_class = pcp->pc_class; pcup->pco32.pc_subclass = pcp->pc_subclass; pcup->pco32.pc_progif = pcp->pc_progif; pcup->pco32.pc_revid = pcp->pc_revid; strlcpy(pcup->pco32.pd_name, pcp->pd_name, sizeof(pcup->pco32.pd_name)); pcup->pco32.pd_unit = (uint32_t)pcp->pd_unit; return; #endif /* COMPAT_FREEBSD32 */ case PCIOCGETCONF_OLD: pcup->pco.pc_sel.pc_bus = pcp->pc_sel.pc_bus; pcup->pco.pc_sel.pc_dev = pcp->pc_sel.pc_dev; pcup->pco.pc_sel.pc_func = pcp->pc_sel.pc_func; pcup->pco.pc_hdr = pcp->pc_hdr; pcup->pco.pc_subvendor = pcp->pc_subvendor; pcup->pco.pc_subdevice = pcp->pc_subdevice; pcup->pco.pc_vendor = pcp->pc_vendor; pcup->pco.pc_device = pcp->pc_device; pcup->pco.pc_class = pcp->pc_class; pcup->pco.pc_subclass = pcp->pc_subclass; pcup->pco.pc_progif = pcp->pc_progif; pcup->pco.pc_revid = pcp->pc_revid; strlcpy(pcup->pco.pd_name, pcp->pd_name, sizeof(pcup->pco.pd_name)); pcup->pco.pd_unit = pcp->pd_unit; return; #endif /* PRE7_COMPAT */ default: /* programmer error */ return; } } static int pci_bar_mmap(device_t pcidev, struct pci_bar_mmap *pbm) { vm_map_t map; vm_object_t obj; struct thread *td; struct sglist *sg; struct pci_map *pm; vm_paddr_t pbase; vm_size_t plen; vm_offset_t addr; vm_prot_t prot; int error, flags; td = curthread; map = &td->td_proc->p_vmspace->vm_map; if ((pbm->pbm_flags & ~(PCIIO_BAR_MMAP_FIXED | PCIIO_BAR_MMAP_EXCL | PCIIO_BAR_MMAP_RW | PCIIO_BAR_MMAP_ACTIVATE)) != 0 || pbm->pbm_memattr != (vm_memattr_t)pbm->pbm_memattr || !pmap_is_valid_memattr(map->pmap, pbm->pbm_memattr)) return (EINVAL); /* Fetch the BAR physical base and length. */ pm = pci_find_bar(pcidev, pbm->pbm_reg); if (pm == NULL) return (EINVAL); if (!pci_bar_enabled(pcidev, pm)) return (EBUSY); /* XXXKIB enable if _ACTIVATE */ if (!PCI_BAR_MEM(pm->pm_value)) return (EIO); pbase = trunc_page(pm->pm_value); plen = round_page(pm->pm_value + ((pci_addr_t)1 << pm->pm_size)) - pbase; prot = VM_PROT_READ | (((pbm->pbm_flags & PCIIO_BAR_MMAP_RW) != 0) ? VM_PROT_WRITE : 0); /* Create vm structures and mmap. */ sg = sglist_alloc(1, M_WAITOK); error = sglist_append_phys(sg, pbase, plen); if (error != 0) goto out; obj = vm_pager_allocate(OBJT_SG, sg, plen, prot, 0, td->td_ucred); if (obj == NULL) { error = EIO; goto out; } obj->memattr = pbm->pbm_memattr; flags = MAP_SHARED; addr = 0; if ((pbm->pbm_flags & PCIIO_BAR_MMAP_FIXED) != 0) { addr = (uintptr_t)pbm->pbm_map_base; flags |= MAP_FIXED; } if ((pbm->pbm_flags & PCIIO_BAR_MMAP_EXCL) != 0) flags |= MAP_CHECK_EXCL; error = vm_mmap_object(map, &addr, plen, prot, prot, flags, obj, 0, FALSE, td); if (error != 0) { vm_object_deallocate(obj); goto out; } pbm->pbm_map_base = (void *)addr; pbm->pbm_map_length = plen; pbm->pbm_bar_off = pm->pm_value - pbase; pbm->pbm_bar_length = (pci_addr_t)1 << pm->pm_size; out: sglist_free(sg); return (error); } static int pci_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) { device_t pcidev; const char *name; struct devlist *devlist_head; struct pci_conf_io *cio = NULL; struct pci_devinfo *dinfo; struct pci_io *io; struct pci_bar_io *bio; struct pci_list_vpd_io *lvio; struct pci_match_conf *pattern_buf; struct pci_map *pm; struct pci_bar_mmap *pbm; size_t confsz, iolen; int error, ionum, i, num_patterns; union pci_conf_union pcu; #ifdef PRE7_COMPAT struct pci_io iodata; struct pci_io_old *io_old; io_old = NULL; #endif if (!(flag & FWRITE)) { switch (cmd) { case PCIOCGETCONF: #ifdef PRE7_COMPAT case PCIOCGETCONF_OLD: #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF_OLD32: #endif #endif case PCIOCGETBAR: case PCIOCLISTVPD: break; default: return (EPERM); } } switch (cmd) { case PCIOCGETCONF: #ifdef PRE7_COMPAT case PCIOCGETCONF_OLD: #ifdef COMPAT_FREEBSD32 case PCIOCGETCONF_OLD32: #endif #endif cio = malloc(sizeof(struct pci_conf_io), M_TEMP, M_WAITOK | M_ZERO); pci_conf_io_init(cio, data, cmd); pattern_buf = NULL; num_patterns = 0; dinfo = NULL; cio->num_matches = 0; /* * If the user specified an offset into the device list, * but the list has changed since they last called this * ioctl, tell them that the list has changed. They will * have to get the list from the beginning. */ if ((cio->offset != 0) && (cio->generation != pci_generation)){ cio->status = PCI_GETCONF_LIST_CHANGED; error = 0; goto getconfexit; } /* * Check to see whether the user has asked for an offset * past the end of our list. */ if (cio->offset >= pci_numdevs) { cio->status = PCI_GETCONF_LAST_DEVICE; error = 0; goto getconfexit; } /* get the head of the device queue */ devlist_head = &pci_devq; /* * Determine how much room we have for pci_conf structures. * Round the user's buffer size down to the nearest * multiple of sizeof(struct pci_conf) in case the user * didn't specify a multiple of that size. */ confsz = pci_conf_size(cmd); iolen = min(cio->match_buf_len - (cio->match_buf_len % confsz), pci_numdevs * confsz); /* * Since we know that iolen is a multiple of the size of * the pciconf union, it's okay to do this. */ ionum = iolen / confsz; /* * If this test is true, the user wants the pci_conf * structures returned to match the supplied entries. */ if ((cio->num_patterns > 0) && (cio->num_patterns < pci_numdevs) && (cio->pat_buf_len > 0)) { /* * pat_buf_len needs to be: * num_patterns * sizeof(struct pci_match_conf) * While it is certainly possible the user just * allocated a large buffer, but set the number of * matches correctly, it is far more likely that * their kernel doesn't match the userland utility * they're using. It's also possible that the user * forgot to initialize some variables. Yes, this * may be overly picky, but I hazard to guess that * it's far more likely to just catch folks that * updated their kernel but not their userland. */ if (cio->num_patterns * pci_match_conf_size(cmd) != cio->pat_buf_len) { /* The user made a mistake, return an error. */ cio->status = PCI_GETCONF_ERROR; error = EINVAL; goto getconfexit; } /* * Allocate a buffer to hold the patterns. */ pattern_buf = malloc(cio->pat_buf_len, M_TEMP, M_WAITOK); error = copyin(cio->patterns, pattern_buf, cio->pat_buf_len); if (error != 0) { error = EINVAL; goto getconfexit; } num_patterns = cio->num_patterns; } else if ((cio->num_patterns > 0) || (cio->pat_buf_len > 0)) { /* * The user made a mistake, spit out an error. */ cio->status = PCI_GETCONF_ERROR; error = EINVAL; goto getconfexit; } /* * Go through the list of devices and copy out the devices * that match the user's criteria. */ for (cio->num_matches = 0, i = 0, dinfo = STAILQ_FIRST(devlist_head); dinfo != NULL; dinfo = STAILQ_NEXT(dinfo, pci_links), i++) { if (i < cio->offset) continue; /* Populate pd_name and pd_unit */ name = NULL; if (dinfo->cfg.dev) name = device_get_name(dinfo->cfg.dev); if (name) { strncpy(dinfo->conf.pd_name, name, sizeof(dinfo->conf.pd_name)); dinfo->conf.pd_name[PCI_MAXNAMELEN] = 0; dinfo->conf.pd_unit = device_get_unit(dinfo->cfg.dev); } else { dinfo->conf.pd_name[0] = '\0'; dinfo->conf.pd_unit = 0; } if (pattern_buf == NULL || pci_conf_match(cmd, pattern_buf, num_patterns, &dinfo->conf) == 0) { /* * If we've filled up the user's buffer, * break out at this point. Since we've * got a match here, we'll pick right back * up at the matching entry. We can also * tell the user that there are more matches * left. */ if (cio->num_matches >= ionum) { error = 0; break; } pci_conf_for_copyout(&dinfo->conf, &pcu, cmd); error = copyout(&pcu, (caddr_t)cio->matches + confsz * cio->num_matches, confsz); if (error) break; cio->num_matches++; } } /* * Set the pointer into the list, so if the user is getting * n records at a time, where n < pci_numdevs, */ cio->offset = i; /* * Set the generation, the user will need this if they make * another ioctl call with offset != 0. */ cio->generation = pci_generation; /* * If this is the last device, inform the user so he won't * bother asking for more devices. If dinfo isn't NULL, we * know that there are more matches in the list because of * the way the traversal is done. */ if (dinfo == NULL) cio->status = PCI_GETCONF_LAST_DEVICE; else cio->status = PCI_GETCONF_MORE_DEVS; getconfexit: pci_conf_io_update_data(cio, data, cmd); free(cio, M_TEMP); free(pattern_buf, M_TEMP); break; #ifdef PRE7_COMPAT case PCIOCREAD_OLD: case PCIOCWRITE_OLD: io_old = (struct pci_io_old *)data; iodata.pi_sel.pc_domain = 0; iodata.pi_sel.pc_bus = io_old->pi_sel.pc_bus; iodata.pi_sel.pc_dev = io_old->pi_sel.pc_dev; iodata.pi_sel.pc_func = io_old->pi_sel.pc_func; iodata.pi_reg = io_old->pi_reg; iodata.pi_width = io_old->pi_width; iodata.pi_data = io_old->pi_data; data = (caddr_t)&iodata; /* FALLTHROUGH */ #endif case PCIOCREAD: case PCIOCWRITE: io = (struct pci_io *)data; switch(io->pi_width) { case 4: case 2: case 1: /* Make sure register is not negative and aligned. */ if (io->pi_reg < 0 || io->pi_reg & (io->pi_width - 1)) { error = EINVAL; break; } /* * Assume that the user-level bus number is * in fact the physical PCI bus number. * Look up the grandparent, i.e. the bridge device, * so that we can issue configuration space cycles. */ pcidev = pci_find_dbsf(io->pi_sel.pc_domain, io->pi_sel.pc_bus, io->pi_sel.pc_dev, io->pi_sel.pc_func); if (pcidev) { #ifdef PRE7_COMPAT if (cmd == PCIOCWRITE || cmd == PCIOCWRITE_OLD) #else if (cmd == PCIOCWRITE) #endif pci_write_config(pcidev, io->pi_reg, io->pi_data, io->pi_width); #ifdef PRE7_COMPAT else if (cmd == PCIOCREAD_OLD) io_old->pi_data = pci_read_config(pcidev, io->pi_reg, io->pi_width); #endif else io->pi_data = pci_read_config(pcidev, io->pi_reg, io->pi_width); error = 0; } else { #ifdef COMPAT_FREEBSD4 if (cmd == PCIOCREAD_OLD) { io_old->pi_data = -1; error = 0; } else #endif error = ENODEV; } break; default: error = EINVAL; break; } break; case PCIOCGETBAR: bio = (struct pci_bar_io *)data; /* * Assume that the user-level bus number is * in fact the physical PCI bus number. */ pcidev = pci_find_dbsf(bio->pbi_sel.pc_domain, bio->pbi_sel.pc_bus, bio->pbi_sel.pc_dev, bio->pbi_sel.pc_func); if (pcidev == NULL) { error = ENODEV; break; } pm = pci_find_bar(pcidev, bio->pbi_reg); if (pm == NULL) { error = EINVAL; break; } bio->pbi_base = pm->pm_value; bio->pbi_length = (pci_addr_t)1 << pm->pm_size; bio->pbi_enabled = pci_bar_enabled(pcidev, pm); error = 0; break; case PCIOCATTACHED: error = 0; io = (struct pci_io *)data; pcidev = pci_find_dbsf(io->pi_sel.pc_domain, io->pi_sel.pc_bus, io->pi_sel.pc_dev, io->pi_sel.pc_func); if (pcidev != NULL) io->pi_data = device_is_attached(pcidev); else error = ENODEV; break; case PCIOCLISTVPD: lvio = (struct pci_list_vpd_io *)data; /* * Assume that the user-level bus number is * in fact the physical PCI bus number. */ pcidev = pci_find_dbsf(lvio->plvi_sel.pc_domain, lvio->plvi_sel.pc_bus, lvio->plvi_sel.pc_dev, lvio->plvi_sel.pc_func); if (pcidev == NULL) { error = ENODEV; break; } error = pci_list_vpd(pcidev, lvio); break; case PCIOCBARMMAP: pbm = (struct pci_bar_mmap *)data; if ((flag & FWRITE) == 0 && (pbm->pbm_flags & PCIIO_BAR_MMAP_RW) != 0) return (EPERM); pcidev = pci_find_dbsf(pbm->pbm_sel.pc_domain, pbm->pbm_sel.pc_bus, pbm->pbm_sel.pc_dev, pbm->pbm_sel.pc_func); error = pcidev == NULL ? ENODEV : pci_bar_mmap(pcidev, pbm); break; default: error = ENOTTY; break; } return (error); } Index: projects/clang700-import/sys/dev/sdhci/sdhci_acpi.c =================================================================== --- projects/clang700-import/sys/dev/sdhci/sdhci_acpi.c (revision 339014) +++ projects/clang700-import/sys/dev/sdhci/sdhci_acpi.c (revision 339015) @@ -1,410 +1,413 @@ /*- * Copyright (c) 2017 Oleksandr Tymoshenko * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "mmcbr_if.h" #include "sdhci_if.h" static const struct sdhci_acpi_device { const char* hid; int uid; const char *desc; u_int quirks; } sdhci_acpi_devices[] = { { "80860F14", 1, "Intel Bay Trail/Braswell eMMC 4.5/4.5.1 Controller", SDHCI_QUIRK_INTEL_POWER_UP_RESET | SDHCI_QUIRK_WAIT_WHILE_BUSY | SDHCI_QUIRK_CAPS_BIT63_FOR_MMC_HS400 | SDHCI_QUIRK_PRESET_VALUE_BROKEN }, { "80860F14", 3, "Intel Bay Trail/Braswell SDXC Controller", SDHCI_QUIRK_WAIT_WHILE_BUSY | SDHCI_QUIRK_PRESET_VALUE_BROKEN }, { "80860F16", 0, "Intel Bay Trail/Braswell SDXC Controller", SDHCI_QUIRK_WAIT_WHILE_BUSY | SDHCI_QUIRK_PRESET_VALUE_BROKEN }, { "80865ACA", 0, "Intel Apollo Lake SDXC Controller", SDHCI_QUIRK_BROKEN_DMA | /* APL18 erratum */ SDHCI_QUIRK_WAIT_WHILE_BUSY | SDHCI_QUIRK_PRESET_VALUE_BROKEN }, { "80865ACC", 0, "Intel Apollo Lake eMMC 5.0 Controller", SDHCI_QUIRK_BROKEN_DMA | /* APL18 erratum */ SDHCI_QUIRK_INTEL_POWER_UP_RESET | SDHCI_QUIRK_WAIT_WHILE_BUSY | SDHCI_QUIRK_MMC_DDR52 | SDHCI_QUIRK_CAPS_BIT63_FOR_MMC_HS400 | SDHCI_QUIRK_PRESET_VALUE_BROKEN }, + { "AMDI0040", 0, "AMD eMMC 5.0 Controller", + SDHCI_QUIRK_32BIT_DMA_SIZE }, { NULL, 0, NULL, 0} }; static char *sdhci_ids[] = { "80860F14", "80860F16", "80865ACA", "80865ACC", + "AMDI0040", NULL }; struct sdhci_acpi_softc { u_int quirks; /* Chip specific quirks */ struct resource *irq_res; /* IRQ resource */ void *intrhand; /* Interrupt handle */ struct sdhci_slot slot; struct resource *mem_res; /* Memory resource */ }; static void sdhci_acpi_intr(void *arg); static int sdhci_acpi_detach(device_t dev); static uint8_t sdhci_acpi_read_1(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off) { struct sdhci_acpi_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res, 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return bus_read_1(sc->mem_res, off); } static void sdhci_acpi_write_1(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off, uint8_t val) { struct sdhci_acpi_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res, 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); bus_write_1(sc->mem_res, off, val); } static uint16_t sdhci_acpi_read_2(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off) { struct sdhci_acpi_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res, 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return bus_read_2(sc->mem_res, off); } static void sdhci_acpi_write_2(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off, uint16_t val) { struct sdhci_acpi_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res, 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); bus_write_2(sc->mem_res, off, val); } static uint32_t sdhci_acpi_read_4(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off) { struct sdhci_acpi_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res, 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); return bus_read_4(sc->mem_res, off); } static void sdhci_acpi_write_4(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off, uint32_t val) { struct sdhci_acpi_softc *sc = device_get_softc(dev); bus_barrier(sc->mem_res, 0, 0xFF, BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE); bus_write_4(sc->mem_res, off, val); } static void sdhci_acpi_read_multi_4(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off, uint32_t *data, bus_size_t count) { struct sdhci_acpi_softc *sc = device_get_softc(dev); bus_read_multi_stream_4(sc->mem_res, off, data, count); } static void sdhci_acpi_write_multi_4(device_t dev, struct sdhci_slot *slot __unused, bus_size_t off, uint32_t *data, bus_size_t count) { struct sdhci_acpi_softc *sc = device_get_softc(dev); bus_write_multi_stream_4(sc->mem_res, off, data, count); } static const struct sdhci_acpi_device * sdhci_acpi_find_device(device_t dev) { const char *hid; int i, uid; ACPI_HANDLE handle; ACPI_STATUS status; hid = ACPI_ID_PROBE(device_get_parent(dev), dev, sdhci_ids); if (hid == NULL) return (NULL); handle = acpi_get_handle(dev); status = acpi_GetInteger(handle, "_UID", &uid); if (ACPI_FAILURE(status)) uid = 0; for (i = 0; sdhci_acpi_devices[i].hid != NULL; i++) { if (strcmp(sdhci_acpi_devices[i].hid, hid) != 0) continue; if ((sdhci_acpi_devices[i].uid != 0) && (sdhci_acpi_devices[i].uid != uid)) continue; return (&sdhci_acpi_devices[i]); } return (NULL); } static int sdhci_acpi_probe(device_t dev) { const struct sdhci_acpi_device *acpi_dev; acpi_dev = sdhci_acpi_find_device(dev); if (acpi_dev == NULL) return (ENXIO); device_set_desc(dev, acpi_dev->desc); return (BUS_PROBE_DEFAULT); } static int sdhci_acpi_attach(device_t dev) { struct sdhci_acpi_softc *sc = device_get_softc(dev); int rid, err; const struct sdhci_acpi_device *acpi_dev; acpi_dev = sdhci_acpi_find_device(dev); if (acpi_dev == NULL) return (ENXIO); sc->quirks = acpi_dev->quirks; /* Allocate IRQ. */ rid = 0; sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid, RF_ACTIVE); if (sc->irq_res == NULL) { device_printf(dev, "can't allocate IRQ\n"); return (ENOMEM); } rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); if (sc->mem_res == NULL) { device_printf(dev, "can't allocate memory resource for slot\n"); sdhci_acpi_detach(dev); return (ENOMEM); } /* * Intel Bay Trail and Braswell eMMC controllers share the same IDs, * but while with these former DDR52 is affected by the VLI54 erratum, * these latter require the timeout clock to be hardcoded to 1 MHz. */ if (strcmp(acpi_dev->hid, "80860F14") == 0 && acpi_dev->uid == 1 && SDHCI_READ_4(dev, &sc->slot, SDHCI_CAPABILITIES) == 0x446cc8b2 && SDHCI_READ_4(dev, &sc->slot, SDHCI_CAPABILITIES2) == 0x00000807) sc->quirks |= SDHCI_QUIRK_MMC_DDR52 | SDHCI_QUIRK_DATA_TIMEOUT_1MHZ; sc->quirks &= ~sdhci_quirk_clear; sc->quirks |= sdhci_quirk_set; sc->slot.quirks = sc->quirks; err = sdhci_init_slot(dev, &sc->slot, 0); if (err) { device_printf(dev, "failed to init slot\n"); sdhci_acpi_detach(dev); return (err); } /* Activate the interrupt */ err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE, NULL, sdhci_acpi_intr, sc, &sc->intrhand); if (err) { device_printf(dev, "can't setup IRQ\n"); sdhci_acpi_detach(dev); return (err); } /* Process cards detection. */ sdhci_start_slot(&sc->slot); return (0); } static int sdhci_acpi_detach(device_t dev) { struct sdhci_acpi_softc *sc = device_get_softc(dev); if (sc->intrhand) bus_teardown_intr(dev, sc->irq_res, sc->intrhand); if (sc->irq_res) bus_release_resource(dev, SYS_RES_IRQ, rman_get_rid(sc->irq_res), sc->irq_res); if (sc->mem_res) { sdhci_cleanup_slot(&sc->slot); bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(sc->mem_res), sc->mem_res); } return (0); } static int sdhci_acpi_shutdown(device_t dev) { return (0); } static int sdhci_acpi_suspend(device_t dev) { struct sdhci_acpi_softc *sc = device_get_softc(dev); int err; err = bus_generic_suspend(dev); if (err) return (err); sdhci_generic_suspend(&sc->slot); return (0); } static int sdhci_acpi_resume(device_t dev) { struct sdhci_acpi_softc *sc = device_get_softc(dev); int err; sdhci_generic_resume(&sc->slot); err = bus_generic_resume(dev); if (err) return (err); return (0); } static void sdhci_acpi_intr(void *arg) { struct sdhci_acpi_softc *sc = (struct sdhci_acpi_softc *)arg; sdhci_generic_intr(&sc->slot); } static device_method_t sdhci_methods[] = { /* device_if */ DEVMETHOD(device_probe, sdhci_acpi_probe), DEVMETHOD(device_attach, sdhci_acpi_attach), DEVMETHOD(device_detach, sdhci_acpi_detach), DEVMETHOD(device_shutdown, sdhci_acpi_shutdown), DEVMETHOD(device_suspend, sdhci_acpi_suspend), DEVMETHOD(device_resume, sdhci_acpi_resume), /* Bus interface */ DEVMETHOD(bus_read_ivar, sdhci_generic_read_ivar), DEVMETHOD(bus_write_ivar, sdhci_generic_write_ivar), /* mmcbr_if */ DEVMETHOD(mmcbr_update_ios, sdhci_generic_update_ios), DEVMETHOD(mmcbr_switch_vccq, sdhci_generic_switch_vccq), DEVMETHOD(mmcbr_tune, sdhci_generic_tune), DEVMETHOD(mmcbr_retune, sdhci_generic_retune), DEVMETHOD(mmcbr_request, sdhci_generic_request), DEVMETHOD(mmcbr_get_ro, sdhci_generic_get_ro), DEVMETHOD(mmcbr_acquire_host, sdhci_generic_acquire_host), DEVMETHOD(mmcbr_release_host, sdhci_generic_release_host), /* SDHCI accessors */ DEVMETHOD(sdhci_read_1, sdhci_acpi_read_1), DEVMETHOD(sdhci_read_2, sdhci_acpi_read_2), DEVMETHOD(sdhci_read_4, sdhci_acpi_read_4), DEVMETHOD(sdhci_read_multi_4, sdhci_acpi_read_multi_4), DEVMETHOD(sdhci_write_1, sdhci_acpi_write_1), DEVMETHOD(sdhci_write_2, sdhci_acpi_write_2), DEVMETHOD(sdhci_write_4, sdhci_acpi_write_4), DEVMETHOD(sdhci_write_multi_4, sdhci_acpi_write_multi_4), DEVMETHOD(sdhci_set_uhs_timing, sdhci_generic_set_uhs_timing), DEVMETHOD_END }; static driver_t sdhci_acpi_driver = { "sdhci_acpi", sdhci_methods, sizeof(struct sdhci_acpi_softc), }; static devclass_t sdhci_acpi_devclass; DRIVER_MODULE(sdhci_acpi, acpi, sdhci_acpi_driver, sdhci_acpi_devclass, NULL, NULL); MODULE_DEPEND(sdhci_acpi, sdhci, 1, 1, 1); #ifndef MMCCAM MMC_DECLARE_BRIDGE(sdhci_acpi); #endif Index: projects/clang700-import/sys/kern/init_sysent.c =================================================================== --- projects/clang700-import/sys/kern/init_sysent.c (revision 339014) +++ projects/clang700-import/sys/kern/init_sysent.c (revision 339015) @@ -1,616 +1,616 @@ /* * System call switch table. * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ */ #include #include #include #define AS(name) (sizeof(struct name) / sizeof(register_t)) #ifdef COMPAT_43 #define compat(n, name) n, (sy_call_t *)__CONCAT(o,name) #else #define compat(n, name) 0, (sy_call_t *)nosys #endif #ifdef COMPAT_FREEBSD4 #define compat4(n, name) n, (sy_call_t *)__CONCAT(freebsd4_,name) #else #define compat4(n, name) 0, (sy_call_t *)nosys #endif #ifdef COMPAT_FREEBSD6 #define compat6(n, name) n, (sy_call_t *)__CONCAT(freebsd6_,name) #else #define compat6(n, name) 0, (sy_call_t *)nosys #endif #ifdef COMPAT_FREEBSD7 #define compat7(n, name) n, (sy_call_t *)__CONCAT(freebsd7_,name) #else #define compat7(n, name) 0, (sy_call_t *)nosys #endif #ifdef COMPAT_FREEBSD10 #define compat10(n, name) n, (sy_call_t *)__CONCAT(freebsd10_,name) #else #define compat10(n, name) 0, (sy_call_t *)nosys #endif #ifdef COMPAT_FREEBSD11 #define compat11(n, name) n, (sy_call_t *)__CONCAT(freebsd11_,name) #else #define compat11(n, name) 0, (sy_call_t *)nosys #endif /* The casts are bogus but will do for now. */ struct sysent sysent[] = { { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 0 = syscall */ { AS(sys_exit_args), (sy_call_t *)sys_sys_exit, AUE_EXIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 1 = exit */ { 0, (sy_call_t *)sys_fork, AUE_FORK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 2 = fork */ { AS(read_args), (sy_call_t *)sys_read, AUE_READ, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 3 = read */ { AS(write_args), (sy_call_t *)sys_write, AUE_WRITE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 4 = write */ { AS(open_args), (sy_call_t *)sys_open, AUE_OPEN_RWTC, NULL, 0, 0, 0, SY_THR_STATIC }, /* 5 = open */ { AS(close_args), (sy_call_t *)sys_close, AUE_CLOSE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 6 = close */ { AS(wait4_args), (sy_call_t *)sys_wait4, AUE_WAIT4, NULL, 0, 0, 0, SY_THR_STATIC }, /* 7 = wait4 */ { compat(AS(ocreat_args),creat), AUE_CREAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 8 = old creat */ { AS(link_args), (sy_call_t *)sys_link, AUE_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 9 = link */ { AS(unlink_args), (sy_call_t *)sys_unlink, AUE_UNLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 10 = unlink */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 11 = obsolete execv */ { AS(chdir_args), (sy_call_t *)sys_chdir, AUE_CHDIR, NULL, 0, 0, 0, SY_THR_STATIC }, /* 12 = chdir */ { AS(fchdir_args), (sy_call_t *)sys_fchdir, AUE_FCHDIR, NULL, 0, 0, 0, SY_THR_STATIC }, /* 13 = fchdir */ { compat11(AS(freebsd11_mknod_args),mknod), AUE_MKNOD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 14 = freebsd11 mknod */ { AS(chmod_args), (sy_call_t *)sys_chmod, AUE_CHMOD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 15 = chmod */ { AS(chown_args), (sy_call_t *)sys_chown, AUE_CHOWN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 16 = chown */ { AS(break_args), (sy_call_t *)sys_break, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 17 = break */ { compat4(AS(freebsd4_getfsstat_args),getfsstat), AUE_GETFSSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 18 = freebsd4 getfsstat */ { compat(AS(olseek_args),lseek), AUE_LSEEK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 19 = old lseek */ { 0, (sy_call_t *)sys_getpid, AUE_GETPID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 20 = getpid */ { AS(mount_args), (sy_call_t *)sys_mount, AUE_MOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 21 = mount */ { AS(unmount_args), (sy_call_t *)sys_unmount, AUE_UMOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 22 = unmount */ { AS(setuid_args), (sy_call_t *)sys_setuid, AUE_SETUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 23 = setuid */ { 0, (sy_call_t *)sys_getuid, AUE_GETUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 24 = getuid */ { 0, (sy_call_t *)sys_geteuid, AUE_GETEUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 25 = geteuid */ { AS(ptrace_args), (sy_call_t *)sys_ptrace, AUE_PTRACE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 26 = ptrace */ { AS(recvmsg_args), (sy_call_t *)sys_recvmsg, AUE_RECVMSG, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 27 = recvmsg */ { AS(sendmsg_args), (sy_call_t *)sys_sendmsg, AUE_SENDMSG, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 28 = sendmsg */ { AS(recvfrom_args), (sy_call_t *)sys_recvfrom, AUE_RECVFROM, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 29 = recvfrom */ { AS(accept_args), (sy_call_t *)sys_accept, AUE_ACCEPT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 30 = accept */ { AS(getpeername_args), (sy_call_t *)sys_getpeername, AUE_GETPEERNAME, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 31 = getpeername */ { AS(getsockname_args), (sy_call_t *)sys_getsockname, AUE_GETSOCKNAME, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 32 = getsockname */ { AS(access_args), (sy_call_t *)sys_access, AUE_ACCESS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 33 = access */ { AS(chflags_args), (sy_call_t *)sys_chflags, AUE_CHFLAGS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 34 = chflags */ { AS(fchflags_args), (sy_call_t *)sys_fchflags, AUE_FCHFLAGS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 35 = fchflags */ { 0, (sy_call_t *)sys_sync, AUE_SYNC, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 36 = sync */ { AS(kill_args), (sy_call_t *)sys_kill, AUE_KILL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 37 = kill */ { compat(AS(ostat_args),stat), AUE_STAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 38 = old stat */ { 0, (sy_call_t *)sys_getppid, AUE_GETPPID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 39 = getppid */ { compat(AS(olstat_args),lstat), AUE_LSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 40 = old lstat */ { AS(dup_args), (sy_call_t *)sys_dup, AUE_DUP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 41 = dup */ { compat10(0,pipe), AUE_PIPE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 42 = freebsd10 pipe */ { 0, (sy_call_t *)sys_getegid, AUE_GETEGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 43 = getegid */ { AS(profil_args), (sy_call_t *)sys_profil, AUE_PROFILE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 44 = profil */ { AS(ktrace_args), (sy_call_t *)sys_ktrace, AUE_KTRACE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 45 = ktrace */ { compat(AS(osigaction_args),sigaction), AUE_SIGACTION, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 46 = old sigaction */ { 0, (sy_call_t *)sys_getgid, AUE_GETGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 47 = getgid */ { compat(AS(osigprocmask_args),sigprocmask), AUE_SIGPROCMASK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 48 = old sigprocmask */ { AS(getlogin_args), (sy_call_t *)sys_getlogin, AUE_GETLOGIN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 49 = getlogin */ { AS(setlogin_args), (sy_call_t *)sys_setlogin, AUE_SETLOGIN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 50 = setlogin */ { AS(acct_args), (sy_call_t *)sys_acct, AUE_ACCT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 51 = acct */ { compat(0,sigpending), AUE_SIGPENDING, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 52 = old sigpending */ { AS(sigaltstack_args), (sy_call_t *)sys_sigaltstack, AUE_SIGALTSTACK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 53 = sigaltstack */ { AS(ioctl_args), (sy_call_t *)sys_ioctl, AUE_IOCTL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 54 = ioctl */ { AS(reboot_args), (sy_call_t *)sys_reboot, AUE_REBOOT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 55 = reboot */ { AS(revoke_args), (sy_call_t *)sys_revoke, AUE_REVOKE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 56 = revoke */ { AS(symlink_args), (sy_call_t *)sys_symlink, AUE_SYMLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 57 = symlink */ { AS(readlink_args), (sy_call_t *)sys_readlink, AUE_READLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 58 = readlink */ { AS(execve_args), (sy_call_t *)sys_execve, AUE_EXECVE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 59 = execve */ { AS(umask_args), (sy_call_t *)sys_umask, AUE_UMASK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 60 = umask */ { AS(chroot_args), (sy_call_t *)sys_chroot, AUE_CHROOT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 61 = chroot */ { compat(AS(ofstat_args),fstat), AUE_FSTAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 62 = old fstat */ { compat(AS(getkerninfo_args),getkerninfo), AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 63 = old getkerninfo */ { compat(0,getpagesize), AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 64 = old getpagesize */ { AS(msync_args), (sy_call_t *)sys_msync, AUE_MSYNC, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 65 = msync */ { 0, (sy_call_t *)sys_vfork, AUE_VFORK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 66 = vfork */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 67 = obsolete vread */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 68 = obsolete vwrite */ { AS(sbrk_args), (sy_call_t *)sys_sbrk, AUE_SBRK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 69 = sbrk */ { AS(sstk_args), (sy_call_t *)sys_sstk, AUE_SSTK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 70 = sstk */ { compat(AS(ommap_args),mmap), AUE_MMAP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 71 = old mmap */ { compat11(AS(freebsd11_vadvise_args),vadvise), AUE_O_VADVISE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 72 = freebsd11 vadvise */ { AS(munmap_args), (sy_call_t *)sys_munmap, AUE_MUNMAP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 73 = munmap */ { AS(mprotect_args), (sy_call_t *)sys_mprotect, AUE_MPROTECT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 74 = mprotect */ { AS(madvise_args), (sy_call_t *)sys_madvise, AUE_MADVISE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 75 = madvise */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 76 = obsolete vhangup */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 77 = obsolete vlimit */ { AS(mincore_args), (sy_call_t *)sys_mincore, AUE_MINCORE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 78 = mincore */ { AS(getgroups_args), (sy_call_t *)sys_getgroups, AUE_GETGROUPS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 79 = getgroups */ { AS(setgroups_args), (sy_call_t *)sys_setgroups, AUE_SETGROUPS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 80 = setgroups */ { 0, (sy_call_t *)sys_getpgrp, AUE_GETPGRP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 81 = getpgrp */ { AS(setpgid_args), (sy_call_t *)sys_setpgid, AUE_SETPGRP, NULL, 0, 0, 0, SY_THR_STATIC }, /* 82 = setpgid */ { AS(setitimer_args), (sy_call_t *)sys_setitimer, AUE_SETITIMER, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 83 = setitimer */ { compat(0,wait), AUE_WAIT4, NULL, 0, 0, 0, SY_THR_STATIC }, /* 84 = old wait */ { AS(swapon_args), (sy_call_t *)sys_swapon, AUE_SWAPON, NULL, 0, 0, 0, SY_THR_STATIC }, /* 85 = swapon */ { AS(getitimer_args), (sy_call_t *)sys_getitimer, AUE_GETITIMER, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 86 = getitimer */ { compat(AS(gethostname_args),gethostname), AUE_SYSCTL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 87 = old gethostname */ { compat(AS(sethostname_args),sethostname), AUE_SYSCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 88 = old sethostname */ { 0, (sy_call_t *)sys_getdtablesize, AUE_GETDTABLESIZE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 89 = getdtablesize */ { AS(dup2_args), (sy_call_t *)sys_dup2, AUE_DUP2, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 90 = dup2 */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 91 = getdopt */ { AS(fcntl_args), (sy_call_t *)sys_fcntl, AUE_FCNTL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 92 = fcntl */ { AS(select_args), (sy_call_t *)sys_select, AUE_SELECT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 93 = select */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 94 = setdopt */ { AS(fsync_args), (sy_call_t *)sys_fsync, AUE_FSYNC, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 95 = fsync */ { AS(setpriority_args), (sy_call_t *)sys_setpriority, AUE_SETPRIORITY, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 96 = setpriority */ { AS(socket_args), (sy_call_t *)sys_socket, AUE_SOCKET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 97 = socket */ { AS(connect_args), (sy_call_t *)sys_connect, AUE_CONNECT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 98 = connect */ { compat(AS(accept_args),accept), AUE_ACCEPT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 99 = old accept */ { AS(getpriority_args), (sy_call_t *)sys_getpriority, AUE_GETPRIORITY, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 100 = getpriority */ { compat(AS(osend_args),send), AUE_SEND, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 101 = old send */ { compat(AS(orecv_args),recv), AUE_RECV, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 102 = old recv */ { compat(AS(osigreturn_args),sigreturn), AUE_SIGRETURN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 103 = old sigreturn */ { AS(bind_args), (sy_call_t *)sys_bind, AUE_BIND, NULL, 0, 0, 0, SY_THR_STATIC }, /* 104 = bind */ { AS(setsockopt_args), (sy_call_t *)sys_setsockopt, AUE_SETSOCKOPT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 105 = setsockopt */ { AS(listen_args), (sy_call_t *)sys_listen, AUE_LISTEN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 106 = listen */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 107 = obsolete vtimes */ { compat(AS(osigvec_args),sigvec), AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 108 = old sigvec */ { compat(AS(osigblock_args),sigblock), AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 109 = old sigblock */ { compat(AS(osigsetmask_args),sigsetmask), AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 110 = old sigsetmask */ { compat(AS(osigsuspend_args),sigsuspend), AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 111 = old sigsuspend */ { compat(AS(osigstack_args),sigstack), AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 112 = old sigstack */ { compat(AS(orecvmsg_args),recvmsg), AUE_RECVMSG, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 113 = old recvmsg */ { compat(AS(osendmsg_args),sendmsg), AUE_SENDMSG, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 114 = old sendmsg */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 115 = obsolete vtrace */ { AS(gettimeofday_args), (sy_call_t *)sys_gettimeofday, AUE_GETTIMEOFDAY, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 116 = gettimeofday */ { AS(getrusage_args), (sy_call_t *)sys_getrusage, AUE_GETRUSAGE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 117 = getrusage */ { AS(getsockopt_args), (sy_call_t *)sys_getsockopt, AUE_GETSOCKOPT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 118 = getsockopt */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 119 = resuba */ { AS(readv_args), (sy_call_t *)sys_readv, AUE_READV, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 120 = readv */ { AS(writev_args), (sy_call_t *)sys_writev, AUE_WRITEV, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 121 = writev */ { AS(settimeofday_args), (sy_call_t *)sys_settimeofday, AUE_SETTIMEOFDAY, NULL, 0, 0, 0, SY_THR_STATIC }, /* 122 = settimeofday */ { AS(fchown_args), (sy_call_t *)sys_fchown, AUE_FCHOWN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 123 = fchown */ { AS(fchmod_args), (sy_call_t *)sys_fchmod, AUE_FCHMOD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 124 = fchmod */ { compat(AS(recvfrom_args),recvfrom), AUE_RECVFROM, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 125 = old recvfrom */ { AS(setreuid_args), (sy_call_t *)sys_setreuid, AUE_SETREUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 126 = setreuid */ { AS(setregid_args), (sy_call_t *)sys_setregid, AUE_SETREGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 127 = setregid */ { AS(rename_args), (sy_call_t *)sys_rename, AUE_RENAME, NULL, 0, 0, 0, SY_THR_STATIC }, /* 128 = rename */ { compat(AS(otruncate_args),truncate), AUE_TRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 129 = old truncate */ { compat(AS(oftruncate_args),ftruncate), AUE_FTRUNCATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 130 = old ftruncate */ { AS(flock_args), (sy_call_t *)sys_flock, AUE_FLOCK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 131 = flock */ { AS(mkfifo_args), (sy_call_t *)sys_mkfifo, AUE_MKFIFO, NULL, 0, 0, 0, SY_THR_STATIC }, /* 132 = mkfifo */ { AS(sendto_args), (sy_call_t *)sys_sendto, AUE_SENDTO, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 133 = sendto */ { AS(shutdown_args), (sy_call_t *)sys_shutdown, AUE_SHUTDOWN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 134 = shutdown */ { AS(socketpair_args), (sy_call_t *)sys_socketpair, AUE_SOCKETPAIR, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 135 = socketpair */ { AS(mkdir_args), (sy_call_t *)sys_mkdir, AUE_MKDIR, NULL, 0, 0, 0, SY_THR_STATIC }, /* 136 = mkdir */ { AS(rmdir_args), (sy_call_t *)sys_rmdir, AUE_RMDIR, NULL, 0, 0, 0, SY_THR_STATIC }, /* 137 = rmdir */ { AS(utimes_args), (sy_call_t *)sys_utimes, AUE_UTIMES, NULL, 0, 0, 0, SY_THR_STATIC }, /* 138 = utimes */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 139 = obsolete 4.2 sigreturn */ { AS(adjtime_args), (sy_call_t *)sys_adjtime, AUE_ADJTIME, NULL, 0, 0, 0, SY_THR_STATIC }, /* 140 = adjtime */ { compat(AS(ogetpeername_args),getpeername), AUE_GETPEERNAME, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 141 = old getpeername */ { compat(0,gethostid), AUE_SYSCTL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 142 = old gethostid */ { compat(AS(osethostid_args),sethostid), AUE_SYSCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 143 = old sethostid */ { compat(AS(ogetrlimit_args),getrlimit), AUE_GETRLIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 144 = old getrlimit */ { compat(AS(osetrlimit_args),setrlimit), AUE_SETRLIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 145 = old setrlimit */ { compat(AS(okillpg_args),killpg), AUE_KILLPG, NULL, 0, 0, 0, SY_THR_STATIC }, /* 146 = old killpg */ { 0, (sy_call_t *)sys_setsid, AUE_SETSID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 147 = setsid */ { AS(quotactl_args), (sy_call_t *)sys_quotactl, AUE_QUOTACTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 148 = quotactl */ { compat(0,quota), AUE_O_QUOTA, NULL, 0, 0, 0, SY_THR_STATIC }, /* 149 = old quota */ { compat(AS(getsockname_args),getsockname), AUE_GETSOCKNAME, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 150 = old getsockname */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 151 = sem_lock */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 152 = sem_wakeup */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 153 = asyncdaemon */ { AS(nlm_syscall_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 154 = nlm_syscall */ { AS(nfssvc_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 155 = nfssvc */ { compat(AS(ogetdirentries_args),getdirentries), AUE_GETDIRENTRIES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 156 = old getdirentries */ { compat4(AS(freebsd4_statfs_args),statfs), AUE_STATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 157 = freebsd4 statfs */ { compat4(AS(freebsd4_fstatfs_args),fstatfs), AUE_FSTATFS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 158 = freebsd4 fstatfs */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 159 = nosys */ { AS(lgetfh_args), (sy_call_t *)sys_lgetfh, AUE_LGETFH, NULL, 0, 0, 0, SY_THR_STATIC }, /* 160 = lgetfh */ { AS(getfh_args), (sy_call_t *)sys_getfh, AUE_NFS_GETFH, NULL, 0, 0, 0, SY_THR_STATIC }, /* 161 = getfh */ { compat4(AS(freebsd4_getdomainname_args),getdomainname), AUE_SYSCTL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 162 = freebsd4 getdomainname */ { compat4(AS(freebsd4_setdomainname_args),setdomainname), AUE_SYSCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 163 = freebsd4 setdomainname */ { compat4(AS(freebsd4_uname_args),uname), AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 164 = freebsd4 uname */ { AS(sysarch_args), (sy_call_t *)sysarch, AUE_SYSARCH, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 165 = sysarch */ { AS(rtprio_args), (sy_call_t *)sys_rtprio, AUE_RTPRIO, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 166 = rtprio */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 167 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 168 = nosys */ { AS(semsys_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 169 = semsys */ { AS(msgsys_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 170 = msgsys */ { AS(shmsys_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 171 = shmsys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 172 = nosys */ { compat6(AS(freebsd6_pread_args),pread), AUE_PREAD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 173 = freebsd6 pread */ { compat6(AS(freebsd6_pwrite_args),pwrite), AUE_PWRITE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 174 = freebsd6 pwrite */ { AS(setfib_args), (sy_call_t *)sys_setfib, AUE_SETFIB, NULL, 0, 0, 0, SY_THR_STATIC }, /* 175 = setfib */ { AS(ntp_adjtime_args), (sy_call_t *)sys_ntp_adjtime, AUE_NTP_ADJTIME, NULL, 0, 0, 0, SY_THR_STATIC }, /* 176 = ntp_adjtime */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 177 = sfork */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 178 = getdescriptor */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 179 = setdescriptor */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 180 = nosys */ { AS(setgid_args), (sy_call_t *)sys_setgid, AUE_SETGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 181 = setgid */ { AS(setegid_args), (sy_call_t *)sys_setegid, AUE_SETEGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 182 = setegid */ { AS(seteuid_args), (sy_call_t *)sys_seteuid, AUE_SETEUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 183 = seteuid */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 184 = lfs_bmapv */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 185 = lfs_markv */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 186 = lfs_segclean */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 187 = lfs_segwait */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 184 = obsolete lfs_bmapv */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 185 = obsolete lfs_markv */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 186 = obsolete lfs_segclean */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 187 = obsolete lfs_segwait */ { compat11(AS(freebsd11_stat_args),stat), AUE_STAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 188 = freebsd11 stat */ { compat11(AS(freebsd11_fstat_args),fstat), AUE_FSTAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 189 = freebsd11 fstat */ { compat11(AS(freebsd11_lstat_args),lstat), AUE_LSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 190 = freebsd11 lstat */ { AS(pathconf_args), (sy_call_t *)sys_pathconf, AUE_PATHCONF, NULL, 0, 0, 0, SY_THR_STATIC }, /* 191 = pathconf */ { AS(fpathconf_args), (sy_call_t *)sys_fpathconf, AUE_FPATHCONF, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 192 = fpathconf */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 193 = nosys */ { AS(__getrlimit_args), (sy_call_t *)sys_getrlimit, AUE_GETRLIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 194 = getrlimit */ { AS(__setrlimit_args), (sy_call_t *)sys_setrlimit, AUE_SETRLIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 195 = setrlimit */ { compat11(AS(freebsd11_getdirentries_args),getdirentries), AUE_GETDIRENTRIES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 196 = freebsd11 getdirentries */ { compat6(AS(freebsd6_mmap_args),mmap), AUE_MMAP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 197 = freebsd6 mmap */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 198 = __syscall */ { compat6(AS(freebsd6_lseek_args),lseek), AUE_LSEEK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 199 = freebsd6 lseek */ { compat6(AS(freebsd6_truncate_args),truncate), AUE_TRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 200 = freebsd6 truncate */ { compat6(AS(freebsd6_ftruncate_args),ftruncate), AUE_FTRUNCATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 201 = freebsd6 ftruncate */ { AS(sysctl_args), (sy_call_t *)sys___sysctl, AUE_SYSCTL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 202 = __sysctl */ { AS(mlock_args), (sy_call_t *)sys_mlock, AUE_MLOCK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 203 = mlock */ { AS(munlock_args), (sy_call_t *)sys_munlock, AUE_MUNLOCK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 204 = munlock */ { AS(undelete_args), (sy_call_t *)sys_undelete, AUE_UNDELETE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 205 = undelete */ { AS(futimes_args), (sy_call_t *)sys_futimes, AUE_FUTIMES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 206 = futimes */ { AS(getpgid_args), (sy_call_t *)sys_getpgid, AUE_GETPGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 207 = getpgid */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 208 = nosys */ { AS(poll_args), (sy_call_t *)sys_poll, AUE_POLL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 209 = poll */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 210 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 211 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 212 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 213 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 214 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 215 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 216 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 217 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 218 = lkmnosys */ { AS(nosys_args), (sy_call_t *)lkmnosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 219 = lkmnosys */ { 0, (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 220 = freebsd7 __semctl */ { AS(semget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 221 = semget */ { AS(semop_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 222 = semop */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 223 = semconfig */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 223 = obsolete semconfig */ { 0, (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 224 = freebsd7 msgctl */ { AS(msgget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 225 = msgget */ { AS(msgsnd_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 226 = msgsnd */ { AS(msgrcv_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 227 = msgrcv */ { AS(shmat_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 228 = shmat */ { 0, (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 229 = freebsd7 shmctl */ { AS(shmdt_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 230 = shmdt */ { AS(shmget_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 231 = shmget */ { AS(clock_gettime_args), (sy_call_t *)sys_clock_gettime, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 232 = clock_gettime */ { AS(clock_settime_args), (sy_call_t *)sys_clock_settime, AUE_CLOCK_SETTIME, NULL, 0, 0, 0, SY_THR_STATIC }, /* 233 = clock_settime */ { AS(clock_getres_args), (sy_call_t *)sys_clock_getres, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 234 = clock_getres */ { AS(ktimer_create_args), (sy_call_t *)sys_ktimer_create, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 235 = ktimer_create */ { AS(ktimer_delete_args), (sy_call_t *)sys_ktimer_delete, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 236 = ktimer_delete */ { AS(ktimer_settime_args), (sy_call_t *)sys_ktimer_settime, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 237 = ktimer_settime */ { AS(ktimer_gettime_args), (sy_call_t *)sys_ktimer_gettime, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 238 = ktimer_gettime */ { AS(ktimer_getoverrun_args), (sy_call_t *)sys_ktimer_getoverrun, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 239 = ktimer_getoverrun */ { AS(nanosleep_args), (sy_call_t *)sys_nanosleep, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 240 = nanosleep */ { AS(ffclock_getcounter_args), (sy_call_t *)sys_ffclock_getcounter, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 241 = ffclock_getcounter */ { AS(ffclock_setestimate_args), (sy_call_t *)sys_ffclock_setestimate, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 242 = ffclock_setestimate */ { AS(ffclock_getestimate_args), (sy_call_t *)sys_ffclock_getestimate, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 243 = ffclock_getestimate */ { AS(clock_nanosleep_args), (sy_call_t *)sys_clock_nanosleep, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 244 = clock_nanosleep */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 245 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 246 = nosys */ { AS(clock_getcpuclockid2_args), (sy_call_t *)sys_clock_getcpuclockid2, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 247 = clock_getcpuclockid2 */ { AS(ntp_gettime_args), (sy_call_t *)sys_ntp_gettime, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 248 = ntp_gettime */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 249 = nosys */ { AS(minherit_args), (sy_call_t *)sys_minherit, AUE_MINHERIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 250 = minherit */ { AS(rfork_args), (sy_call_t *)sys_rfork, AUE_RFORK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 251 = rfork */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 252 = obsolete openbsd_poll */ { 0, (sy_call_t *)sys_issetugid, AUE_ISSETUGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 253 = issetugid */ { AS(lchown_args), (sy_call_t *)sys_lchown, AUE_LCHOWN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 254 = lchown */ { AS(aio_read_args), (sy_call_t *)sys_aio_read, AUE_AIO_READ, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 255 = aio_read */ { AS(aio_write_args), (sy_call_t *)sys_aio_write, AUE_AIO_WRITE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 256 = aio_write */ { AS(lio_listio_args), (sy_call_t *)sys_lio_listio, AUE_LIO_LISTIO, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 257 = lio_listio */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 258 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 259 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 260 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 261 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 262 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 263 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 264 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 265 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 266 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 267 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 268 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 269 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 270 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 271 = nosys */ { compat11(AS(freebsd11_getdents_args),getdents), AUE_O_GETDENTS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 272 = freebsd11 getdents */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 273 = nosys */ { AS(lchmod_args), (sy_call_t *)sys_lchmod, AUE_LCHMOD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 274 = lchmod */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 275 = obsolete netbsd_lchown */ { AS(lutimes_args), (sy_call_t *)sys_lutimes, AUE_LUTIMES, NULL, 0, 0, 0, SY_THR_STATIC }, /* 276 = lutimes */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 277 = obsolete netbsd_msync */ { compat11(AS(freebsd11_nstat_args),nstat), AUE_STAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 278 = freebsd11 nstat */ { compat11(AS(freebsd11_nfstat_args),nfstat), AUE_FSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 279 = freebsd11 nfstat */ { compat11(AS(freebsd11_nlstat_args),nlstat), AUE_LSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 280 = freebsd11 nlstat */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 281 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 282 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 283 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 284 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 285 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 286 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 287 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 288 = nosys */ { AS(preadv_args), (sy_call_t *)sys_preadv, AUE_PREADV, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 289 = preadv */ { AS(pwritev_args), (sy_call_t *)sys_pwritev, AUE_PWRITEV, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 290 = pwritev */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 291 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 292 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 293 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 294 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 295 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 296 = nosys */ { compat4(AS(freebsd4_fhstatfs_args),fhstatfs), AUE_FHSTATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 297 = freebsd4 fhstatfs */ { AS(fhopen_args), (sy_call_t *)sys_fhopen, AUE_FHOPEN, NULL, 0, 0, 0, SY_THR_STATIC }, /* 298 = fhopen */ { compat11(AS(freebsd11_fhstat_args),fhstat), AUE_FHSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 299 = freebsd11 fhstat */ { AS(modnext_args), (sy_call_t *)sys_modnext, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 300 = modnext */ { AS(modstat_args), (sy_call_t *)sys_modstat, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 301 = modstat */ { AS(modfnext_args), (sy_call_t *)sys_modfnext, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 302 = modfnext */ { AS(modfind_args), (sy_call_t *)sys_modfind, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 303 = modfind */ { AS(kldload_args), (sy_call_t *)sys_kldload, AUE_MODLOAD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 304 = kldload */ { AS(kldunload_args), (sy_call_t *)sys_kldunload, AUE_MODUNLOAD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 305 = kldunload */ { AS(kldfind_args), (sy_call_t *)sys_kldfind, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 306 = kldfind */ { AS(kldnext_args), (sy_call_t *)sys_kldnext, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 307 = kldnext */ { AS(kldstat_args), (sy_call_t *)sys_kldstat, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 308 = kldstat */ { AS(kldfirstmod_args), (sy_call_t *)sys_kldfirstmod, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 309 = kldfirstmod */ { AS(getsid_args), (sy_call_t *)sys_getsid, AUE_GETSID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 310 = getsid */ { AS(setresuid_args), (sy_call_t *)sys_setresuid, AUE_SETRESUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 311 = setresuid */ { AS(setresgid_args), (sy_call_t *)sys_setresgid, AUE_SETRESGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 312 = setresgid */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 313 = obsolete signanosleep */ { AS(aio_return_args), (sy_call_t *)sys_aio_return, AUE_AIO_RETURN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 314 = aio_return */ { AS(aio_suspend_args), (sy_call_t *)sys_aio_suspend, AUE_AIO_SUSPEND, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 315 = aio_suspend */ { AS(aio_cancel_args), (sy_call_t *)sys_aio_cancel, AUE_AIO_CANCEL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 316 = aio_cancel */ { AS(aio_error_args), (sy_call_t *)sys_aio_error, AUE_AIO_ERROR, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 317 = aio_error */ { compat6(AS(freebsd6_aio_read_args),aio_read), AUE_AIO_READ, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 318 = freebsd6 aio_read */ { compat6(AS(freebsd6_aio_write_args),aio_write), AUE_AIO_WRITE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 319 = freebsd6 aio_write */ { compat6(AS(freebsd6_lio_listio_args),lio_listio), AUE_LIO_LISTIO, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 320 = freebsd6 lio_listio */ { 0, (sy_call_t *)sys_yield, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 321 = yield */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 322 = obsolete thr_sleep */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 323 = obsolete thr_wakeup */ { AS(mlockall_args), (sy_call_t *)sys_mlockall, AUE_MLOCKALL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 324 = mlockall */ { 0, (sy_call_t *)sys_munlockall, AUE_MUNLOCKALL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 325 = munlockall */ { AS(__getcwd_args), (sy_call_t *)sys___getcwd, AUE_GETCWD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 326 = __getcwd */ { AS(sched_setparam_args), (sy_call_t *)sys_sched_setparam, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 327 = sched_setparam */ { AS(sched_getparam_args), (sy_call_t *)sys_sched_getparam, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 328 = sched_getparam */ { AS(sched_setscheduler_args), (sy_call_t *)sys_sched_setscheduler, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 329 = sched_setscheduler */ { AS(sched_getscheduler_args), (sy_call_t *)sys_sched_getscheduler, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 330 = sched_getscheduler */ { 0, (sy_call_t *)sys_sched_yield, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 331 = sched_yield */ { AS(sched_get_priority_max_args), (sy_call_t *)sys_sched_get_priority_max, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 332 = sched_get_priority_max */ { AS(sched_get_priority_min_args), (sy_call_t *)sys_sched_get_priority_min, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 333 = sched_get_priority_min */ { AS(sched_rr_get_interval_args), (sy_call_t *)sys_sched_rr_get_interval, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 334 = sched_rr_get_interval */ { AS(utrace_args), (sy_call_t *)sys_utrace, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 335 = utrace */ { compat4(AS(freebsd4_sendfile_args),sendfile), AUE_SENDFILE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 336 = freebsd4 sendfile */ { AS(kldsym_args), (sy_call_t *)sys_kldsym, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 337 = kldsym */ { AS(jail_args), (sy_call_t *)sys_jail, AUE_JAIL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 338 = jail */ { AS(nnpfs_syscall_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 339 = nnpfs_syscall */ { AS(sigprocmask_args), (sy_call_t *)sys_sigprocmask, AUE_SIGPROCMASK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 340 = sigprocmask */ { AS(sigsuspend_args), (sy_call_t *)sys_sigsuspend, AUE_SIGSUSPEND, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 341 = sigsuspend */ { compat4(AS(freebsd4_sigaction_args),sigaction), AUE_SIGACTION, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 342 = freebsd4 sigaction */ { AS(sigpending_args), (sy_call_t *)sys_sigpending, AUE_SIGPENDING, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 343 = sigpending */ { compat4(AS(freebsd4_sigreturn_args),sigreturn), AUE_SIGRETURN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 344 = freebsd4 sigreturn */ { AS(sigtimedwait_args), (sy_call_t *)sys_sigtimedwait, AUE_SIGWAIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 345 = sigtimedwait */ { AS(sigwaitinfo_args), (sy_call_t *)sys_sigwaitinfo, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 346 = sigwaitinfo */ { AS(__acl_get_file_args), (sy_call_t *)sys___acl_get_file, AUE_ACL_GET_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 347 = __acl_get_file */ { AS(__acl_set_file_args), (sy_call_t *)sys___acl_set_file, AUE_ACL_SET_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 348 = __acl_set_file */ { AS(__acl_get_fd_args), (sy_call_t *)sys___acl_get_fd, AUE_ACL_GET_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 349 = __acl_get_fd */ { AS(__acl_set_fd_args), (sy_call_t *)sys___acl_set_fd, AUE_ACL_SET_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 350 = __acl_set_fd */ { AS(__acl_delete_file_args), (sy_call_t *)sys___acl_delete_file, AUE_ACL_DELETE_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 351 = __acl_delete_file */ { AS(__acl_delete_fd_args), (sy_call_t *)sys___acl_delete_fd, AUE_ACL_DELETE_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 352 = __acl_delete_fd */ { AS(__acl_aclcheck_file_args), (sy_call_t *)sys___acl_aclcheck_file, AUE_ACL_CHECK_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 353 = __acl_aclcheck_file */ { AS(__acl_aclcheck_fd_args), (sy_call_t *)sys___acl_aclcheck_fd, AUE_ACL_CHECK_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 354 = __acl_aclcheck_fd */ { AS(extattrctl_args), (sy_call_t *)sys_extattrctl, AUE_EXTATTRCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 355 = extattrctl */ { AS(extattr_set_file_args), (sy_call_t *)sys_extattr_set_file, AUE_EXTATTR_SET_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 356 = extattr_set_file */ { AS(extattr_get_file_args), (sy_call_t *)sys_extattr_get_file, AUE_EXTATTR_GET_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 357 = extattr_get_file */ { AS(extattr_delete_file_args), (sy_call_t *)sys_extattr_delete_file, AUE_EXTATTR_DELETE_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 358 = extattr_delete_file */ { AS(aio_waitcomplete_args), (sy_call_t *)sys_aio_waitcomplete, AUE_AIO_WAITCOMPLETE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 359 = aio_waitcomplete */ { AS(getresuid_args), (sy_call_t *)sys_getresuid, AUE_GETRESUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 360 = getresuid */ { AS(getresgid_args), (sy_call_t *)sys_getresgid, AUE_GETRESGID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 361 = getresgid */ { 0, (sy_call_t *)sys_kqueue, AUE_KQUEUE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 362 = kqueue */ { compat11(AS(freebsd11_kevent_args),kevent), AUE_KEVENT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 363 = freebsd11 kevent */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 364 = __cap_get_proc */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 365 = __cap_set_proc */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 366 = __cap_get_fd */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 367 = __cap_get_file */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 368 = __cap_set_fd */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 369 = __cap_set_file */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 364 = obsolete __cap_get_proc */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 365 = obsolete __cap_set_proc */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 366 = obsolete __cap_get_fd */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 367 = obsolete __cap_get_file */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 368 = obsolete __cap_set_fd */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 369 = obsolete __cap_set_file */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 370 = nosys */ { AS(extattr_set_fd_args), (sy_call_t *)sys_extattr_set_fd, AUE_EXTATTR_SET_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 371 = extattr_set_fd */ { AS(extattr_get_fd_args), (sy_call_t *)sys_extattr_get_fd, AUE_EXTATTR_GET_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 372 = extattr_get_fd */ { AS(extattr_delete_fd_args), (sy_call_t *)sys_extattr_delete_fd, AUE_EXTATTR_DELETE_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 373 = extattr_delete_fd */ { AS(__setugid_args), (sy_call_t *)sys___setugid, AUE_SETUGID, NULL, 0, 0, 0, SY_THR_STATIC }, /* 374 = __setugid */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 375 = nfsclnt */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 375 = obsolete nfsclnt */ { AS(eaccess_args), (sy_call_t *)sys_eaccess, AUE_EACCESS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 376 = eaccess */ { AS(afs3_syscall_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 377 = afs3_syscall */ { AS(nmount_args), (sy_call_t *)sys_nmount, AUE_NMOUNT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 378 = nmount */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 379 = kse_exit */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 380 = kse_wakeup */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 381 = kse_create */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 382 = kse_thr_interrupt */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 383 = kse_release */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 379 = obsolete kse_exit */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 380 = obsolete kse_wakeup */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 381 = obsolete kse_create */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 382 = obsolete kse_thr_interrupt */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 383 = obsolete kse_release */ { AS(__mac_get_proc_args), (sy_call_t *)sys___mac_get_proc, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 384 = __mac_get_proc */ { AS(__mac_set_proc_args), (sy_call_t *)sys___mac_set_proc, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 385 = __mac_set_proc */ { AS(__mac_get_fd_args), (sy_call_t *)sys___mac_get_fd, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 386 = __mac_get_fd */ { AS(__mac_get_file_args), (sy_call_t *)sys___mac_get_file, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 387 = __mac_get_file */ { AS(__mac_set_fd_args), (sy_call_t *)sys___mac_set_fd, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 388 = __mac_set_fd */ { AS(__mac_set_file_args), (sy_call_t *)sys___mac_set_file, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 389 = __mac_set_file */ { AS(kenv_args), (sy_call_t *)sys_kenv, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 390 = kenv */ { AS(lchflags_args), (sy_call_t *)sys_lchflags, AUE_LCHFLAGS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 391 = lchflags */ { AS(uuidgen_args), (sy_call_t *)sys_uuidgen, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 392 = uuidgen */ { AS(sendfile_args), (sy_call_t *)sys_sendfile, AUE_SENDFILE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 393 = sendfile */ { AS(mac_syscall_args), (sy_call_t *)sys_mac_syscall, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 394 = mac_syscall */ { compat11(AS(freebsd11_getfsstat_args),getfsstat), AUE_GETFSSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 395 = freebsd11 getfsstat */ { compat11(AS(freebsd11_statfs_args),statfs), AUE_STATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 396 = freebsd11 statfs */ { compat11(AS(freebsd11_fstatfs_args),fstatfs), AUE_FSTATFS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 397 = freebsd11 fstatfs */ { compat11(AS(freebsd11_fhstatfs_args),fhstatfs), AUE_FHSTATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 398 = freebsd11 fhstatfs */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 399 = nosys */ { AS(ksem_close_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 400 = ksem_close */ { AS(ksem_post_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 401 = ksem_post */ { AS(ksem_wait_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 402 = ksem_wait */ { AS(ksem_trywait_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 403 = ksem_trywait */ { AS(ksem_init_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 404 = ksem_init */ { AS(ksem_open_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 405 = ksem_open */ { AS(ksem_unlink_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 406 = ksem_unlink */ { AS(ksem_getvalue_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 407 = ksem_getvalue */ { AS(ksem_destroy_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 408 = ksem_destroy */ { AS(__mac_get_pid_args), (sy_call_t *)sys___mac_get_pid, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 409 = __mac_get_pid */ { AS(__mac_get_link_args), (sy_call_t *)sys___mac_get_link, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 410 = __mac_get_link */ { AS(__mac_set_link_args), (sy_call_t *)sys___mac_set_link, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 411 = __mac_set_link */ { AS(extattr_set_link_args), (sy_call_t *)sys_extattr_set_link, AUE_EXTATTR_SET_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 412 = extattr_set_link */ { AS(extattr_get_link_args), (sy_call_t *)sys_extattr_get_link, AUE_EXTATTR_GET_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 413 = extattr_get_link */ { AS(extattr_delete_link_args), (sy_call_t *)sys_extattr_delete_link, AUE_EXTATTR_DELETE_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 414 = extattr_delete_link */ { AS(__mac_execve_args), (sy_call_t *)sys___mac_execve, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 415 = __mac_execve */ { AS(sigaction_args), (sy_call_t *)sys_sigaction, AUE_SIGACTION, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 416 = sigaction */ { AS(sigreturn_args), (sy_call_t *)sys_sigreturn, AUE_SIGRETURN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 417 = sigreturn */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 418 = __xstat */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 419 = __xfstat */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 420 = __xlstat */ { AS(getcontext_args), (sy_call_t *)sys_getcontext, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 421 = getcontext */ { AS(setcontext_args), (sy_call_t *)sys_setcontext, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 422 = setcontext */ { AS(swapcontext_args), (sy_call_t *)sys_swapcontext, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 423 = swapcontext */ { AS(swapoff_args), (sy_call_t *)sys_swapoff, AUE_SWAPOFF, NULL, 0, 0, 0, SY_THR_STATIC }, /* 424 = swapoff */ { AS(__acl_get_link_args), (sy_call_t *)sys___acl_get_link, AUE_ACL_GET_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 425 = __acl_get_link */ { AS(__acl_set_link_args), (sy_call_t *)sys___acl_set_link, AUE_ACL_SET_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 426 = __acl_set_link */ { AS(__acl_delete_link_args), (sy_call_t *)sys___acl_delete_link, AUE_ACL_DELETE_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 427 = __acl_delete_link */ { AS(__acl_aclcheck_link_args), (sy_call_t *)sys___acl_aclcheck_link, AUE_ACL_CHECK_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 428 = __acl_aclcheck_link */ { AS(sigwait_args), (sy_call_t *)sys_sigwait, AUE_SIGWAIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 429 = sigwait */ { AS(thr_create_args), (sy_call_t *)sys_thr_create, AUE_THR_CREATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 430 = thr_create */ { AS(thr_exit_args), (sy_call_t *)sys_thr_exit, AUE_THR_EXIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 431 = thr_exit */ { AS(thr_self_args), (sy_call_t *)sys_thr_self, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 432 = thr_self */ { AS(thr_kill_args), (sy_call_t *)sys_thr_kill, AUE_THR_KILL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 433 = thr_kill */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 434 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 435 = nosys */ { AS(jail_attach_args), (sy_call_t *)sys_jail_attach, AUE_JAIL_ATTACH, NULL, 0, 0, 0, SY_THR_STATIC }, /* 436 = jail_attach */ { AS(extattr_list_fd_args), (sy_call_t *)sys_extattr_list_fd, AUE_EXTATTR_LIST_FD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 437 = extattr_list_fd */ { AS(extattr_list_file_args), (sy_call_t *)sys_extattr_list_file, AUE_EXTATTR_LIST_FILE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 438 = extattr_list_file */ { AS(extattr_list_link_args), (sy_call_t *)sys_extattr_list_link, AUE_EXTATTR_LIST_LINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 439 = extattr_list_link */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 440 = kse_switchin */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 440 = obsolete kse_switchin */ { AS(ksem_timedwait_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 441 = ksem_timedwait */ { AS(thr_suspend_args), (sy_call_t *)sys_thr_suspend, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 442 = thr_suspend */ { AS(thr_wake_args), (sy_call_t *)sys_thr_wake, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 443 = thr_wake */ { AS(kldunloadf_args), (sy_call_t *)sys_kldunloadf, AUE_MODUNLOAD, NULL, 0, 0, 0, SY_THR_STATIC }, /* 444 = kldunloadf */ { AS(audit_args), (sy_call_t *)sys_audit, AUE_AUDIT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 445 = audit */ { AS(auditon_args), (sy_call_t *)sys_auditon, AUE_AUDITON, NULL, 0, 0, 0, SY_THR_STATIC }, /* 446 = auditon */ { AS(getauid_args), (sy_call_t *)sys_getauid, AUE_GETAUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 447 = getauid */ { AS(setauid_args), (sy_call_t *)sys_setauid, AUE_SETAUID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 448 = setauid */ { AS(getaudit_args), (sy_call_t *)sys_getaudit, AUE_GETAUDIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 449 = getaudit */ { AS(setaudit_args), (sy_call_t *)sys_setaudit, AUE_SETAUDIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 450 = setaudit */ { AS(getaudit_addr_args), (sy_call_t *)sys_getaudit_addr, AUE_GETAUDIT_ADDR, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 451 = getaudit_addr */ { AS(setaudit_addr_args), (sy_call_t *)sys_setaudit_addr, AUE_SETAUDIT_ADDR, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 452 = setaudit_addr */ { AS(auditctl_args), (sy_call_t *)sys_auditctl, AUE_AUDITCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 453 = auditctl */ { AS(_umtx_op_args), (sy_call_t *)sys__umtx_op, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 454 = _umtx_op */ { AS(thr_new_args), (sy_call_t *)sys_thr_new, AUE_THR_NEW, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 455 = thr_new */ { AS(sigqueue_args), (sy_call_t *)sys_sigqueue, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 456 = sigqueue */ { AS(kmq_open_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 457 = kmq_open */ { AS(kmq_setattr_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 458 = kmq_setattr */ { AS(kmq_timedreceive_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 459 = kmq_timedreceive */ { AS(kmq_timedsend_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 460 = kmq_timedsend */ { AS(kmq_notify_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 461 = kmq_notify */ { AS(kmq_unlink_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 462 = kmq_unlink */ { AS(abort2_args), (sy_call_t *)sys_abort2, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 463 = abort2 */ { AS(thr_set_name_args), (sy_call_t *)sys_thr_set_name, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 464 = thr_set_name */ { AS(aio_fsync_args), (sy_call_t *)sys_aio_fsync, AUE_AIO_FSYNC, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 465 = aio_fsync */ { AS(rtprio_thread_args), (sy_call_t *)sys_rtprio_thread, AUE_RTPRIO, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 466 = rtprio_thread */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 467 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 468 = nosys */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 469 = __getpath_fromfd */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 470 = __getpath_fromaddr */ { AS(sctp_peeloff_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 471 = sctp_peeloff */ { AS(sctp_generic_sendmsg_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 472 = sctp_generic_sendmsg */ { AS(sctp_generic_sendmsg_iov_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 473 = sctp_generic_sendmsg_iov */ { AS(sctp_generic_recvmsg_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_ABSENT }, /* 474 = sctp_generic_recvmsg */ { AS(pread_args), (sy_call_t *)sys_pread, AUE_PREAD, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 475 = pread */ { AS(pwrite_args), (sy_call_t *)sys_pwrite, AUE_PWRITE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 476 = pwrite */ { AS(mmap_args), (sy_call_t *)sys_mmap, AUE_MMAP, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 477 = mmap */ { AS(lseek_args), (sy_call_t *)sys_lseek, AUE_LSEEK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 478 = lseek */ { AS(truncate_args), (sy_call_t *)sys_truncate, AUE_TRUNCATE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 479 = truncate */ { AS(ftruncate_args), (sy_call_t *)sys_ftruncate, AUE_FTRUNCATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 480 = ftruncate */ { AS(thr_kill2_args), (sy_call_t *)sys_thr_kill2, AUE_THR_KILL2, NULL, 0, 0, 0, SY_THR_STATIC }, /* 481 = thr_kill2 */ { AS(shm_open_args), (sy_call_t *)sys_shm_open, AUE_SHMOPEN, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 482 = shm_open */ { AS(shm_unlink_args), (sy_call_t *)sys_shm_unlink, AUE_SHMUNLINK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 483 = shm_unlink */ { AS(cpuset_args), (sy_call_t *)sys_cpuset, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 484 = cpuset */ { AS(cpuset_setid_args), (sy_call_t *)sys_cpuset_setid, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 485 = cpuset_setid */ { AS(cpuset_getid_args), (sy_call_t *)sys_cpuset_getid, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 486 = cpuset_getid */ { AS(cpuset_getaffinity_args), (sy_call_t *)sys_cpuset_getaffinity, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 487 = cpuset_getaffinity */ { AS(cpuset_setaffinity_args), (sy_call_t *)sys_cpuset_setaffinity, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 488 = cpuset_setaffinity */ { AS(faccessat_args), (sy_call_t *)sys_faccessat, AUE_FACCESSAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 489 = faccessat */ { AS(fchmodat_args), (sy_call_t *)sys_fchmodat, AUE_FCHMODAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 490 = fchmodat */ { AS(fchownat_args), (sy_call_t *)sys_fchownat, AUE_FCHOWNAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 491 = fchownat */ { AS(fexecve_args), (sy_call_t *)sys_fexecve, AUE_FEXECVE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 492 = fexecve */ { compat11(AS(freebsd11_fstatat_args),fstatat), AUE_FSTATAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 493 = freebsd11 fstatat */ { AS(futimesat_args), (sy_call_t *)sys_futimesat, AUE_FUTIMESAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 494 = futimesat */ { AS(linkat_args), (sy_call_t *)sys_linkat, AUE_LINKAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 495 = linkat */ { AS(mkdirat_args), (sy_call_t *)sys_mkdirat, AUE_MKDIRAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 496 = mkdirat */ { AS(mkfifoat_args), (sy_call_t *)sys_mkfifoat, AUE_MKFIFOAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 497 = mkfifoat */ { compat11(AS(freebsd11_mknodat_args),mknodat), AUE_MKNODAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 498 = freebsd11 mknodat */ { AS(openat_args), (sy_call_t *)sys_openat, AUE_OPENAT_RWTC, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 499 = openat */ { AS(readlinkat_args), (sy_call_t *)sys_readlinkat, AUE_READLINKAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 500 = readlinkat */ { AS(renameat_args), (sy_call_t *)sys_renameat, AUE_RENAMEAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 501 = renameat */ { AS(symlinkat_args), (sy_call_t *)sys_symlinkat, AUE_SYMLINKAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 502 = symlinkat */ { AS(unlinkat_args), (sy_call_t *)sys_unlinkat, AUE_UNLINKAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 503 = unlinkat */ { AS(posix_openpt_args), (sy_call_t *)sys_posix_openpt, AUE_POSIX_OPENPT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 504 = posix_openpt */ { AS(gssd_syscall_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 505 = gssd_syscall */ { AS(jail_get_args), (sy_call_t *)sys_jail_get, AUE_JAIL_GET, NULL, 0, 0, 0, SY_THR_STATIC }, /* 506 = jail_get */ { AS(jail_set_args), (sy_call_t *)sys_jail_set, AUE_JAIL_SET, NULL, 0, 0, 0, SY_THR_STATIC }, /* 507 = jail_set */ { AS(jail_remove_args), (sy_call_t *)sys_jail_remove, AUE_JAIL_REMOVE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 508 = jail_remove */ { AS(closefrom_args), (sy_call_t *)sys_closefrom, AUE_CLOSEFROM, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 509 = closefrom */ { AS(__semctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 510 = __semctl */ { AS(msgctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 511 = msgctl */ { AS(shmctl_args), (sy_call_t *)lkmressys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 512 = shmctl */ { AS(lpathconf_args), (sy_call_t *)sys_lpathconf, AUE_LPATHCONF, NULL, 0, 0, 0, SY_THR_STATIC }, /* 513 = lpathconf */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 514 = obsolete cap_new */ { AS(__cap_rights_get_args), (sy_call_t *)sys___cap_rights_get, AUE_CAP_RIGHTS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 515 = __cap_rights_get */ { 0, (sy_call_t *)sys_cap_enter, AUE_CAP_ENTER, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 516 = cap_enter */ { AS(cap_getmode_args), (sy_call_t *)sys_cap_getmode, AUE_CAP_GETMODE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 517 = cap_getmode */ { AS(pdfork_args), (sy_call_t *)sys_pdfork, AUE_PDFORK, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 518 = pdfork */ { AS(pdkill_args), (sy_call_t *)sys_pdkill, AUE_PDKILL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 519 = pdkill */ { AS(pdgetpid_args), (sy_call_t *)sys_pdgetpid, AUE_PDGETPID, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 520 = pdgetpid */ { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 521 = pdwait4 */ { AS(pselect_args), (sy_call_t *)sys_pselect, AUE_SELECT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 522 = pselect */ { AS(getloginclass_args), (sy_call_t *)sys_getloginclass, AUE_GETLOGINCLASS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 523 = getloginclass */ { AS(setloginclass_args), (sy_call_t *)sys_setloginclass, AUE_SETLOGINCLASS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 524 = setloginclass */ { AS(rctl_get_racct_args), (sy_call_t *)sys_rctl_get_racct, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 525 = rctl_get_racct */ { AS(rctl_get_rules_args), (sy_call_t *)sys_rctl_get_rules, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 526 = rctl_get_rules */ { AS(rctl_get_limits_args), (sy_call_t *)sys_rctl_get_limits, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 527 = rctl_get_limits */ { AS(rctl_add_rule_args), (sy_call_t *)sys_rctl_add_rule, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 528 = rctl_add_rule */ { AS(rctl_remove_rule_args), (sy_call_t *)sys_rctl_remove_rule, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 529 = rctl_remove_rule */ { AS(posix_fallocate_args), (sy_call_t *)sys_posix_fallocate, AUE_POSIX_FALLOCATE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 530 = posix_fallocate */ { AS(posix_fadvise_args), (sy_call_t *)sys_posix_fadvise, AUE_POSIX_FADVISE, NULL, 0, 0, 0, SY_THR_STATIC }, /* 531 = posix_fadvise */ { AS(wait6_args), (sy_call_t *)sys_wait6, AUE_WAIT6, NULL, 0, 0, 0, SY_THR_STATIC }, /* 532 = wait6 */ { AS(cap_rights_limit_args), (sy_call_t *)sys_cap_rights_limit, AUE_CAP_RIGHTS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 533 = cap_rights_limit */ { AS(cap_ioctls_limit_args), (sy_call_t *)sys_cap_ioctls_limit, AUE_CAP_IOCTLS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 534 = cap_ioctls_limit */ { AS(cap_ioctls_get_args), (sy_call_t *)sys_cap_ioctls_get, AUE_CAP_IOCTLS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 535 = cap_ioctls_get */ { AS(cap_fcntls_limit_args), (sy_call_t *)sys_cap_fcntls_limit, AUE_CAP_FCNTLS_LIMIT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 536 = cap_fcntls_limit */ { AS(cap_fcntls_get_args), (sy_call_t *)sys_cap_fcntls_get, AUE_CAP_FCNTLS_GET, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 537 = cap_fcntls_get */ { AS(bindat_args), (sy_call_t *)sys_bindat, AUE_BINDAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 538 = bindat */ { AS(connectat_args), (sy_call_t *)sys_connectat, AUE_CONNECTAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 539 = connectat */ { AS(chflagsat_args), (sy_call_t *)sys_chflagsat, AUE_CHFLAGSAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 540 = chflagsat */ { AS(accept4_args), (sy_call_t *)sys_accept4, AUE_ACCEPT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 541 = accept4 */ { AS(pipe2_args), (sy_call_t *)sys_pipe2, AUE_PIPE, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 542 = pipe2 */ { AS(aio_mlock_args), (sy_call_t *)sys_aio_mlock, AUE_AIO_MLOCK, NULL, 0, 0, 0, SY_THR_STATIC }, /* 543 = aio_mlock */ { AS(procctl_args), (sy_call_t *)sys_procctl, AUE_PROCCTL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 544 = procctl */ { AS(ppoll_args), (sy_call_t *)sys_ppoll, AUE_POLL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 545 = ppoll */ { AS(futimens_args), (sy_call_t *)sys_futimens, AUE_FUTIMES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 546 = futimens */ { AS(utimensat_args), (sy_call_t *)sys_utimensat, AUE_FUTIMESAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 547 = utimensat */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 548 = numa_getaffinity */ - { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 549 = numa_setaffinity */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 548 = obsolete numa_getaffinity */ + { 0, (sy_call_t *)nosys, AUE_NULL, NULL, 0, 0, 0, SY_THR_ABSENT }, /* 549 = obsolete numa_setaffinity */ { AS(fdatasync_args), (sy_call_t *)sys_fdatasync, AUE_FSYNC, NULL, 0, 0, 0, SY_THR_STATIC }, /* 550 = fdatasync */ { AS(fstat_args), (sy_call_t *)sys_fstat, AUE_FSTAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 551 = fstat */ { AS(fstatat_args), (sy_call_t *)sys_fstatat, AUE_FSTATAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 552 = fstatat */ { AS(fhstat_args), (sy_call_t *)sys_fhstat, AUE_FHSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 553 = fhstat */ { AS(getdirentries_args), (sy_call_t *)sys_getdirentries, AUE_GETDIRENTRIES, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 554 = getdirentries */ { AS(statfs_args), (sy_call_t *)sys_statfs, AUE_STATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 555 = statfs */ { AS(fstatfs_args), (sy_call_t *)sys_fstatfs, AUE_FSTATFS, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 556 = fstatfs */ { AS(getfsstat_args), (sy_call_t *)sys_getfsstat, AUE_GETFSSTAT, NULL, 0, 0, 0, SY_THR_STATIC }, /* 557 = getfsstat */ { AS(fhstatfs_args), (sy_call_t *)sys_fhstatfs, AUE_FHSTATFS, NULL, 0, 0, 0, SY_THR_STATIC }, /* 558 = fhstatfs */ { AS(mknodat_args), (sy_call_t *)sys_mknodat, AUE_MKNODAT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 559 = mknodat */ { AS(kevent_args), (sy_call_t *)sys_kevent, AUE_KEVENT, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 560 = kevent */ { AS(cpuset_getdomain_args), (sy_call_t *)sys_cpuset_getdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 561 = cpuset_getdomain */ { AS(cpuset_setdomain_args), (sy_call_t *)sys_cpuset_setdomain, AUE_NULL, NULL, 0, 0, 0, SY_THR_STATIC }, /* 562 = cpuset_setdomain */ { AS(getrandom_args), (sy_call_t *)sys_getrandom, AUE_NULL, NULL, 0, 0, SYF_CAPENABLED, SY_THR_STATIC }, /* 563 = getrandom */ }; Index: projects/clang700-import/sys/kern/syscalls.c =================================================================== --- projects/clang700-import/sys/kern/syscalls.c (revision 339014) +++ projects/clang700-import/sys/kern/syscalls.c (revision 339015) @@ -1,573 +1,573 @@ /* * System call names. * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ */ const char *syscallnames[] = { "syscall", /* 0 = syscall */ "exit", /* 1 = exit */ "fork", /* 2 = fork */ "read", /* 3 = read */ "write", /* 4 = write */ "open", /* 5 = open */ "close", /* 6 = close */ "wait4", /* 7 = wait4 */ "compat.creat", /* 8 = old creat */ "link", /* 9 = link */ "unlink", /* 10 = unlink */ "obs_execv", /* 11 = obsolete execv */ "chdir", /* 12 = chdir */ "fchdir", /* 13 = fchdir */ "compat11.mknod", /* 14 = freebsd11 mknod */ "chmod", /* 15 = chmod */ "chown", /* 16 = chown */ "break", /* 17 = break */ "compat4.getfsstat", /* 18 = freebsd4 getfsstat */ "compat.lseek", /* 19 = old lseek */ "getpid", /* 20 = getpid */ "mount", /* 21 = mount */ "unmount", /* 22 = unmount */ "setuid", /* 23 = setuid */ "getuid", /* 24 = getuid */ "geteuid", /* 25 = geteuid */ "ptrace", /* 26 = ptrace */ "recvmsg", /* 27 = recvmsg */ "sendmsg", /* 28 = sendmsg */ "recvfrom", /* 29 = recvfrom */ "accept", /* 30 = accept */ "getpeername", /* 31 = getpeername */ "getsockname", /* 32 = getsockname */ "access", /* 33 = access */ "chflags", /* 34 = chflags */ "fchflags", /* 35 = fchflags */ "sync", /* 36 = sync */ "kill", /* 37 = kill */ "compat.stat", /* 38 = old stat */ "getppid", /* 39 = getppid */ "compat.lstat", /* 40 = old lstat */ "dup", /* 41 = dup */ "compat10.pipe", /* 42 = freebsd10 pipe */ "getegid", /* 43 = getegid */ "profil", /* 44 = profil */ "ktrace", /* 45 = ktrace */ "compat.sigaction", /* 46 = old sigaction */ "getgid", /* 47 = getgid */ "compat.sigprocmask", /* 48 = old sigprocmask */ "getlogin", /* 49 = getlogin */ "setlogin", /* 50 = setlogin */ "acct", /* 51 = acct */ "compat.sigpending", /* 52 = old sigpending */ "sigaltstack", /* 53 = sigaltstack */ "ioctl", /* 54 = ioctl */ "reboot", /* 55 = reboot */ "revoke", /* 56 = revoke */ "symlink", /* 57 = symlink */ "readlink", /* 58 = readlink */ "execve", /* 59 = execve */ "umask", /* 60 = umask */ "chroot", /* 61 = chroot */ "compat.fstat", /* 62 = old fstat */ "compat.getkerninfo", /* 63 = old getkerninfo */ "compat.getpagesize", /* 64 = old getpagesize */ "msync", /* 65 = msync */ "vfork", /* 66 = vfork */ "obs_vread", /* 67 = obsolete vread */ "obs_vwrite", /* 68 = obsolete vwrite */ "sbrk", /* 69 = sbrk */ "sstk", /* 70 = sstk */ "compat.mmap", /* 71 = old mmap */ "compat11.vadvise", /* 72 = freebsd11 vadvise */ "munmap", /* 73 = munmap */ "mprotect", /* 74 = mprotect */ "madvise", /* 75 = madvise */ "obs_vhangup", /* 76 = obsolete vhangup */ "obs_vlimit", /* 77 = obsolete vlimit */ "mincore", /* 78 = mincore */ "getgroups", /* 79 = getgroups */ "setgroups", /* 80 = setgroups */ "getpgrp", /* 81 = getpgrp */ "setpgid", /* 82 = setpgid */ "setitimer", /* 83 = setitimer */ "compat.wait", /* 84 = old wait */ "swapon", /* 85 = swapon */ "getitimer", /* 86 = getitimer */ "compat.gethostname", /* 87 = old gethostname */ "compat.sethostname", /* 88 = old sethostname */ "getdtablesize", /* 89 = getdtablesize */ "dup2", /* 90 = dup2 */ "#91", /* 91 = getdopt */ "fcntl", /* 92 = fcntl */ "select", /* 93 = select */ "#94", /* 94 = setdopt */ "fsync", /* 95 = fsync */ "setpriority", /* 96 = setpriority */ "socket", /* 97 = socket */ "connect", /* 98 = connect */ "compat.accept", /* 99 = old accept */ "getpriority", /* 100 = getpriority */ "compat.send", /* 101 = old send */ "compat.recv", /* 102 = old recv */ "compat.sigreturn", /* 103 = old sigreturn */ "bind", /* 104 = bind */ "setsockopt", /* 105 = setsockopt */ "listen", /* 106 = listen */ "obs_vtimes", /* 107 = obsolete vtimes */ "compat.sigvec", /* 108 = old sigvec */ "compat.sigblock", /* 109 = old sigblock */ "compat.sigsetmask", /* 110 = old sigsetmask */ "compat.sigsuspend", /* 111 = old sigsuspend */ "compat.sigstack", /* 112 = old sigstack */ "compat.recvmsg", /* 113 = old recvmsg */ "compat.sendmsg", /* 114 = old sendmsg */ "obs_vtrace", /* 115 = obsolete vtrace */ "gettimeofday", /* 116 = gettimeofday */ "getrusage", /* 117 = getrusage */ "getsockopt", /* 118 = getsockopt */ "#119", /* 119 = resuba */ "readv", /* 120 = readv */ "writev", /* 121 = writev */ "settimeofday", /* 122 = settimeofday */ "fchown", /* 123 = fchown */ "fchmod", /* 124 = fchmod */ "compat.recvfrom", /* 125 = old recvfrom */ "setreuid", /* 126 = setreuid */ "setregid", /* 127 = setregid */ "rename", /* 128 = rename */ "compat.truncate", /* 129 = old truncate */ "compat.ftruncate", /* 130 = old ftruncate */ "flock", /* 131 = flock */ "mkfifo", /* 132 = mkfifo */ "sendto", /* 133 = sendto */ "shutdown", /* 134 = shutdown */ "socketpair", /* 135 = socketpair */ "mkdir", /* 136 = mkdir */ "rmdir", /* 137 = rmdir */ "utimes", /* 138 = utimes */ "obs_4.2", /* 139 = obsolete 4.2 sigreturn */ "adjtime", /* 140 = adjtime */ "compat.getpeername", /* 141 = old getpeername */ "compat.gethostid", /* 142 = old gethostid */ "compat.sethostid", /* 143 = old sethostid */ "compat.getrlimit", /* 144 = old getrlimit */ "compat.setrlimit", /* 145 = old setrlimit */ "compat.killpg", /* 146 = old killpg */ "setsid", /* 147 = setsid */ "quotactl", /* 148 = quotactl */ "compat.quota", /* 149 = old quota */ "compat.getsockname", /* 150 = old getsockname */ "#151", /* 151 = sem_lock */ "#152", /* 152 = sem_wakeup */ "#153", /* 153 = asyncdaemon */ "nlm_syscall", /* 154 = nlm_syscall */ "nfssvc", /* 155 = nfssvc */ "compat.getdirentries", /* 156 = old getdirentries */ "compat4.statfs", /* 157 = freebsd4 statfs */ "compat4.fstatfs", /* 158 = freebsd4 fstatfs */ "#159", /* 159 = nosys */ "lgetfh", /* 160 = lgetfh */ "getfh", /* 161 = getfh */ "compat4.getdomainname", /* 162 = freebsd4 getdomainname */ "compat4.setdomainname", /* 163 = freebsd4 setdomainname */ "compat4.uname", /* 164 = freebsd4 uname */ "sysarch", /* 165 = sysarch */ "rtprio", /* 166 = rtprio */ "#167", /* 167 = nosys */ "#168", /* 168 = nosys */ "semsys", /* 169 = semsys */ "msgsys", /* 170 = msgsys */ "shmsys", /* 171 = shmsys */ "#172", /* 172 = nosys */ "compat6.pread", /* 173 = freebsd6 pread */ "compat6.pwrite", /* 174 = freebsd6 pwrite */ "setfib", /* 175 = setfib */ "ntp_adjtime", /* 176 = ntp_adjtime */ "#177", /* 177 = sfork */ "#178", /* 178 = getdescriptor */ "#179", /* 179 = setdescriptor */ "#180", /* 180 = nosys */ "setgid", /* 181 = setgid */ "setegid", /* 182 = setegid */ "seteuid", /* 183 = seteuid */ - "#184", /* 184 = lfs_bmapv */ - "#185", /* 185 = lfs_markv */ - "#186", /* 186 = lfs_segclean */ - "#187", /* 187 = lfs_segwait */ + "obs_lfs_bmapv", /* 184 = obsolete lfs_bmapv */ + "obs_lfs_markv", /* 185 = obsolete lfs_markv */ + "obs_lfs_segclean", /* 186 = obsolete lfs_segclean */ + "obs_lfs_segwait", /* 187 = obsolete lfs_segwait */ "compat11.stat", /* 188 = freebsd11 stat */ "compat11.fstat", /* 189 = freebsd11 fstat */ "compat11.lstat", /* 190 = freebsd11 lstat */ "pathconf", /* 191 = pathconf */ "fpathconf", /* 192 = fpathconf */ "#193", /* 193 = nosys */ "getrlimit", /* 194 = getrlimit */ "setrlimit", /* 195 = setrlimit */ "compat11.getdirentries", /* 196 = freebsd11 getdirentries */ "compat6.mmap", /* 197 = freebsd6 mmap */ "__syscall", /* 198 = __syscall */ "compat6.lseek", /* 199 = freebsd6 lseek */ "compat6.truncate", /* 200 = freebsd6 truncate */ "compat6.ftruncate", /* 201 = freebsd6 ftruncate */ "__sysctl", /* 202 = __sysctl */ "mlock", /* 203 = mlock */ "munlock", /* 204 = munlock */ "undelete", /* 205 = undelete */ "futimes", /* 206 = futimes */ "getpgid", /* 207 = getpgid */ "#208", /* 208 = nosys */ "poll", /* 209 = poll */ "lkmnosys", /* 210 = lkmnosys */ "lkmnosys", /* 211 = lkmnosys */ "lkmnosys", /* 212 = lkmnosys */ "lkmnosys", /* 213 = lkmnosys */ "lkmnosys", /* 214 = lkmnosys */ "lkmnosys", /* 215 = lkmnosys */ "lkmnosys", /* 216 = lkmnosys */ "lkmnosys", /* 217 = lkmnosys */ "lkmnosys", /* 218 = lkmnosys */ "lkmnosys", /* 219 = lkmnosys */ "compat7.__semctl", /* 220 = freebsd7 __semctl */ "semget", /* 221 = semget */ "semop", /* 222 = semop */ - "#223", /* 223 = semconfig */ + "obs_semconfig", /* 223 = obsolete semconfig */ "compat7.msgctl", /* 224 = freebsd7 msgctl */ "msgget", /* 225 = msgget */ "msgsnd", /* 226 = msgsnd */ "msgrcv", /* 227 = msgrcv */ "shmat", /* 228 = shmat */ "compat7.shmctl", /* 229 = freebsd7 shmctl */ "shmdt", /* 230 = shmdt */ "shmget", /* 231 = shmget */ "clock_gettime", /* 232 = clock_gettime */ "clock_settime", /* 233 = clock_settime */ "clock_getres", /* 234 = clock_getres */ "ktimer_create", /* 235 = ktimer_create */ "ktimer_delete", /* 236 = ktimer_delete */ "ktimer_settime", /* 237 = ktimer_settime */ "ktimer_gettime", /* 238 = ktimer_gettime */ "ktimer_getoverrun", /* 239 = ktimer_getoverrun */ "nanosleep", /* 240 = nanosleep */ "ffclock_getcounter", /* 241 = ffclock_getcounter */ "ffclock_setestimate", /* 242 = ffclock_setestimate */ "ffclock_getestimate", /* 243 = ffclock_getestimate */ "clock_nanosleep", /* 244 = clock_nanosleep */ "#245", /* 245 = nosys */ "#246", /* 246 = nosys */ "clock_getcpuclockid2", /* 247 = clock_getcpuclockid2 */ "ntp_gettime", /* 248 = ntp_gettime */ "#249", /* 249 = nosys */ "minherit", /* 250 = minherit */ "rfork", /* 251 = rfork */ "obs_openbsd_poll", /* 252 = obsolete openbsd_poll */ "issetugid", /* 253 = issetugid */ "lchown", /* 254 = lchown */ "aio_read", /* 255 = aio_read */ "aio_write", /* 256 = aio_write */ "lio_listio", /* 257 = lio_listio */ "#258", /* 258 = nosys */ "#259", /* 259 = nosys */ "#260", /* 260 = nosys */ "#261", /* 261 = nosys */ "#262", /* 262 = nosys */ "#263", /* 263 = nosys */ "#264", /* 264 = nosys */ "#265", /* 265 = nosys */ "#266", /* 266 = nosys */ "#267", /* 267 = nosys */ "#268", /* 268 = nosys */ "#269", /* 269 = nosys */ "#270", /* 270 = nosys */ "#271", /* 271 = nosys */ "compat11.getdents", /* 272 = freebsd11 getdents */ "#273", /* 273 = nosys */ "lchmod", /* 274 = lchmod */ "obs_netbsd_lchown", /* 275 = obsolete netbsd_lchown */ "lutimes", /* 276 = lutimes */ "obs_netbsd_msync", /* 277 = obsolete netbsd_msync */ "compat11.nstat", /* 278 = freebsd11 nstat */ "compat11.nfstat", /* 279 = freebsd11 nfstat */ "compat11.nlstat", /* 280 = freebsd11 nlstat */ "#281", /* 281 = nosys */ "#282", /* 282 = nosys */ "#283", /* 283 = nosys */ "#284", /* 284 = nosys */ "#285", /* 285 = nosys */ "#286", /* 286 = nosys */ "#287", /* 287 = nosys */ "#288", /* 288 = nosys */ "preadv", /* 289 = preadv */ "pwritev", /* 290 = pwritev */ "#291", /* 291 = nosys */ "#292", /* 292 = nosys */ "#293", /* 293 = nosys */ "#294", /* 294 = nosys */ "#295", /* 295 = nosys */ "#296", /* 296 = nosys */ "compat4.fhstatfs", /* 297 = freebsd4 fhstatfs */ "fhopen", /* 298 = fhopen */ "compat11.fhstat", /* 299 = freebsd11 fhstat */ "modnext", /* 300 = modnext */ "modstat", /* 301 = modstat */ "modfnext", /* 302 = modfnext */ "modfind", /* 303 = modfind */ "kldload", /* 304 = kldload */ "kldunload", /* 305 = kldunload */ "kldfind", /* 306 = kldfind */ "kldnext", /* 307 = kldnext */ "kldstat", /* 308 = kldstat */ "kldfirstmod", /* 309 = kldfirstmod */ "getsid", /* 310 = getsid */ "setresuid", /* 311 = setresuid */ "setresgid", /* 312 = setresgid */ "obs_signanosleep", /* 313 = obsolete signanosleep */ "aio_return", /* 314 = aio_return */ "aio_suspend", /* 315 = aio_suspend */ "aio_cancel", /* 316 = aio_cancel */ "aio_error", /* 317 = aio_error */ "compat6.aio_read", /* 318 = freebsd6 aio_read */ "compat6.aio_write", /* 319 = freebsd6 aio_write */ "compat6.lio_listio", /* 320 = freebsd6 lio_listio */ "yield", /* 321 = yield */ "obs_thr_sleep", /* 322 = obsolete thr_sleep */ "obs_thr_wakeup", /* 323 = obsolete thr_wakeup */ "mlockall", /* 324 = mlockall */ "munlockall", /* 325 = munlockall */ "__getcwd", /* 326 = __getcwd */ "sched_setparam", /* 327 = sched_setparam */ "sched_getparam", /* 328 = sched_getparam */ "sched_setscheduler", /* 329 = sched_setscheduler */ "sched_getscheduler", /* 330 = sched_getscheduler */ "sched_yield", /* 331 = sched_yield */ "sched_get_priority_max", /* 332 = sched_get_priority_max */ "sched_get_priority_min", /* 333 = sched_get_priority_min */ "sched_rr_get_interval", /* 334 = sched_rr_get_interval */ "utrace", /* 335 = utrace */ "compat4.sendfile", /* 336 = freebsd4 sendfile */ "kldsym", /* 337 = kldsym */ "jail", /* 338 = jail */ "nnpfs_syscall", /* 339 = nnpfs_syscall */ "sigprocmask", /* 340 = sigprocmask */ "sigsuspend", /* 341 = sigsuspend */ "compat4.sigaction", /* 342 = freebsd4 sigaction */ "sigpending", /* 343 = sigpending */ "compat4.sigreturn", /* 344 = freebsd4 sigreturn */ "sigtimedwait", /* 345 = sigtimedwait */ "sigwaitinfo", /* 346 = sigwaitinfo */ "__acl_get_file", /* 347 = __acl_get_file */ "__acl_set_file", /* 348 = __acl_set_file */ "__acl_get_fd", /* 349 = __acl_get_fd */ "__acl_set_fd", /* 350 = __acl_set_fd */ "__acl_delete_file", /* 351 = __acl_delete_file */ "__acl_delete_fd", /* 352 = __acl_delete_fd */ "__acl_aclcheck_file", /* 353 = __acl_aclcheck_file */ "__acl_aclcheck_fd", /* 354 = __acl_aclcheck_fd */ "extattrctl", /* 355 = extattrctl */ "extattr_set_file", /* 356 = extattr_set_file */ "extattr_get_file", /* 357 = extattr_get_file */ "extattr_delete_file", /* 358 = extattr_delete_file */ "aio_waitcomplete", /* 359 = aio_waitcomplete */ "getresuid", /* 360 = getresuid */ "getresgid", /* 361 = getresgid */ "kqueue", /* 362 = kqueue */ "compat11.kevent", /* 363 = freebsd11 kevent */ - "#364", /* 364 = __cap_get_proc */ - "#365", /* 365 = __cap_set_proc */ - "#366", /* 366 = __cap_get_fd */ - "#367", /* 367 = __cap_get_file */ - "#368", /* 368 = __cap_set_fd */ - "#369", /* 369 = __cap_set_file */ + "obs___cap_get_proc", /* 364 = obsolete __cap_get_proc */ + "obs___cap_set_proc", /* 365 = obsolete __cap_set_proc */ + "obs___cap_get_fd", /* 366 = obsolete __cap_get_fd */ + "obs___cap_get_file", /* 367 = obsolete __cap_get_file */ + "obs___cap_set_fd", /* 368 = obsolete __cap_set_fd */ + "obs___cap_set_file", /* 369 = obsolete __cap_set_file */ "#370", /* 370 = nosys */ "extattr_set_fd", /* 371 = extattr_set_fd */ "extattr_get_fd", /* 372 = extattr_get_fd */ "extattr_delete_fd", /* 373 = extattr_delete_fd */ "__setugid", /* 374 = __setugid */ - "#375", /* 375 = nfsclnt */ + "obs_nfsclnt", /* 375 = obsolete nfsclnt */ "eaccess", /* 376 = eaccess */ "afs3_syscall", /* 377 = afs3_syscall */ "nmount", /* 378 = nmount */ - "#379", /* 379 = kse_exit */ - "#380", /* 380 = kse_wakeup */ - "#381", /* 381 = kse_create */ - "#382", /* 382 = kse_thr_interrupt */ - "#383", /* 383 = kse_release */ + "obs_kse_exit", /* 379 = obsolete kse_exit */ + "obs_kse_wakeup", /* 380 = obsolete kse_wakeup */ + "obs_kse_create", /* 381 = obsolete kse_create */ + "obs_kse_thr_interrupt", /* 382 = obsolete kse_thr_interrupt */ + "obs_kse_release", /* 383 = obsolete kse_release */ "__mac_get_proc", /* 384 = __mac_get_proc */ "__mac_set_proc", /* 385 = __mac_set_proc */ "__mac_get_fd", /* 386 = __mac_get_fd */ "__mac_get_file", /* 387 = __mac_get_file */ "__mac_set_fd", /* 388 = __mac_set_fd */ "__mac_set_file", /* 389 = __mac_set_file */ "kenv", /* 390 = kenv */ "lchflags", /* 391 = lchflags */ "uuidgen", /* 392 = uuidgen */ "sendfile", /* 393 = sendfile */ "mac_syscall", /* 394 = mac_syscall */ "compat11.getfsstat", /* 395 = freebsd11 getfsstat */ "compat11.statfs", /* 396 = freebsd11 statfs */ "compat11.fstatfs", /* 397 = freebsd11 fstatfs */ "compat11.fhstatfs", /* 398 = freebsd11 fhstatfs */ "#399", /* 399 = nosys */ "ksem_close", /* 400 = ksem_close */ "ksem_post", /* 401 = ksem_post */ "ksem_wait", /* 402 = ksem_wait */ "ksem_trywait", /* 403 = ksem_trywait */ "ksem_init", /* 404 = ksem_init */ "ksem_open", /* 405 = ksem_open */ "ksem_unlink", /* 406 = ksem_unlink */ "ksem_getvalue", /* 407 = ksem_getvalue */ "ksem_destroy", /* 408 = ksem_destroy */ "__mac_get_pid", /* 409 = __mac_get_pid */ "__mac_get_link", /* 410 = __mac_get_link */ "__mac_set_link", /* 411 = __mac_set_link */ "extattr_set_link", /* 412 = extattr_set_link */ "extattr_get_link", /* 413 = extattr_get_link */ "extattr_delete_link", /* 414 = extattr_delete_link */ "__mac_execve", /* 415 = __mac_execve */ "sigaction", /* 416 = sigaction */ "sigreturn", /* 417 = sigreturn */ "#418", /* 418 = __xstat */ "#419", /* 419 = __xfstat */ "#420", /* 420 = __xlstat */ "getcontext", /* 421 = getcontext */ "setcontext", /* 422 = setcontext */ "swapcontext", /* 423 = swapcontext */ "swapoff", /* 424 = swapoff */ "__acl_get_link", /* 425 = __acl_get_link */ "__acl_set_link", /* 426 = __acl_set_link */ "__acl_delete_link", /* 427 = __acl_delete_link */ "__acl_aclcheck_link", /* 428 = __acl_aclcheck_link */ "sigwait", /* 429 = sigwait */ "thr_create", /* 430 = thr_create */ "thr_exit", /* 431 = thr_exit */ "thr_self", /* 432 = thr_self */ "thr_kill", /* 433 = thr_kill */ "#434", /* 434 = nosys */ "#435", /* 435 = nosys */ "jail_attach", /* 436 = jail_attach */ "extattr_list_fd", /* 437 = extattr_list_fd */ "extattr_list_file", /* 438 = extattr_list_file */ "extattr_list_link", /* 439 = extattr_list_link */ - "#440", /* 440 = kse_switchin */ + "obs_kse_switchin", /* 440 = obsolete kse_switchin */ "ksem_timedwait", /* 441 = ksem_timedwait */ "thr_suspend", /* 442 = thr_suspend */ "thr_wake", /* 443 = thr_wake */ "kldunloadf", /* 444 = kldunloadf */ "audit", /* 445 = audit */ "auditon", /* 446 = auditon */ "getauid", /* 447 = getauid */ "setauid", /* 448 = setauid */ "getaudit", /* 449 = getaudit */ "setaudit", /* 450 = setaudit */ "getaudit_addr", /* 451 = getaudit_addr */ "setaudit_addr", /* 452 = setaudit_addr */ "auditctl", /* 453 = auditctl */ "_umtx_op", /* 454 = _umtx_op */ "thr_new", /* 455 = thr_new */ "sigqueue", /* 456 = sigqueue */ "kmq_open", /* 457 = kmq_open */ "kmq_setattr", /* 458 = kmq_setattr */ "kmq_timedreceive", /* 459 = kmq_timedreceive */ "kmq_timedsend", /* 460 = kmq_timedsend */ "kmq_notify", /* 461 = kmq_notify */ "kmq_unlink", /* 462 = kmq_unlink */ "abort2", /* 463 = abort2 */ "thr_set_name", /* 464 = thr_set_name */ "aio_fsync", /* 465 = aio_fsync */ "rtprio_thread", /* 466 = rtprio_thread */ "#467", /* 467 = nosys */ "#468", /* 468 = nosys */ "#469", /* 469 = __getpath_fromfd */ "#470", /* 470 = __getpath_fromaddr */ "sctp_peeloff", /* 471 = sctp_peeloff */ "sctp_generic_sendmsg", /* 472 = sctp_generic_sendmsg */ "sctp_generic_sendmsg_iov", /* 473 = sctp_generic_sendmsg_iov */ "sctp_generic_recvmsg", /* 474 = sctp_generic_recvmsg */ "pread", /* 475 = pread */ "pwrite", /* 476 = pwrite */ "mmap", /* 477 = mmap */ "lseek", /* 478 = lseek */ "truncate", /* 479 = truncate */ "ftruncate", /* 480 = ftruncate */ "thr_kill2", /* 481 = thr_kill2 */ "shm_open", /* 482 = shm_open */ "shm_unlink", /* 483 = shm_unlink */ "cpuset", /* 484 = cpuset */ "cpuset_setid", /* 485 = cpuset_setid */ "cpuset_getid", /* 486 = cpuset_getid */ "cpuset_getaffinity", /* 487 = cpuset_getaffinity */ "cpuset_setaffinity", /* 488 = cpuset_setaffinity */ "faccessat", /* 489 = faccessat */ "fchmodat", /* 490 = fchmodat */ "fchownat", /* 491 = fchownat */ "fexecve", /* 492 = fexecve */ "compat11.fstatat", /* 493 = freebsd11 fstatat */ "futimesat", /* 494 = futimesat */ "linkat", /* 495 = linkat */ "mkdirat", /* 496 = mkdirat */ "mkfifoat", /* 497 = mkfifoat */ "compat11.mknodat", /* 498 = freebsd11 mknodat */ "openat", /* 499 = openat */ "readlinkat", /* 500 = readlinkat */ "renameat", /* 501 = renameat */ "symlinkat", /* 502 = symlinkat */ "unlinkat", /* 503 = unlinkat */ "posix_openpt", /* 504 = posix_openpt */ "gssd_syscall", /* 505 = gssd_syscall */ "jail_get", /* 506 = jail_get */ "jail_set", /* 507 = jail_set */ "jail_remove", /* 508 = jail_remove */ "closefrom", /* 509 = closefrom */ "__semctl", /* 510 = __semctl */ "msgctl", /* 511 = msgctl */ "shmctl", /* 512 = shmctl */ "lpathconf", /* 513 = lpathconf */ "obs_cap_new", /* 514 = obsolete cap_new */ "__cap_rights_get", /* 515 = __cap_rights_get */ "cap_enter", /* 516 = cap_enter */ "cap_getmode", /* 517 = cap_getmode */ "pdfork", /* 518 = pdfork */ "pdkill", /* 519 = pdkill */ "pdgetpid", /* 520 = pdgetpid */ "#521", /* 521 = pdwait4 */ "pselect", /* 522 = pselect */ "getloginclass", /* 523 = getloginclass */ "setloginclass", /* 524 = setloginclass */ "rctl_get_racct", /* 525 = rctl_get_racct */ "rctl_get_rules", /* 526 = rctl_get_rules */ "rctl_get_limits", /* 527 = rctl_get_limits */ "rctl_add_rule", /* 528 = rctl_add_rule */ "rctl_remove_rule", /* 529 = rctl_remove_rule */ "posix_fallocate", /* 530 = posix_fallocate */ "posix_fadvise", /* 531 = posix_fadvise */ "wait6", /* 532 = wait6 */ "cap_rights_limit", /* 533 = cap_rights_limit */ "cap_ioctls_limit", /* 534 = cap_ioctls_limit */ "cap_ioctls_get", /* 535 = cap_ioctls_get */ "cap_fcntls_limit", /* 536 = cap_fcntls_limit */ "cap_fcntls_get", /* 537 = cap_fcntls_get */ "bindat", /* 538 = bindat */ "connectat", /* 539 = connectat */ "chflagsat", /* 540 = chflagsat */ "accept4", /* 541 = accept4 */ "pipe2", /* 542 = pipe2 */ "aio_mlock", /* 543 = aio_mlock */ "procctl", /* 544 = procctl */ "ppoll", /* 545 = ppoll */ "futimens", /* 546 = futimens */ "utimensat", /* 547 = utimensat */ - "#548", /* 548 = numa_getaffinity */ - "#549", /* 549 = numa_setaffinity */ + "obs_numa_getaffinity", /* 548 = obsolete numa_getaffinity */ + "obs_numa_setaffinity", /* 549 = obsolete numa_setaffinity */ "fdatasync", /* 550 = fdatasync */ "fstat", /* 551 = fstat */ "fstatat", /* 552 = fstatat */ "fhstat", /* 553 = fhstat */ "getdirentries", /* 554 = getdirentries */ "statfs", /* 555 = statfs */ "fstatfs", /* 556 = fstatfs */ "getfsstat", /* 557 = getfsstat */ "fhstatfs", /* 558 = fhstatfs */ "mknodat", /* 559 = mknodat */ "kevent", /* 560 = kevent */ "cpuset_getdomain", /* 561 = cpuset_getdomain */ "cpuset_setdomain", /* 562 = cpuset_setdomain */ "getrandom", /* 563 = getrandom */ }; Index: projects/clang700-import/sys/kern/syscalls.master =================================================================== --- projects/clang700-import/sys/kern/syscalls.master (revision 339014) +++ projects/clang700-import/sys/kern/syscalls.master (revision 339015) @@ -1,1346 +1,1346 @@ $FreeBSD$ ; from: @(#)syscalls.master 8.2 (Berkeley) 1/13/94 ; ; System call name/number master file. ; Processed to created init_sysent.c, syscalls.c and syscall.h. ; Columns: number audit type name alt{name,tag,rtyp}/comments ; number system call number, must be in order ; audit the audit event associated with the system call ; A value of AUE_NULL means no auditing, but it also means that ; there is no audit event for the call at this time. For the ; case where the event exists, but we don't want auditing, the ; event should be #defined to AUE_NULL in audit_kevents.h. ; type one of STD, OBSOL, UNIMPL, COMPAT, COMPAT4, COMPAT6, ; COMPAT7, COMPAT11, NODEF, NOARGS, NOPROTO, NOSTD ; The COMPAT* options may be combined with one or more NO* ; options separated by '|' with no spaces (e.g. COMPAT|NOARGS) ; name pseudo-prototype of syscall routine ; If one of the following alts is different, then all appear: ; altname name of system call if different ; alttag name of args struct tag if different from [o]`name'"_args" ; altrtyp return type if not int (bogus - syscalls always return int) ; for UNIMPL/OBSOL, name continues with comments ; types: ; STD always included ; COMPAT included on COMPAT #ifdef ; COMPAT4 included on COMPAT_FREEBSD4 #ifdef (FreeBSD 4 compat) ; COMPAT6 included on COMPAT_FREEBSD6 #ifdef (FreeBSD 6 compat) ; COMPAT7 included on COMPAT_FREEBSD7 #ifdef (FreeBSD 7 compat) ; COMPAT10 included on COMPAT_FREEBSD10 #ifdef (FreeBSD 10 compat) ; COMPAT11 included on COMPAT11 #ifdef (FreeBSD 11 compat) ; OBSOL obsolete, not included in system, only specifies name ; UNIMPL not implemented, placeholder only ; NOSTD implemented but as a lkm that can be statically ; compiled in; sysent entry will be filled with lkmressys ; so the SYSCALL_MODULE macro works ; NOARGS same as STD except do not create structure in sys/sysproto.h ; NODEF same as STD except only have the entry in the syscall table ; added. Meaning - do not create structure or function ; prototype in sys/sysproto.h ; NOPROTO same as STD except do not create structure or ; function prototype in sys/sysproto.h. Does add a ; definition to syscall.h besides adding a sysent. ; NOTSTATIC syscall is loadable ; annotations: ; SAL 2.0 annotations are used to specify how system calls treat ; arguments that are passed using pointers. There are three basic ; annotations. ; ; _In_ Object pointed to will be read and not modified. ; _Out_ Object pointed to will be written and not read. ; _Inout_ Object pointed to will be written and read. ; ; These annotations are used alone when the pointer refers to a single ; object i.e. scalar types, structs, and pointers, and not NULL. Adding ; the _opt_ suffix, e.g. _In_opt_, implies that the pointer may also ; refer to NULL. ; ; For pointers to arrays, additional suffixes are added: ; ; _In_z_, _Out_z_, _Inout_z_: ; for a NUL terminated array e.g. a string. ; _In_reads_z_(n),_Out_writes_z_(n), _Inout_updates_z_(n): ; for a NUL terminated array e.g. a string, of known length n bytes. ; _In_reads_(n),_Out_writes_(n),_Inout_updates_(n): ; for an array of n elements. ; _In_reads_bytes_(n), _Out_writes_bytes_(n), _Inout_updates_bytes(n): ; for a buffer of n-bytes. ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master ; #ifdef's, etc. may be included, and are copied to the output files. #include #include #include ; Reserved/unimplemented system calls in the range 0-150 inclusive ; are reserved for use in future Berkeley releases. ; Additional system calls implemented in vendor and other ; redistributions should be placed in the reserved range at the end ; of the current calls. 0 AUE_NULL STD { int nosys(void); } syscall nosys_args int 1 AUE_EXIT STD { void sys_exit(int rval); } exit \ sys_exit_args void 2 AUE_FORK STD { int fork(void); } 3 AUE_READ STD { ssize_t read(int fd, \ _Out_writes_bytes_(nbyte) void *buf, \ size_t nbyte); } 4 AUE_WRITE STD { ssize_t write(int fd, \ _In_reads_bytes_(nbyte) const void *buf, \ size_t nbyte); } 5 AUE_OPEN_RWTC STD { int open( \ _In_z_ char *path, \ int flags, \ int mode); } ; XXX should be { int open(const char *path, int flags, ...); } ; but we're not ready for `const' or varargs. ; XXX man page says `mode_t mode'. 6 AUE_CLOSE STD { int close(int fd); } 7 AUE_WAIT4 STD { int wait4(int pid, \ _Out_opt_ int *status, \ int options, \ _Out_opt_ struct rusage *rusage); } 8 AUE_CREAT COMPAT { int creat(_In_z_ char *path, int mode); } 9 AUE_LINK STD { int link(_In_z_ char *path, \ _In_z_ char *link); } 10 AUE_UNLINK STD { int unlink(_In_z_ char *path); } 11 AUE_NULL OBSOL execv 12 AUE_CHDIR STD { int chdir(_In_z_ char *path); } 13 AUE_FCHDIR STD { int fchdir(int fd); } 14 AUE_MKNOD COMPAT11 { int mknod(_In_z_ char *path, int mode, \ int dev); } 15 AUE_CHMOD STD { int chmod(_In_z_ char *path, int mode); } 16 AUE_CHOWN STD { int chown(_In_z_ char *path, \ int uid, int gid); } 17 AUE_NULL STD { caddr_t break(_In_ char *nsize); } 18 AUE_GETFSSTAT COMPAT4 { int getfsstat( \ _Out_writes_bytes_opt_(bufsize) \ struct ostatfs *buf, \ long bufsize, int mode); } 19 AUE_LSEEK COMPAT { long lseek(int fd, long offset, \ int whence); } 20 AUE_GETPID STD { pid_t getpid(void); } 21 AUE_MOUNT STD { int mount(_In_z_ char *type, \ _In_z_ char *path, int flags, \ _In_opt_ caddr_t data); } ; XXX `path' should have type `const char *' but we're not ready for that. 22 AUE_UMOUNT STD { int unmount(_In_z_ char *path, int flags); } 23 AUE_SETUID STD { int setuid(uid_t uid); } 24 AUE_GETUID STD { uid_t getuid(void); } 25 AUE_GETEUID STD { uid_t geteuid(void); } 26 AUE_PTRACE STD { int ptrace(int req, pid_t pid, \ _Inout_opt_ caddr_t addr, int data); } 27 AUE_RECVMSG STD { int recvmsg(int s, \ _Inout_ struct msghdr *msg, int flags); } 28 AUE_SENDMSG STD { int sendmsg(int s, _In_ struct msghdr *msg, \ int flags); } 29 AUE_RECVFROM STD { int recvfrom(int s, \ _Out_writes_bytes_(len) caddr_t buf, \ size_t len, int flags, \ _Out_writes_bytes_opt_(*fromlenaddr) \ struct sockaddr * __restrict from, \ _Inout_opt_ \ __socklen_t * __restrict fromlenaddr); } 30 AUE_ACCEPT STD { int accept(int s, \ _Out_writes_bytes_opt_(*anamelen) \ struct sockaddr * __restrict name, \ _Inout_opt_ \ __socklen_t * __restrict anamelen); } 31 AUE_GETPEERNAME STD { int getpeername(int fdes, \ _Out_writes_bytes_(*alen) \ struct sockaddr * __restrict asa, \ _Inout_opt_ \ __socklen_t * __restrict alen); } 32 AUE_GETSOCKNAME STD { int getsockname(int fdes, \ _Out_writes_bytes_(*alen) \ struct sockaddr * __restrict asa, \ _Inout_ __socklen_t * __restrict alen); } 33 AUE_ACCESS STD { int access(_In_z_ char *path, int amode); } 34 AUE_CHFLAGS STD { int chflags(_In_z_ const char *path, \ u_long flags); } 35 AUE_FCHFLAGS STD { int fchflags(int fd, u_long flags); } 36 AUE_SYNC STD { int sync(void); } 37 AUE_KILL STD { int kill(int pid, int signum); } 38 AUE_STAT COMPAT { int stat(_In_z_ char *path, \ _Out_ struct ostat *ub); } 39 AUE_GETPPID STD { pid_t getppid(void); } 40 AUE_LSTAT COMPAT { int lstat(_In_z_ char *path, \ _Out_ struct ostat *ub); } 41 AUE_DUP STD { int dup(u_int fd); } 42 AUE_PIPE COMPAT10 { int pipe(void); } 43 AUE_GETEGID STD { gid_t getegid(void); } 44 AUE_PROFILE STD { int profil( \ _Out_writes_bytes_(size) caddr_t samples, \ size_t size, size_t offset, u_int scale); } 45 AUE_KTRACE STD { int ktrace(_In_z_ const char *fname, \ int ops, int facs, int pid); } 46 AUE_SIGACTION COMPAT { int sigaction(int signum, \ _In_opt_ struct osigaction *nsa, \ _Out_opt_ struct osigaction *osa); } 47 AUE_GETGID STD { gid_t getgid(void); } 48 AUE_SIGPROCMASK COMPAT { int sigprocmask(int how, osigset_t mask); } ; XXX note nonstandard (bogus) calling convention - the libc stub passes ; us the mask, not a pointer to it, and we return the old mask as the ; (int) return value. 49 AUE_GETLOGIN STD { int getlogin( \ _Out_writes_z_(namelen) char *namebuf, \ u_int namelen); } 50 AUE_SETLOGIN STD { int setlogin(_In_z_ char *namebuf); } 51 AUE_ACCT STD { int acct(_In_z_ char *path); } 52 AUE_SIGPENDING COMPAT { int sigpending(void); } 53 AUE_SIGALTSTACK STD { int sigaltstack(_In_opt_ stack_t *ss, \ _Out_opt_ stack_t *oss); } 54 AUE_IOCTL STD { int ioctl(int fd, u_long com, \ _Inout_opt_ caddr_t data); } 55 AUE_REBOOT STD { int reboot(int opt); } 56 AUE_REVOKE STD { int revoke(_In_z_ char *path); } 57 AUE_SYMLINK STD { int symlink(_In_z_ char *path, \ _In_z_ char *link); } 58 AUE_READLINK STD { ssize_t readlink(_In_z_ char *path, \ _Out_writes_z_(count) char *buf, \ size_t count); } 59 AUE_EXECVE STD { int execve( \ _In_z_ char *fname, \ _In_z_ char **argv, \ _In_z_ char **envv); } 60 AUE_UMASK STD { int umask(int newmask); } 61 AUE_CHROOT STD { int chroot(_In_z_ char *path); } 62 AUE_FSTAT COMPAT { int fstat(int fd, _Out_ struct ostat *sb); } 63 AUE_NULL COMPAT { int getkerninfo(int op, \ _Out_writes_bytes_opt(*size) char *where, \ _Inout_opt_ size_t *size, \ int arg); } getkerninfo getkerninfo_args int 64 AUE_NULL COMPAT { int getpagesize(void); } getpagesize \ getpagesize_args int 65 AUE_MSYNC STD { int msync(_In_ void *addr, size_t len, \ int flags); } 66 AUE_VFORK STD { int vfork(void); } 67 AUE_NULL OBSOL vread 68 AUE_NULL OBSOL vwrite 69 AUE_SBRK STD { int sbrk(int incr); } 70 AUE_SSTK STD { int sstk(int incr); } 71 AUE_MMAP COMPAT { int mmap(_In_ void *addr, int len, int prot, \ int flags, int fd, long pos); } 72 AUE_O_VADVISE COMPAT11 { int vadvise(int anom); } 73 AUE_MUNMAP STD { int munmap(_In_ void *addr, size_t len); } 74 AUE_MPROTECT STD { int mprotect(_In_ void *addr, \ size_t len, int prot); } 75 AUE_MADVISE STD { int madvise(_In_ void *addr, \ size_t len, int behav); } 76 AUE_NULL OBSOL vhangup 77 AUE_NULL OBSOL vlimit 78 AUE_MINCORE STD { int mincore( \ _In_ const void *addr, \ size_t len, \ _Out_writes_bytes_(len/PAGE_SIZE) \ char *vec); } 79 AUE_GETGROUPS STD { int getgroups(u_int gidsetsize, \ _Out_writes_opt_(gidsetsize) \ gid_t *gidset); } 80 AUE_SETGROUPS STD { int setgroups(u_int gidsetsize, \ _In_reads_(gidsetsize) gid_t *gidset); } 81 AUE_GETPGRP STD { int getpgrp(void); } 82 AUE_SETPGRP STD { int setpgid(int pid, int pgid); } 83 AUE_SETITIMER STD { int setitimer(u_int which, \ _In_ struct itimerval *itv, \ _Out_opt_ struct itimerval *oitv); } 84 AUE_WAIT4 COMPAT { int wait(void); } 85 AUE_SWAPON STD { int swapon(_In_z_ char *name); } 86 AUE_GETITIMER STD { int getitimer(u_int which, \ _Out_ struct itimerval *itv); } 87 AUE_SYSCTL COMPAT { int gethostname( \ _Out_writes_z_(len) char *hostname, \ u_int len); } gethostname \ gethostname_args int 88 AUE_SYSCTL COMPAT { int sethostname( \ _In_reads_z_(len) char *hostname, \ u_int len); } sethostname \ sethostname_args int 89 AUE_GETDTABLESIZE STD { int getdtablesize(void); } 90 AUE_DUP2 STD { int dup2(u_int from, u_int to); } 91 AUE_NULL UNIMPL getdopt 92 AUE_FCNTL STD { int fcntl(int fd, int cmd, long arg); } ; XXX should be { int fcntl(int fd, int cmd, ...); } ; but we're not ready for varargs. 93 AUE_SELECT STD { int select(int nd, \ _Inout_opt_ fd_set *in, \ _Inout_opt_ fd_set *ou, \ _Inout_opt_ fd_set *ex, \ _In_opt_ struct timeval *tv); } 94 AUE_NULL UNIMPL setdopt 95 AUE_FSYNC STD { int fsync(int fd); } 96 AUE_SETPRIORITY STD { int setpriority(int which, int who, \ int prio); } 97 AUE_SOCKET STD { int socket(int domain, int type, \ int protocol); } 98 AUE_CONNECT STD { int connect(int s, \ _In_reads_bytes_(namelen) caddr_t name, \ int namelen); } 99 AUE_ACCEPT COMPAT|NOARGS { int accept(int s, \ _Out_writes_bytes_opt_(*anamelen) \ caddr_t name, int *anamelen); } \ accept accept_args int 100 AUE_GETPRIORITY STD { int getpriority(int which, int who); } 101 AUE_SEND COMPAT { int send(int s, \ _In_reads_bytes_(len) caddr_t buf, \ int len, \ int flags); } 102 AUE_RECV COMPAT { int recv(int s, \ _Out_writes_bytes_(len) caddr_t buf, \ int len, int flags); } 103 AUE_SIGRETURN COMPAT { int sigreturn( \ _In_ struct osigcontext *sigcntxp); } 104 AUE_BIND STD { int bind(int s, \ _In_reads_bytes_(namelen) caddr_t name, \ int namelen); } 105 AUE_SETSOCKOPT STD { int setsockopt(int s, int level, int name, \ _In_reads_bytes_opt_(valsize) caddr_t val, \ int valsize); } 106 AUE_LISTEN STD { int listen(int s, int backlog); } 107 AUE_NULL OBSOL vtimes 108 AUE_NULL COMPAT { int sigvec(int signum, \ _In_opt_ struct sigvec *nsv, \ _Out_opt_ struct sigvec *osv); } 109 AUE_NULL COMPAT { int sigblock(int mask); } 110 AUE_NULL COMPAT { int sigsetmask(int mask); } 111 AUE_NULL COMPAT { int sigsuspend(osigset_t mask); } ; XXX note nonstandard (bogus) calling convention - the libc stub passes ; us the mask, not a pointer to it. 112 AUE_NULL COMPAT { int sigstack(_In_opt_ struct sigstack *nss, \ _Out_opt_ struct sigstack *oss); } 113 AUE_RECVMSG COMPAT { int recvmsg(int s, \ _Inout_ struct omsghdr *msg, \ int flags); } 114 AUE_SENDMSG COMPAT { int sendmsg(int s, _In_ caddr_t msg, \ int flags); } 115 AUE_NULL OBSOL vtrace 116 AUE_GETTIMEOFDAY STD { int gettimeofday( \ _Out_ struct timeval *tp, \ _Out_opt_ struct timezone *tzp); } 117 AUE_GETRUSAGE STD { int getrusage(int who, \ _Out_ struct rusage *rusage); } 118 AUE_GETSOCKOPT STD { int getsockopt(int s, int level, int name, \ _Out_writes_bytes_opt_(*avalsize) \ caddr_t val, _Inout_ int *avalsize); } 119 AUE_NULL UNIMPL resuba (BSD/OS 2.x) 120 AUE_READV STD { int readv(int fd, \ _Inout_updates_(iovcnt) struct iovec *iovp, \ u_int iovcnt); } 121 AUE_WRITEV STD { int writev(int fd, \ _In_reads_opt_(iovcnt) struct iovec *iovp, \ u_int iovcnt); } 122 AUE_SETTIMEOFDAY STD { int settimeofday( \ _In_ struct timeval *tv, \ _In_opt_ struct timezone *tzp); } 123 AUE_FCHOWN STD { int fchown(int fd, int uid, int gid); } 124 AUE_FCHMOD STD { int fchmod(int fd, int mode); } 125 AUE_RECVFROM COMPAT|NOARGS { int recvfrom(int s, \ _Out_writes_(len) caddr_t buf, \ size_t len, int flags, \ _Out_writes_bytes_(*fromlenaddr) \ caddr_t from, \ _Inout_ int *fromlenaddr); } \ recvfrom recvfrom_args int 126 AUE_SETREUID STD { int setreuid(int ruid, int euid); } 127 AUE_SETREGID STD { int setregid(int rgid, int egid); } 128 AUE_RENAME STD { int rename(_In_z_ char *from, \ _In_z_ char *to); } 129 AUE_TRUNCATE COMPAT { int truncate(_In_z_ char *path, \ long length); } 130 AUE_FTRUNCATE COMPAT { int ftruncate(int fd, long length); } 131 AUE_FLOCK STD { int flock(int fd, int how); } 132 AUE_MKFIFO STD { int mkfifo(_In_z_ char *path, int mode); } 133 AUE_SENDTO STD { int sendto(int s, \ _In_reads_bytes_(len) caddr_t buf, \ size_t len, int flags, \ _In_reads_bytes_opt_(tolen) caddr_t to, \ int tolen); } 134 AUE_SHUTDOWN STD { int shutdown(int s, int how); } 135 AUE_SOCKETPAIR STD { int socketpair(int domain, int type, \ int protocol, _Out_writes_(2) int *rsv); } 136 AUE_MKDIR STD { int mkdir(_In_z_ char *path, int mode); } 137 AUE_RMDIR STD { int rmdir(_In_z_ char *path); } 138 AUE_UTIMES STD { int utimes(_In_z_ char *path, \ _In_ struct timeval *tptr); } 139 AUE_NULL OBSOL 4.2 sigreturn 140 AUE_ADJTIME STD { int adjtime(_In_ struct timeval *delta, \ _Out_opt_ struct timeval *olddelta); } 141 AUE_GETPEERNAME COMPAT { int getpeername(int fdes, \ _Out_writes_bytes_(*alen) \ caddr_t asa, \ _Inout_opt_ int *alen); } 142 AUE_SYSCTL COMPAT { long gethostid(void); } 143 AUE_SYSCTL COMPAT { int sethostid(long hostid); } 144 AUE_GETRLIMIT COMPAT { int getrlimit(u_int which, _Out_ struct \ orlimit *rlp); } 145 AUE_SETRLIMIT COMPAT { int setrlimit(u_int which, \ _Out_ struct orlimit *rlp); } 146 AUE_KILLPG COMPAT { int killpg(int pgid, int signum); } 147 AUE_SETSID STD { int setsid(void); } 148 AUE_QUOTACTL STD { int quotactl( \ _In_z_ char *path, int cmd, int uid, \ _In_ caddr_t arg); } 149 AUE_O_QUOTA COMPAT { int quota(void); } 150 AUE_GETSOCKNAME COMPAT|NOARGS { int getsockname(int fdec, \ _Out_writes_bytes_(*alen) \ caddr_t asa, \ _Inout_ int *alen); } getsockname \ getsockname_args int ; Syscalls 151-180 inclusive are reserved for vendor-specific ; system calls. (This includes various calls added for compatibity ; with other Unix variants.) ; Some of these calls are now supported by BSD... 151 AUE_NULL UNIMPL sem_lock (BSD/OS 2.x) 152 AUE_NULL UNIMPL sem_wakeup (BSD/OS 2.x) 153 AUE_NULL UNIMPL asyncdaemon (BSD/OS 2.x) ; 154 is initialised by the NLM code, if present. 154 AUE_NULL NOSTD { int nlm_syscall(int debug_level, \ int grace_period, int addr_count, \ _In_reads_(addr_count) \ char **addrs); } ; 155 is initialized by the NFS code, if present. 155 AUE_NFS_SVC NOSTD { int nfssvc(int flag, _In_ caddr_t argp); } 156 AUE_GETDIRENTRIES COMPAT { int getdirentries(int fd, \ _Out_writes_bytes_(count) char *buf, \ u_int count, _Out_ long *basep); } 157 AUE_STATFS COMPAT4 { int statfs(_In_z_ char *path, \ _Out_ struct ostatfs *buf); } 158 AUE_FSTATFS COMPAT4 { int fstatfs(int fd, \ _Out_ struct ostatfs *buf); } 159 AUE_NULL UNIMPL nosys 160 AUE_LGETFH STD { int lgetfh(_In_z_ char *fname, \ _Out_ struct fhandle *fhp); } 161 AUE_NFS_GETFH STD { int getfh(_In_z_ char *fname, \ _Out_ struct fhandle *fhp); } 162 AUE_SYSCTL COMPAT4 { int getdomainname( \ _Out_writes_z_(len) char *domainname, \ int len); } 163 AUE_SYSCTL COMPAT4 { int setdomainname( \ _In_reads_z_(len) char *domainname, \ int len); } 164 AUE_NULL COMPAT4 { int uname(_Out_ struct utsname *name); } 165 AUE_SYSARCH STD { int sysarch(int op, _In_z_ char *parms); } 166 AUE_RTPRIO STD { int rtprio(int function, pid_t pid, \ _Inout_ struct rtprio *rtp); } 167 AUE_NULL UNIMPL nosys 168 AUE_NULL UNIMPL nosys 169 AUE_SEMSYS NOSTD { int semsys(int which, int a2, int a3, \ int a4, int a5); } ; XXX should be { int semsys(int which, ...); } 170 AUE_MSGSYS NOSTD { int msgsys(int which, int a2, int a3, \ int a4, int a5, int a6); } ; XXX should be { int msgsys(int which, ...); } 171 AUE_SHMSYS NOSTD { int shmsys(int which, int a2, int a3, \ int a4); } ; XXX should be { int shmsys(int which, ...); } 172 AUE_NULL UNIMPL nosys 173 AUE_PREAD COMPAT6 { ssize_t pread(int fd, \ _Out_writes_bytes_(nbyte) void *buf, \ size_t nbyte, int pad, off_t offset); } 174 AUE_PWRITE COMPAT6 { ssize_t pwrite(int fd, \ _In_reads_bytes_(nbyte) \ const void *buf, \ size_t nbyte, int pad, off_t offset); } 175 AUE_SETFIB STD { int setfib(int fibnum); } 176 AUE_NTP_ADJTIME STD { int ntp_adjtime(_Inout_ struct timex *tp); } 177 AUE_NULL UNIMPL sfork (BSD/OS 2.x) 178 AUE_NULL UNIMPL getdescriptor (BSD/OS 2.x) 179 AUE_NULL UNIMPL setdescriptor (BSD/OS 2.x) 180 AUE_NULL UNIMPL nosys ; Syscalls 181-199 are used by/reserved for BSD 181 AUE_SETGID STD { int setgid(gid_t gid); } 182 AUE_SETEGID STD { int setegid(gid_t egid); } 183 AUE_SETEUID STD { int seteuid(uid_t euid); } -184 AUE_NULL UNIMPL lfs_bmapv -185 AUE_NULL UNIMPL lfs_markv -186 AUE_NULL UNIMPL lfs_segclean -187 AUE_NULL UNIMPL lfs_segwait +184 AUE_NULL OBSOL lfs_bmapv +185 AUE_NULL OBSOL lfs_markv +186 AUE_NULL OBSOL lfs_segclean +187 AUE_NULL OBSOL lfs_segwait 188 AUE_STAT COMPAT11 { int stat(_In_z_ char *path, \ _Out_ struct freebsd11_stat *ub); } 189 AUE_FSTAT COMPAT11 { int fstat(int fd, \ _Out_ struct freebsd11_stat *sb); } 190 AUE_LSTAT COMPAT11 { int lstat(_In_z_ char *path, \ _Out_ struct freebsd11_stat *ub); } 191 AUE_PATHCONF STD { int pathconf(_In_z_ char *path, int name); } 192 AUE_FPATHCONF STD { int fpathconf(int fd, int name); } 193 AUE_NULL UNIMPL nosys 194 AUE_GETRLIMIT STD { int getrlimit(u_int which, \ _Out_ struct rlimit *rlp); } getrlimit \ __getrlimit_args int 195 AUE_SETRLIMIT STD { int setrlimit(u_int which, \ _In_ struct rlimit *rlp); } setrlimit \ __setrlimit_args int 196 AUE_GETDIRENTRIES COMPAT11 { int getdirentries(int fd, \ _Out_writes_bytes_(count) char *buf, \ u_int count, _Out_ long *basep); } 197 AUE_MMAP COMPAT6 { caddr_t mmap(_In_ caddr_t addr, size_t len, \ int prot, int flags, int fd, int pad, \ off_t pos); } 198 AUE_NULL NOPROTO { int nosys(void); } __syscall \ __syscall_args int 199 AUE_LSEEK COMPAT6 { off_t lseek(int fd, int pad, \ off_t offset, int whence); } 200 AUE_TRUNCATE COMPAT6 { int truncate(_In_z_ char *path, int pad, \ off_t length); } 201 AUE_FTRUNCATE COMPAT6 { int ftruncate(int fd, int pad, \ off_t length); } 202 AUE_SYSCTL STD { int __sysctl( \ _In_reads_(namelen) int *name, \ u_int namelen, \ _Out_writes_bytes_opt_(*oldlenp) \ void *old, \ _Inout_opt_ size_t *oldlenp, \ _In_reads_bytes_opt_(newlen) \ void *new, \ size_t newlen); } \ __sysctl sysctl_args int 203 AUE_MLOCK STD { int mlock(_In_ const void *addr, \ size_t len); } 204 AUE_MUNLOCK STD { int munlock(_In_ const void *addr, \ size_t len); } 205 AUE_UNDELETE STD { int undelete(_In_z_ char *path); } 206 AUE_FUTIMES STD { int futimes(int fd, \ _In_reads_(2) struct timeval *tptr); } 207 AUE_GETPGID STD { int getpgid(pid_t pid); } 208 AUE_NULL UNIMPL nosys 209 AUE_POLL STD { int poll( \ _Inout_updates_(nfds) \ struct pollfd *fds, \ u_int nfds, int timeout); } ; ; The following are reserved for loadable syscalls ; 210 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 211 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 212 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 213 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 214 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 215 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 216 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 217 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 218 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 219 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int 220 AUE_SEMCTL COMPAT7|NOSTD { int __semctl(int semid, int semnum, \ int cmd, union semun_old *arg); } 221 AUE_SEMGET NOSTD { int semget(key_t key, int nsems, \ int semflg); } 222 AUE_SEMOP NOSTD { int semop(int semid, \ _In_reads_(nsops) struct sembuf *sops, \ size_t nsops); } -223 AUE_NULL UNIMPL semconfig +223 AUE_NULL OBSOL semconfig 224 AUE_MSGCTL COMPAT7|NOSTD { int msgctl(int msqid, int cmd, \ struct msqid_ds_old *buf); } 225 AUE_MSGGET NOSTD { int msgget(key_t key, int msgflg); } 226 AUE_MSGSND NOSTD { int msgsnd(int msqid, \ _In_reads_bytes_(msgsz) const void *msgp, \ size_t msgsz, int msgflg); } 227 AUE_MSGRCV NOSTD { ssize_t msgrcv(int msqid, \ _Out_writes_bytes_(msgsz) void *msgp, \ size_t msgsz, long msgtyp, int msgflg); } 228 AUE_SHMAT NOSTD { int shmat(int shmid, \ _In_ const void *shmaddr, \ int shmflg); } 229 AUE_SHMCTL COMPAT7|NOSTD { int shmctl(int shmid, int cmd, \ struct shmid_ds_old *buf); } 230 AUE_SHMDT NOSTD { int shmdt(_In_ const void *shmaddr); } 231 AUE_SHMGET NOSTD { int shmget(key_t key, size_t size, \ int shmflg); } ; 232 AUE_NULL STD { int clock_gettime(clockid_t clock_id, \ _Out_ struct timespec *tp); } 233 AUE_CLOCK_SETTIME STD { int clock_settime(clockid_t clock_id, \ _In_ const struct timespec *tp); } 234 AUE_NULL STD { int clock_getres(clockid_t clock_id, \ _Out_ struct timespec *tp); } 235 AUE_NULL STD { int ktimer_create( \ clockid_t clock_id, \ _In_ struct sigevent *evp, \ _Out_ int *timerid); } 236 AUE_NULL STD { int ktimer_delete(int timerid); } 237 AUE_NULL STD { int ktimer_settime(int timerid, \ int flags, \ _In_ const struct itimerspec *value, \ _Out_opt_ struct itimerspec *ovalue); } 238 AUE_NULL STD { int ktimer_gettime(int timerid, \ _Out_ struct itimerspec *value); } 239 AUE_NULL STD { int ktimer_getoverrun(int timerid); } 240 AUE_NULL STD { int nanosleep( \ _In_ const struct timespec *rqtp, \ _Out_opt_ struct timespec *rmtp); } 241 AUE_NULL STD { int ffclock_getcounter( \ _Out_ ffcounter *ffcount); } 242 AUE_NULL STD { int ffclock_setestimate( \ _In_ struct ffclock_estimate *cest); } 243 AUE_NULL STD { int ffclock_getestimate( \ _Out_ struct ffclock_estimate *cest); } 244 AUE_NULL STD { int clock_nanosleep(clockid_t clock_id, \ int flags, \ _In_ const struct timespec *rqtp, \ _Out_opt_ struct timespec *rmtp); } 245 AUE_NULL UNIMPL nosys 246 AUE_NULL UNIMPL nosys 247 AUE_NULL STD { int clock_getcpuclockid2(id_t id, \ int which, _Out_ clockid_t *clock_id); } 248 AUE_NULL STD { int ntp_gettime( \ _Out_ struct ntptimeval *ntvp); } 249 AUE_NULL UNIMPL nosys ; syscall numbers initially used in OpenBSD 250 AUE_MINHERIT STD { int minherit( \ _In_ void *addr, \ size_t len, int inherit); } 251 AUE_RFORK STD { int rfork(int flags); } 252 AUE_POLL OBSOL openbsd_poll 253 AUE_ISSETUGID STD { int issetugid(void); } 254 AUE_LCHOWN STD { int lchown(_In_z_ char *path, int uid, \ int gid); } 255 AUE_AIO_READ STD { int aio_read( \ _Inout_ struct aiocb *aiocbp); } 256 AUE_AIO_WRITE STD { int aio_write( \ _Inout_ struct aiocb *aiocbp); } 257 AUE_LIO_LISTIO STD { int lio_listio(int mode, \ _Inout_updates_(nent) \ struct aiocb* const *acb_list, \ int nent, \ _In_opt_ struct sigevent *sig); } 258 AUE_NULL UNIMPL nosys 259 AUE_NULL UNIMPL nosys 260 AUE_NULL UNIMPL nosys 261 AUE_NULL UNIMPL nosys 262 AUE_NULL UNIMPL nosys 263 AUE_NULL UNIMPL nosys 264 AUE_NULL UNIMPL nosys 265 AUE_NULL UNIMPL nosys 266 AUE_NULL UNIMPL nosys 267 AUE_NULL UNIMPL nosys 268 AUE_NULL UNIMPL nosys 269 AUE_NULL UNIMPL nosys 270 AUE_NULL UNIMPL nosys 271 AUE_NULL UNIMPL nosys 272 AUE_O_GETDENTS COMPAT11 { int getdents(int fd, \ _Out_writes_bytes_(count) char *buf, \ size_t count); } 273 AUE_NULL UNIMPL nosys 274 AUE_LCHMOD STD { int lchmod(_In_z_ char *path, mode_t mode); } 275 AUE_NULL OBSOL netbsd_lchown 276 AUE_LUTIMES STD { int lutimes(_In_z_ char *path, \ _In_ struct timeval *tptr); } 277 AUE_NULL OBSOL netbsd_msync 278 AUE_STAT COMPAT11 { int nstat(_In_z_ char *path, \ _Out_ struct nstat *ub); } 279 AUE_FSTAT COMPAT11 { int nfstat(int fd, _Out_ struct nstat *sb); } 280 AUE_LSTAT COMPAT11 { int nlstat(_In_z_ char *path, \ _Out_ struct nstat *ub); } 281 AUE_NULL UNIMPL nosys 282 AUE_NULL UNIMPL nosys 283 AUE_NULL UNIMPL nosys 284 AUE_NULL UNIMPL nosys 285 AUE_NULL UNIMPL nosys 286 AUE_NULL UNIMPL nosys 287 AUE_NULL UNIMPL nosys 288 AUE_NULL UNIMPL nosys 289 AUE_PREADV STD { ssize_t preadv(int fd, \ _In_reads_(iovcnt) \ struct iovec *iovp, \ u_int iovcnt, off_t offset); } 290 AUE_PWRITEV STD { ssize_t pwritev(int fd, \ _In_reads_(iovcnt) struct iovec *iovp, \ u_int iovcnt, off_t offset); } 291 AUE_NULL UNIMPL nosys 292 AUE_NULL UNIMPL nosys 293 AUE_NULL UNIMPL nosys 294 AUE_NULL UNIMPL nosys 295 AUE_NULL UNIMPL nosys 296 AUE_NULL UNIMPL nosys 297 AUE_FHSTATFS COMPAT4 { int fhstatfs( \ _In_ const struct fhandle *u_fhp, \ _Out_ struct ostatfs *buf); } 298 AUE_FHOPEN STD { int fhopen( \ _In_ const struct fhandle *u_fhp, \ int flags); } 299 AUE_FHSTAT COMPAT11 { int fhstat( \ _In_ const struct fhandle *u_fhp, \ _Out_ struct freebsd11_stat *sb); } 300 AUE_NULL STD { int modnext(int modid); } 301 AUE_NULL STD { int modstat(int modid, \ _Out_ struct module_stat* stat); } 302 AUE_NULL STD { int modfnext(int modid); } 303 AUE_NULL STD { int modfind(_In_z_ const char *name); } 304 AUE_MODLOAD STD { int kldload(_In_z_ const char *file); } 305 AUE_MODUNLOAD STD { int kldunload(int fileid); } 306 AUE_NULL STD { int kldfind(_In_z_ const char *file); } 307 AUE_NULL STD { int kldnext(int fileid); } 308 AUE_NULL STD { int kldstat(int fileid, \ _Out_ struct kld_file_stat *stat); } 309 AUE_NULL STD { int kldfirstmod(int fileid); } 310 AUE_GETSID STD { int getsid(pid_t pid); } 311 AUE_SETRESUID STD { int setresuid(uid_t ruid, uid_t euid, \ uid_t suid); } 312 AUE_SETRESGID STD { int setresgid(gid_t rgid, gid_t egid, \ gid_t sgid); } 313 AUE_NULL OBSOL signanosleep 314 AUE_AIO_RETURN STD { ssize_t aio_return( \ _Inout_ struct aiocb *aiocbp); } 315 AUE_AIO_SUSPEND STD { int aio_suspend( \ _Inout_updates_(nent) \ struct aiocb * const * aiocbp, \ int nent, \ _In_opt_ \ const struct timespec *timeout); } 316 AUE_AIO_CANCEL STD { int aio_cancel(int fd, \ _In_opt_ struct aiocb *aiocbp); } 317 AUE_AIO_ERROR STD { int aio_error( \ _In_ struct aiocb *aiocbp); } 318 AUE_AIO_READ COMPAT6 { int aio_read( \ _Inout_ struct oaiocb *aiocbp); } 319 AUE_AIO_WRITE COMPAT6 { int aio_write( \ _Inout_ struct oaiocb *aiocbp); } 320 AUE_LIO_LISTIO COMPAT6 { int lio_listio(int mode, \ _Inout_updates_(nent) \ struct oaiocb * const *acb_list, \ int nent, \ _In_opt_ struct osigevent *sig); } 321 AUE_NULL STD { int yield(void); } 322 AUE_NULL OBSOL thr_sleep 323 AUE_NULL OBSOL thr_wakeup 324 AUE_MLOCKALL STD { int mlockall(int how); } 325 AUE_MUNLOCKALL STD { int munlockall(void); } 326 AUE_GETCWD STD { int __getcwd( \ _Out_writes_z_(buflen) char *buf, \ size_t buflen); } 327 AUE_NULL STD { int sched_setparam (pid_t pid, \ _In_ const struct sched_param *param); } 328 AUE_NULL STD { int sched_getparam (pid_t pid, \ _Out_ struct sched_param *param); } 329 AUE_NULL STD { int sched_setscheduler (pid_t pid, int \ policy, _In_ const struct sched_param \ *param); } 330 AUE_NULL STD { int sched_getscheduler (pid_t pid); } 331 AUE_NULL STD { int sched_yield (void); } 332 AUE_NULL STD { int sched_get_priority_max (int policy); } 333 AUE_NULL STD { int sched_get_priority_min (int policy); } 334 AUE_NULL STD { int sched_rr_get_interval (pid_t pid, \ _Out_ struct timespec *interval); } 335 AUE_NULL STD { int utrace( \ _In_reads_bytes_(len) const void *addr, \ size_t len); } 336 AUE_SENDFILE COMPAT4 { int sendfile(int fd, int s, \ off_t offset, size_t nbytes, \ _In_opt_ struct sf_hdtr *hdtr, \ _Out_opt_ off_t *sbytes, int flags); } 337 AUE_NULL STD { int kldsym(int fileid, int cmd, \ _In_ void *data); } 338 AUE_JAIL STD { int jail( \ _In_ struct jail *jail); } 339 AUE_NULL NOSTD|NOTSTATIC { int nnpfs_syscall(int operation, \ char *a_pathP, int a_opcode, \ void *a_paramsP, \ int a_followSymlinks); } 340 AUE_SIGPROCMASK STD { int sigprocmask(int how, \ _In_opt_ const sigset_t *set, \ _Out_opt_ sigset_t *oset); } 341 AUE_SIGSUSPEND STD { int sigsuspend( \ _In_ const sigset_t *sigmask); } 342 AUE_SIGACTION COMPAT4 { int sigaction(int sig, \ _In_opt_ const struct sigaction *act, \ _Out_opt_ struct sigaction *oact); } 343 AUE_SIGPENDING STD { int sigpending(_In_ sigset_t *set); } 344 AUE_SIGRETURN COMPAT4 { int sigreturn( \ _In_ const struct ucontext4 *sigcntxp); } 345 AUE_SIGWAIT STD { int sigtimedwait(_In_ const sigset_t *set, \ _Out_opt_ siginfo_t *info, \ _In_opt_ const struct timespec *timeout); } 346 AUE_NULL STD { int sigwaitinfo(_In_ const sigset_t *set, \ _Out_opt_ siginfo_t *info); } 347 AUE_ACL_GET_FILE STD { int __acl_get_file( \ _In_z_ const char *path, \ acl_type_t type, \ _Out_ struct acl *aclp); } 348 AUE_ACL_SET_FILE STD { int __acl_set_file( \ _In_z_ const char *path, \ acl_type_t type, _In_ struct acl *aclp); } 349 AUE_ACL_GET_FD STD { int __acl_get_fd(int filedes, \ acl_type_t type, _Out_ struct acl *aclp); } 350 AUE_ACL_SET_FD STD { int __acl_set_fd(int filedes, \ acl_type_t type, _In_ struct acl *aclp); } 351 AUE_ACL_DELETE_FILE STD { int __acl_delete_file( \ _In_z_ const char *path, \ acl_type_t type); } 352 AUE_ACL_DELETE_FD STD { int __acl_delete_fd(int filedes, \ acl_type_t type); } 353 AUE_ACL_CHECK_FILE STD { int __acl_aclcheck_file( \ _In_z_ const char *path, \ acl_type_t type, _In_ struct acl *aclp); } 354 AUE_ACL_CHECK_FD STD { int __acl_aclcheck_fd(int filedes, \ acl_type_t type, _In_ struct acl *aclp); } 355 AUE_EXTATTRCTL STD { int extattrctl(_In_z_ const char *path, \ int cmd, _In_z_opt_ const char *filename, \ int attrnamespace, \ _In_z_ const char *attrname); } 356 AUE_EXTATTR_SET_FILE STD { ssize_t extattr_set_file( \ _In_z_ const char *path, \ int attrnamespace, \ _In_z_ const char *attrname, \ _In_reads_bytes_(nbytes) void *data, \ size_t nbytes); } 357 AUE_EXTATTR_GET_FILE STD { ssize_t extattr_get_file( \ _In_z_ const char *path, \ int attrnamespace, \ _In_z_ const char *attrname, \ _Out_writes_bytes_(nbytes) void *data, \ size_t nbytes); } 358 AUE_EXTATTR_DELETE_FILE STD { int extattr_delete_file( \ _In_z_ const char *path, \ int attrnamespace, \ _In_z_ const char *attrname); } 359 AUE_AIO_WAITCOMPLETE STD { ssize_t aio_waitcomplete( \ _Outptr_result_maybenull_ \ struct aiocb **aiocbp, \ _In_opt_ struct timespec *timeout); } 360 AUE_GETRESUID STD { int getresuid(_Out_opt_ uid_t *ruid, \ _Out_opt_ uid_t *euid, \ _Out_opt_ uid_t *suid); } 361 AUE_GETRESGID STD { int getresgid(_Out_opt_ gid_t *rgid, \ _Out_opt_ gid_t *egid, \ _Out_opt_ gid_t *sgid); } 362 AUE_KQUEUE STD { int kqueue(void); } 363 AUE_KEVENT COMPAT11 { int kevent(int fd, \ _In_reads_opt_(nchanges) \ struct kevent_freebsd11 *changelist, \ int nchanges, \ _Out_writes_opt_(nevents) \ struct kevent_freebsd11 *eventlist, \ int nevents, \ _In_opt_ const struct timespec *timeout); } -364 AUE_NULL UNIMPL __cap_get_proc -365 AUE_NULL UNIMPL __cap_set_proc -366 AUE_NULL UNIMPL __cap_get_fd -367 AUE_NULL UNIMPL __cap_get_file -368 AUE_NULL UNIMPL __cap_set_fd -369 AUE_NULL UNIMPL __cap_set_file +364 AUE_NULL OBSOL __cap_get_proc +365 AUE_NULL OBSOL __cap_set_proc +366 AUE_NULL OBSOL __cap_get_fd +367 AUE_NULL OBSOL __cap_get_file +368 AUE_NULL OBSOL __cap_set_fd +369 AUE_NULL OBSOL __cap_set_file 370 AUE_NULL UNIMPL nosys 371 AUE_EXTATTR_SET_FD STD { ssize_t extattr_set_fd(int fd, \ int attrnamespace, \ _In_z_ const char *attrname, \ _In_reads_bytes_(nbytes) void *data, \ size_t nbytes); } 372 AUE_EXTATTR_GET_FD STD { ssize_t extattr_get_fd(int fd, \ int attrnamespace, \ _In_z_ const char *attrname, \ _Out_writes_bytes_(nbytes) void *data, \ size_t nbytes); } 373 AUE_EXTATTR_DELETE_FD STD { int extattr_delete_fd(int fd, \ int attrnamespace, \ _In_z_ const char *attrname); } 374 AUE_SETUGID STD { int __setugid(int flag); } -375 AUE_NULL UNIMPL nfsclnt +375 AUE_NULL OBSOL nfsclnt 376 AUE_EACCESS STD { int eaccess(_In_z_ char *path, int amode); } 377 AUE_NULL NOSTD|NOTSTATIC { int afs3_syscall(long syscall, \ long parm1, long parm2, long parm3, \ long parm4, long parm5, long parm6); } 378 AUE_NMOUNT STD { int nmount( \ _In_reads_(iovcnt) struct iovec *iovp, \ unsigned int iovcnt, int flags); } -379 AUE_NULL UNIMPL kse_exit -380 AUE_NULL UNIMPL kse_wakeup -381 AUE_NULL UNIMPL kse_create -382 AUE_NULL UNIMPL kse_thr_interrupt -383 AUE_NULL UNIMPL kse_release +379 AUE_NULL OBSOL kse_exit +380 AUE_NULL OBSOL kse_wakeup +381 AUE_NULL OBSOL kse_create +382 AUE_NULL OBSOL kse_thr_interrupt +383 AUE_NULL OBSOL kse_release 384 AUE_NULL STD { int __mac_get_proc( \ _In_ struct mac *mac_p); } 385 AUE_NULL STD { int __mac_set_proc( \ _In_ struct mac *mac_p); } 386 AUE_NULL STD { int __mac_get_fd(int fd, \ _In_ struct mac *mac_p); } 387 AUE_NULL STD { int __mac_get_file( \ _In_z_ const char *path_p, \ _In_ struct mac *mac_p); } 388 AUE_NULL STD { int __mac_set_fd(int fd, \ _In_ struct mac *mac_p); } 389 AUE_NULL STD { int __mac_set_file( \ _In_z_ const char *path_p, \ _In_ struct mac *mac_p); } 390 AUE_NULL STD { int kenv(int what, \ _In_z_opt_ const char *name, \ _Inout_updates_opt_(len) \ char *value, int len); } 391 AUE_LCHFLAGS STD { int lchflags(_In_z_ const char *path, \ u_long flags); } 392 AUE_NULL STD { int uuidgen( \ _Out_writes_(count) struct uuid *store, \ int count); } 393 AUE_SENDFILE STD { int sendfile(int fd, int s, \ off_t offset, \ size_t nbytes, \ _In_opt_ struct sf_hdtr *hdtr, \ _Out_opt_ off_t *sbytes, int flags); } 394 AUE_NULL STD { int mac_syscall( \ _In_z_ const char *policy, \ int call, \ _In_opt_ void *arg); } 395 AUE_GETFSSTAT COMPAT11 { int getfsstat( \ _Out_writes_bytes_opt_(bufsize) \ struct freebsd11_statfs *buf, \ long bufsize, int mode); } 396 AUE_STATFS COMPAT11 { int statfs(_In_z_ char *path, \ _Out_ struct freebsd11_statfs *buf); } 397 AUE_FSTATFS COMPAT11 { int fstatfs(int fd, \ _Out_ struct freebsd11_statfs *buf); } 398 AUE_FHSTATFS COMPAT11 { int fhstatfs( \ _In_ const struct fhandle *u_fhp, \ _Out_ struct freebsd11_statfs *buf); } 399 AUE_NULL UNIMPL nosys 400 AUE_SEMCLOSE NOSTD { int ksem_close(semid_t id); } 401 AUE_SEMPOST NOSTD { int ksem_post(semid_t id); } 402 AUE_SEMWAIT NOSTD { int ksem_wait(semid_t id); } 403 AUE_SEMTRYWAIT NOSTD { int ksem_trywait(semid_t id); } 404 AUE_SEMINIT NOSTD { int ksem_init(_Out_ semid_t *idp, \ unsigned int value); } 405 AUE_SEMOPEN NOSTD { int ksem_open(_Out_ semid_t *idp, \ _In_z_ const char *name, int oflag, \ mode_t mode, unsigned int value); } 406 AUE_SEMUNLINK NOSTD { int ksem_unlink( \ _In_z_ const char *name); } 407 AUE_SEMGETVALUE NOSTD { int ksem_getvalue(semid_t id, \ _Out_ int *val); } 408 AUE_SEMDESTROY NOSTD { int ksem_destroy(semid_t id); } 409 AUE_NULL STD { int __mac_get_pid(pid_t pid, \ _In_ struct mac *mac_p); } 410 AUE_NULL STD { int __mac_get_link( \ _In_z_ const char *path_p, \ _In_ struct mac *mac_p); } 411 AUE_NULL STD { int __mac_set_link( \ _In_z_ const char *path_p, \ _In_ struct mac *mac_p); } 412 AUE_EXTATTR_SET_LINK STD { ssize_t extattr_set_link( \ _In_z_ const char *path, \ int attrnamespace, \ _In_z_ const char *attrname, \ _In_reads_bytes_(nbytes) void *data, \ size_t nbytes); } 413 AUE_EXTATTR_GET_LINK STD { ssize_t extattr_get_link( \ _In_z_ const char *path, \ int attrnamespace, \ _In_z_ const char *attrname, \ _Out_writes_bytes_(nbytes) void *data, \ size_t nbytes); } 414 AUE_EXTATTR_DELETE_LINK STD { int extattr_delete_link( \ _In_z_ const char *path, \ int attrnamespace, \ _In_z_ const char *attrname); } 415 AUE_NULL STD { int __mac_execve(_In_z_ char *fname, \ _In_ char **argv, \ _In_ char **envv, \ _In_ struct mac *mac_p); } 416 AUE_SIGACTION STD { int sigaction(int sig, \ _In_opt_ const struct sigaction *act, \ _Out_opt_ struct sigaction *oact); } 417 AUE_SIGRETURN STD { int sigreturn( \ _In_ const struct __ucontext *sigcntxp); } 418 AUE_NULL UNIMPL __xstat 419 AUE_NULL UNIMPL __xfstat 420 AUE_NULL UNIMPL __xlstat 421 AUE_NULL STD { int getcontext( \ _Out_ struct __ucontext *ucp); } 422 AUE_NULL STD { int setcontext( \ _In_ const struct __ucontext *ucp); } 423 AUE_NULL STD { int swapcontext( \ _Out_ struct __ucontext *oucp, \ _In_ const struct __ucontext *ucp); } 424 AUE_SWAPOFF STD { int swapoff(_In_z_ const char *name); } 425 AUE_ACL_GET_LINK STD { int __acl_get_link( \ _In_z_ const char *path, acl_type_t type, \ _Out_ struct acl *aclp); } 426 AUE_ACL_SET_LINK STD { int __acl_set_link( \ _In_z_ const char *path, acl_type_t type, \ _In_ struct acl *aclp); } 427 AUE_ACL_DELETE_LINK STD { int __acl_delete_link( \ _In_z_ const char *path, \ acl_type_t type); } 428 AUE_ACL_CHECK_LINK STD { int __acl_aclcheck_link( \ _In_z_ const char *path, acl_type_t type, \ _In_ struct acl *aclp); } 429 AUE_SIGWAIT STD { int sigwait(_In_ const sigset_t *set, \ _Out_ int *sig); } 430 AUE_THR_CREATE STD { int thr_create(_In_ ucontext_t *ctx, \ _Out_ long *id, int flags); } 431 AUE_THR_EXIT STD { void thr_exit(_Out_opt_ long *state); } 432 AUE_NULL STD { int thr_self(_Out_ long *id); } 433 AUE_THR_KILL STD { int thr_kill(long id, int sig); } 434 AUE_NULL UNIMPL nosys 435 AUE_NULL UNIMPL nosys 436 AUE_JAIL_ATTACH STD { int jail_attach(int jid); } 437 AUE_EXTATTR_LIST_FD STD { ssize_t extattr_list_fd(int fd, \ int attrnamespace, \ _Out_writes_bytes_opt_(nbytes) \ void *data, \ size_t nbytes); } 438 AUE_EXTATTR_LIST_FILE STD { ssize_t extattr_list_file( \ _In_z_ const char *path, \ int attrnamespace, \ _Out_writes_bytes_opt_(nbytes) \ void *data, size_t nbytes); } 439 AUE_EXTATTR_LIST_LINK STD { ssize_t extattr_list_link( \ _In_z_ const char *path, \ int attrnamespace, \ _Out_writes_bytes_opt_(nbytes) \ void *data, size_t nbytes); } -440 AUE_NULL UNIMPL kse_switchin +440 AUE_NULL OBSOL kse_switchin 441 AUE_SEMWAIT NOSTD { int ksem_timedwait(semid_t id, \ _In_opt_ const struct timespec *abstime); } 442 AUE_NULL STD { int thr_suspend( \ _In_opt_ const struct timespec *timeout); } 443 AUE_NULL STD { int thr_wake(long id); } 444 AUE_MODUNLOAD STD { int kldunloadf(int fileid, int flags); } 445 AUE_AUDIT STD { int audit( \ _In_reads_bytes_(length) \ const void *record, \ u_int length); } 446 AUE_AUDITON STD { int auditon(int cmd, \ _In_opt_ void *data, \ u_int length); } 447 AUE_GETAUID STD { int getauid(_Out_ uid_t *auid); } 448 AUE_SETAUID STD { int setauid(_In_ uid_t *auid); } 449 AUE_GETAUDIT STD { int getaudit( \ _Out_ struct auditinfo *auditinfo); } 450 AUE_SETAUDIT STD { int setaudit( \ _In_ struct auditinfo *auditinfo); } 451 AUE_GETAUDIT_ADDR STD { int getaudit_addr( \ _Out_writes_bytes_(length) \ struct auditinfo_addr *auditinfo_addr, \ u_int length); } 452 AUE_SETAUDIT_ADDR STD { int setaudit_addr( \ _In_reads_bytes_(length) \ struct auditinfo_addr *auditinfo_addr, \ u_int length); } 453 AUE_AUDITCTL STD { int auditctl(_In_z_ char *path); } 454 AUE_NULL STD { int _umtx_op(_Inout_ void *obj, int op, \ u_long val, _In_ void *uaddr1, \ _In_ void *uaddr2); } 455 AUE_THR_NEW STD { int thr_new(_In_ struct thr_param *param, \ int param_size); } 456 AUE_NULL STD { int sigqueue(pid_t pid, int signum, \ _In_ void *value); } 457 AUE_MQ_OPEN NOSTD { int kmq_open( \ _In_z_ const char *path, \ int flags, \ mode_t mode, \ _In_opt_ const struct mq_attr *attr); } 458 AUE_MQ_SETATTR NOSTD { int kmq_setattr(int mqd, \ _In_opt_ const struct mq_attr *attr, \ _Out_opt_ struct mq_attr *oattr); } 459 AUE_MQ_TIMEDRECEIVE NOSTD { int kmq_timedreceive( \ int mqd, \ _Out_writes_bytes_(msg_len) char *msg_ptr, \ size_t msg_len, \ _Out_opt_ unsigned *msg_prio, \ _In_opt_ \ const struct timespec *abs_timeout); } 460 AUE_MQ_TIMEDSEND NOSTD { int kmq_timedsend(int mqd, \ _In_reads_bytes_(msg_len) \ const char *msg_ptr, size_t msg_len, \ unsigned msg_prio, \ _In_opt_ \ const struct timespec *abs_timeout); } 461 AUE_MQ_NOTIFY NOSTD { int kmq_notify(int mqd, \ _In_opt_ const struct sigevent *sigev); } 462 AUE_MQ_UNLINK NOSTD { int kmq_unlink(_In_z_ const char *path); } 463 AUE_NULL STD { int abort2(_In_z_ const char *why, \ int nargs, \ _In_reads_(nargs) void **args); } 464 AUE_NULL STD { int thr_set_name(long id, \ _In_z_ const char *name); } 465 AUE_AIO_FSYNC STD { int aio_fsync(int op, \ _In_ struct aiocb *aiocbp); } 466 AUE_RTPRIO STD { int rtprio_thread(int function, \ lwpid_t lwpid, \ _Inout_ struct rtprio *rtp); } 467 AUE_NULL UNIMPL nosys 468 AUE_NULL UNIMPL nosys 469 AUE_NULL UNIMPL __getpath_fromfd 470 AUE_NULL UNIMPL __getpath_fromaddr 471 AUE_SCTP_PEELOFF NOSTD { int sctp_peeloff(int sd, \ uint32_t name); } 472 AUE_SCTP_GENERIC_SENDMSG NOSTD { int sctp_generic_sendmsg( \ int sd, \ _In_reads_bytes_(mlen) caddr_t msg, \ int mlen, \ _In_reads_bytes_(tolen) \ caddr_t to, \ __socklen_t tolen, \ _In_opt_ struct sctp_sndrcvinfo *sinfo, \ int flags); } 473 AUE_SCTP_GENERIC_SENDMSG_IOV NOSTD { \ int sctp_generic_sendmsg_iov( \ int sd, \ _In_reads_(iovlen) struct iovec *iov, \ int iovlen, \ _In_reads_bytes_(tolen) \ caddr_t to, \ __socklen_t tolen, \ _In_opt_ struct sctp_sndrcvinfo *sinfo, \ int flags); } 474 AUE_SCTP_GENERIC_RECVMSG NOSTD { int sctp_generic_recvmsg( \ int sd, \ _In_reads_(iovlen) struct iovec *iov, \ int iovlen, \ _Out_writes_bytes_(*fromlenaddr) \ struct sockaddr *from, \ _Out_ __socklen_t *fromlenaddr, \ _In_opt_ struct sctp_sndrcvinfo *sinfo, \ _Out_opt_ int *msg_flags); } 475 AUE_PREAD STD { ssize_t pread(int fd, \ _Out_writes_bytes_(nbyte) void *buf, \ size_t nbyte, off_t offset); } 476 AUE_PWRITE STD { ssize_t pwrite(int fd, \ _In_reads_bytes_(nbyte) \ const void *buf, \ size_t nbyte, off_t offset); } 477 AUE_MMAP STD { caddr_t mmap(_In_ caddr_t addr, size_t len, \ int prot, int flags, int fd, off_t pos); } 478 AUE_LSEEK STD { off_t lseek(int fd, off_t offset, \ int whence); } 479 AUE_TRUNCATE STD { int truncate(_In_z_ char *path, \ off_t length); } 480 AUE_FTRUNCATE STD { int ftruncate(int fd, off_t length); } 481 AUE_THR_KILL2 STD { int thr_kill2(pid_t pid, long id, int sig); } 482 AUE_SHMOPEN STD { int shm_open( \ _In_z_ const char *path, \ int flags, \ mode_t mode); } 483 AUE_SHMUNLINK STD { int shm_unlink(_In_z_ const char *path); } 484 AUE_NULL STD { int cpuset(_Out_ cpusetid_t *setid); } 485 AUE_NULL STD { int cpuset_setid(cpuwhich_t which, id_t id, \ cpusetid_t setid); } 486 AUE_NULL STD { int cpuset_getid(cpulevel_t level, \ cpuwhich_t which, id_t id, \ _Out_ cpusetid_t *setid); } 487 AUE_NULL STD { int cpuset_getaffinity(cpulevel_t level, \ cpuwhich_t which, id_t id, \ size_t cpusetsize, \ _Out_ cpuset_t *mask); } 488 AUE_NULL STD { int cpuset_setaffinity(cpulevel_t level, \ cpuwhich_t which, id_t id, \ size_t cpusetsize, \ _Out_ const cpuset_t *mask); } 489 AUE_FACCESSAT STD { int faccessat(int fd, _In_z_ char *path, \ int amode, int flag); } 490 AUE_FCHMODAT STD { int fchmodat(int fd, \ _In_z_ char *path, mode_t mode, \ int flag); } 491 AUE_FCHOWNAT STD { int fchownat(int fd, _In_z_ char *path, \ uid_t uid, gid_t gid, int flag); } 492 AUE_FEXECVE STD { int fexecve(int fd, \ _In_ char **argv, \ _In_ char **envv); } 493 AUE_FSTATAT COMPAT11 { int fstatat(int fd, _In_z_ char *path, \ _Out_ struct freebsd11_stat *buf, \ int flag); } 494 AUE_FUTIMESAT STD { int futimesat(int fd, \ _In_z_ char *path, \ _In_reads_(2) struct timeval *times); } 495 AUE_LINKAT STD { int linkat(int fd1, \ _In_z_ char *path1, \ int fd2, \ _In_z_ char *path2, \ int flag); } 496 AUE_MKDIRAT STD { int mkdirat(int fd, _In_z_ char *path, \ mode_t mode); } 497 AUE_MKFIFOAT STD { int mkfifoat(int fd, \ _In_z_ char *path, mode_t mode); } 498 AUE_MKNODAT COMPAT11 { int mknodat(int fd, _In_z_ char *path, \ mode_t mode, \ uint32_t dev); } ; XXX: see the comment for open 499 AUE_OPENAT_RWTC STD { int openat(int fd, _In_z_ char *path, \ int flag, mode_t mode); } 500 AUE_READLINKAT STD { int readlinkat(int fd, \ _In_z_ char *path, \ _Out_writes_bytes_(bufsize) char *buf, \ size_t bufsize); } 501 AUE_RENAMEAT STD { int renameat(int oldfd, \ _In_z_ char *old, \ int newfd, _In_z_ char *new); } 502 AUE_SYMLINKAT STD { int symlinkat(_In_z_ char *path1, int fd, \ _In_z_ char *path2); } 503 AUE_UNLINKAT STD { int unlinkat(int fd, _In_z_ char *path, \ int flag); } 504 AUE_POSIX_OPENPT STD { int posix_openpt(int flags); } ; 505 is initialised by the kgssapi code, if present. 505 AUE_NULL NOSTD { int gssd_syscall(_In_z_ char *path); } 506 AUE_JAIL_GET STD { int jail_get( \ _In_reads_(iovcnt) struct iovec *iovp, \ unsigned int iovcnt, int flags); } 507 AUE_JAIL_SET STD { int jail_set( \ _In_reads_(iovcnt) struct iovec *iovp, \ unsigned int iovcnt, int flags); } 508 AUE_JAIL_REMOVE STD { int jail_remove(int jid); } 509 AUE_CLOSEFROM STD { int closefrom(int lowfd); } 510 AUE_SEMCTL NOSTD { int __semctl(int semid, int semnum, \ int cmd, _Inout_ union semun *arg); } 511 AUE_MSGCTL NOSTD { int msgctl(int msqid, int cmd, \ _Inout_opt_ struct msqid_ds *buf); } 512 AUE_SHMCTL NOSTD { int shmctl(int shmid, int cmd, \ _Inout_opt_ struct shmid_ds *buf); } 513 AUE_LPATHCONF STD { int lpathconf(_In_z_ char *path, \ int name); } 514 AUE_NULL OBSOL cap_new 515 AUE_CAP_RIGHTS_GET STD { int __cap_rights_get(int version, \ int fd, _Out_ cap_rights_t *rightsp); } 516 AUE_CAP_ENTER STD { int cap_enter(void); } 517 AUE_CAP_GETMODE STD { int cap_getmode(_Out_ u_int *modep); } 518 AUE_PDFORK STD { int pdfork(_Out_ int *fdp, int flags); } 519 AUE_PDKILL STD { int pdkill(int fd, int signum); } 520 AUE_PDGETPID STD { int pdgetpid(int fd, _Out_ pid_t *pidp); } 521 AUE_PDWAIT UNIMPL pdwait4 522 AUE_SELECT STD { int pselect(int nd, \ _Inout_opt_ fd_set *in, \ _Inout_opt_ fd_set *ou, \ _Inout_opt_ fd_set *ex, \ _In_opt_ const struct timespec *ts, \ _In_opt_ const sigset_t *sm); } 523 AUE_GETLOGINCLASS STD { int getloginclass( \ _Out_writes_z_(namelen) char *namebuf, \ size_t namelen); } 524 AUE_SETLOGINCLASS STD { int setloginclass( \ _In_z_ const char *namebuf); } 525 AUE_NULL STD { int rctl_get_racct( \ _In_reads_bytes_(inbuflen) \ const void *inbufp, size_t inbuflen, \ _Out_writes_bytes_(outbuflen) \ void *outbufp, size_t outbuflen); } 526 AUE_NULL STD { int rctl_get_rules( \ _In_reads_bytes_(inbuflen) \ const void *inbufp, size_t inbuflen, \ _Out_writes_bytes_(outbuflen) \ void *outbufp, size_t outbuflen); } 527 AUE_NULL STD { int rctl_get_limits( \ _In_reads_bytes_(inbuflen) \ const void *inbufp, size_t inbuflen, \ _Out_writes_bytes_(outbuflen) \ void *outbufp, size_t outbuflen); } 528 AUE_NULL STD { int rctl_add_rule( \ _In_reads_bytes_(inbuflen) \ const void *inbufp, size_t inbuflen, \ _Out_writes_bytes_(outbuflen) \ void *outbufp, size_t outbuflen); } 529 AUE_NULL STD { int rctl_remove_rule( \ _In_reads_bytes_(inbuflen) \ const void *inbufp, size_t inbuflen, \ _Out_writes_bytes_(outbuflen) \ void *outbufp, size_t outbuflen); } 530 AUE_POSIX_FALLOCATE STD { int posix_fallocate(int fd, \ off_t offset, off_t len); } 531 AUE_POSIX_FADVISE STD { int posix_fadvise(int fd, \ off_t offset, \ off_t len, int advice); } 532 AUE_WAIT6 STD { int wait6(idtype_t idtype, id_t id, \ _Out_opt_ int *status, int options, \ _Out_opt_ struct __wrusage *wrusage, \ _Out_opt_ siginfo_t *info); } 533 AUE_CAP_RIGHTS_LIMIT STD { int cap_rights_limit(int fd, \ _In_ cap_rights_t *rightsp); } 534 AUE_CAP_IOCTLS_LIMIT STD { int cap_ioctls_limit(int fd, \ _In_reads_(ncmds) const u_long *cmds, \ size_t ncmds); } 535 AUE_CAP_IOCTLS_GET STD { ssize_t cap_ioctls_get(int fd, \ _Out_writes_(maxcmds) u_long *cmds, \ size_t maxcmds); } 536 AUE_CAP_FCNTLS_LIMIT STD { int cap_fcntls_limit(int fd, \ uint32_t fcntlrights); } 537 AUE_CAP_FCNTLS_GET STD { int cap_fcntls_get(int fd, \ _Out_ uint32_t *fcntlrightsp); } 538 AUE_BINDAT STD { int bindat(int fd, int s, \ _In_reads_bytes_(namelen) \ caddr_t name, \ int namelen); } 539 AUE_CONNECTAT STD { int connectat(int fd, int s, \ _In_reads_bytes_(namelen) \ caddr_t name, \ int namelen); } 540 AUE_CHFLAGSAT STD { int chflagsat(int fd, \ _In_z_ const char *path, \ u_long flags, int atflag); } 541 AUE_ACCEPT STD { int accept4(int s, \ _Out_writes_bytes_opt_(*anamelen) \ struct sockaddr * __restrict name, \ _Inout_opt_ \ __socklen_t * __restrict anamelen, \ int flags); } 542 AUE_PIPE STD { int pipe2(_Out_writes_(2) int *fildes, \ int flags); } 543 AUE_AIO_MLOCK STD { int aio_mlock(_In_ struct aiocb *aiocbp); } 544 AUE_PROCCTL STD { int procctl(idtype_t idtype, id_t id, \ int com, _In_opt_ void *data); } 545 AUE_POLL STD { int ppoll( \ _Inout_updates_(nfds) struct pollfd *fds, \ u_int nfds, \ _In_opt_ const struct timespec *ts, \ _In_opt_ const sigset_t *set); } 546 AUE_FUTIMES STD { int futimens(int fd, \ _In_reads_(2) \ struct timespec *times); } 547 AUE_FUTIMESAT STD { int utimensat(int fd, \ _In_z_ char *path, \ _In_reads_(2) \ struct timespec *times, \ int flag); } -548 AUE_NULL UNIMPL numa_getaffinity -549 AUE_NULL UNIMPL numa_setaffinity +548 AUE_NULL OBSOL numa_getaffinity +549 AUE_NULL OBSOL numa_setaffinity 550 AUE_FSYNC STD { int fdatasync(int fd); } 551 AUE_FSTAT STD { int fstat(int fd, _Out_ struct stat *sb); } 552 AUE_FSTATAT STD { int fstatat(int fd, _In_z_ char *path, \ _Out_ struct stat *buf, int flag); } 553 AUE_FHSTAT STD { int fhstat(_In_ const struct fhandle *u_fhp, \ _Out_ struct stat *sb); } 554 AUE_GETDIRENTRIES STD { ssize_t getdirentries(int fd, \ _Out_writes_bytes_(count) char *buf, \ size_t count, _Out_ off_t *basep); } 555 AUE_STATFS STD { int statfs(_In_z_ char *path, \ _Out_ struct statfs *buf); } 556 AUE_FSTATFS STD { int fstatfs(int fd, \ _Out_ struct statfs *buf); } 557 AUE_GETFSSTAT STD { int getfsstat( \ _Out_writes_bytes_opt_(bufsize) \ struct statfs *buf, \ long bufsize, int mode); } 558 AUE_FHSTATFS STD { int fhstatfs( \ _In_ const struct fhandle *u_fhp, \ _Out_ struct statfs *buf); } 559 AUE_MKNODAT STD { int mknodat(int fd, _In_z_ char *path, \ mode_t mode, dev_t dev); } 560 AUE_KEVENT STD { int kevent(int fd, \ _In_reads_opt_(nchanges) \ struct kevent *changelist, \ int nchanges, \ _Out_writes_opt_(nevents) \ struct kevent *eventlist, int nevents, \ _In_opt_ const struct timespec *timeout); } 561 AUE_NULL STD { int cpuset_getdomain(cpulevel_t level, \ cpuwhich_t which, id_t id, \ size_t domainsetsize, \ _Out_writes_bytes_(domainsetsize) \ domainset_t *mask, \ _Out_ int *policy); } 562 AUE_NULL STD { int cpuset_setdomain(cpulevel_t level, \ cpuwhich_t which, id_t id, \ size_t domainsetsize, \ _In_ domainset_t *mask, \ int policy); } 563 AUE_NULL STD { int getrandom( \ _Out_writes_bytes_(buflen) void *buf, \ size_t buflen, unsigned int flags); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master ; vim: syntax=off Index: projects/clang700-import/sys/net/if.c =================================================================== --- projects/clang700-import/sys/net/if.c (revision 339014) +++ projects/clang700-import/sys/net/if.c (revision 339015) @@ -1,4586 +1,4585 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1980, 1986, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * @(#)if.c 8.5 (Berkeley) 1/9/95 * $FreeBSD$ */ #include "opt_inet6.h" #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #include #include #include #include #ifdef INET #include #include #endif /* INET */ #ifdef INET6 #include #include #endif /* INET6 */ #endif /* INET || INET6 */ #include /* * Consumers of struct ifreq such as tcpdump assume no pad between ifr_name * and ifr_ifru when it is used in SIOCGIFCONF. */ _Static_assert(sizeof(((struct ifreq *)0)->ifr_name) == offsetof(struct ifreq, ifr_ifru), "gap between ifr_name and ifr_ifru"); __read_mostly epoch_t net_epoch_preempt; __read_mostly epoch_t net_epoch; #ifdef COMPAT_FREEBSD32 #include #include struct ifreq_buffer32 { uint32_t length; /* (size_t) */ uint32_t buffer; /* (void *) */ }; /* * Interface request structure used for socket * ioctl's. All interface ioctl's must have parameter * definitions which begin with ifr_name. The * remainder may be interface specific. */ struct ifreq32 { char ifr_name[IFNAMSIZ]; /* if name, e.g. "en0" */ union { struct sockaddr ifru_addr; struct sockaddr ifru_dstaddr; struct sockaddr ifru_broadaddr; struct ifreq_buffer32 ifru_buffer; short ifru_flags[2]; short ifru_index; int ifru_jid; int ifru_metric; int ifru_mtu; int ifru_phys; int ifru_media; uint32_t ifru_data; int ifru_cap[2]; u_int ifru_fib; u_char ifru_vlan_pcp; } ifr_ifru; }; CTASSERT(sizeof(struct ifreq) == sizeof(struct ifreq32)); CTASSERT(__offsetof(struct ifreq, ifr_ifru) == __offsetof(struct ifreq32, ifr_ifru)); struct ifgroupreq32 { char ifgr_name[IFNAMSIZ]; u_int ifgr_len; union { char ifgru_group[IFNAMSIZ]; uint32_t ifgru_groups; } ifgr_ifgru; }; struct ifmediareq32 { char ifm_name[IFNAMSIZ]; int ifm_current; int ifm_mask; int ifm_status; int ifm_active; int ifm_count; uint32_t ifm_ulist; /* (int *) */ }; #define SIOCGIFMEDIA32 _IOC_NEWTYPE(SIOCGIFMEDIA, struct ifmediareq32) #define SIOCGIFXMEDIA32 _IOC_NEWTYPE(SIOCGIFXMEDIA, struct ifmediareq32) #define _CASE_IOC_IFGROUPREQ_32(cmd) \ case _IOC_NEWTYPE((cmd), struct ifgroupreq32): #else /* !COMPAT_FREEBSD32 */ #define _CASE_IOC_IFGROUPREQ_32(cmd) #endif /* !COMPAT_FREEBSD32 */ #define CASE_IOC_IFGROUPREQ(cmd) \ _CASE_IOC_IFGROUPREQ_32(cmd) \ case (cmd) union ifreq_union { struct ifreq ifr; #ifdef COMPAT_FREEBSD32 struct ifreq32 ifr32; #endif }; union ifgroupreq_union { struct ifgroupreq ifgr; #ifdef COMPAT_FREEBSD32 struct ifgroupreq32 ifgr32; #endif }; SYSCTL_NODE(_net, PF_LINK, link, CTLFLAG_RW, 0, "Link layers"); SYSCTL_NODE(_net_link, 0, generic, CTLFLAG_RW, 0, "Generic link-management"); SYSCTL_INT(_net_link, OID_AUTO, ifqmaxlen, CTLFLAG_RDTUN, &ifqmaxlen, 0, "max send queue size"); /* Log link state change events */ static int log_link_state_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_link_state_change, CTLFLAG_RW, &log_link_state_change, 0, "log interface link state change events"); /* Log promiscuous mode change events */ static int log_promisc_mode_change = 1; SYSCTL_INT(_net_link, OID_AUTO, log_promisc_mode_change, CTLFLAG_RDTUN, &log_promisc_mode_change, 1, "log promiscuous mode change events"); /* Interface description */ static unsigned int ifdescr_maxlen = 1024; SYSCTL_UINT(_net, OID_AUTO, ifdescr_maxlen, CTLFLAG_RW, &ifdescr_maxlen, 0, "administrative maximum length for interface description"); static MALLOC_DEFINE(M_IFDESCR, "ifdescr", "ifnet descriptions"); /* global sx for non-critical path ifdescr */ static struct sx ifdescr_sx; SX_SYSINIT(ifdescr_sx, &ifdescr_sx, "ifnet descr"); void (*ng_ether_link_state_p)(struct ifnet *ifp, int state); void (*lagg_linkstate_p)(struct ifnet *ifp, int state); /* These are external hooks for CARP. */ void (*carp_linkstate_p)(struct ifnet *ifp); void (*carp_demote_adj_p)(int, char *); int (*carp_master_p)(struct ifaddr *); #if defined(INET) || defined(INET6) int (*carp_forus_p)(struct ifnet *ifp, u_char *dhost); int (*carp_output_p)(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *sa); int (*carp_ioctl_p)(struct ifreq *, u_long, struct thread *); int (*carp_attach_p)(struct ifaddr *, int); void (*carp_detach_p)(struct ifaddr *, bool); #endif #ifdef INET int (*carp_iamatch_p)(struct ifaddr *, uint8_t **); #endif #ifdef INET6 struct ifaddr *(*carp_iamatch6_p)(struct ifnet *ifp, struct in6_addr *taddr6); caddr_t (*carp_macmatch6_p)(struct ifnet *ifp, struct mbuf *m, const struct in6_addr *taddr); #endif struct mbuf *(*tbr_dequeue_ptr)(struct ifaltq *, int) = NULL; /* * XXX: Style; these should be sorted alphabetically, and unprototyped * static functions should be prototyped. Currently they are sorted by * declaration order. */ static void if_attachdomain(void *); static void if_attachdomain1(struct ifnet *); static int ifconf(u_long, caddr_t); static void *if_grow(void); static void if_input_default(struct ifnet *, struct mbuf *); static int if_requestencap_default(struct ifnet *, struct if_encap_req *); static void if_route(struct ifnet *, int flag, int fam); static int if_setflag(struct ifnet *, int, int, int *, int); static int if_transmit(struct ifnet *ifp, struct mbuf *m); static void if_unroute(struct ifnet *, int flag, int fam); static void link_rtrequest(int, struct rtentry *, struct rt_addrinfo *); -static int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *); static int if_delmulti_locked(struct ifnet *, struct ifmultiaddr *, int); static void do_link_state_change(void *, int); static int if_getgroup(struct ifgroupreq *, struct ifnet *); static int if_getgroupmembers(struct ifgroupreq *); static void if_delgroups(struct ifnet *); static void if_attach_internal(struct ifnet *, int, struct if_clone *); static int if_detach_internal(struct ifnet *, int, struct if_clone **); #ifdef VIMAGE static void if_vmove(struct ifnet *, struct vnet *); #endif #ifdef INET6 /* * XXX: declare here to avoid to include many inet6 related files.. * should be more generalized? */ extern void nd6_setmtu(struct ifnet *); #endif /* ipsec helper hooks */ VNET_DEFINE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DEFINE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); VNET_DEFINE(int, if_index); int ifqmaxlen = IFQ_MAXLEN; VNET_DEFINE(struct ifnethead, ifnet); /* depend on static init XXX */ VNET_DEFINE(struct ifgrouphead, ifg_head); VNET_DEFINE_STATIC(int, if_indexlim) = 8; /* Table of ifnet by index. */ VNET_DEFINE(struct ifnet **, ifindex_table); #define V_if_indexlim VNET(if_indexlim) #define V_ifindex_table VNET(ifindex_table) /* * The global network interface list (V_ifnet) and related state (such as * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and * an rwlock. Either may be acquired shared to stablize the list, but both * must be acquired writable to modify the list. This model allows us to * both stablize the interface list during interrupt thread processing, but * also to stablize it over long-running ioctls, without introducing priority * inversions and deadlocks. */ struct rwlock ifnet_rwlock; RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE); struct sx ifnet_sxlock; SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); /* * The allocation of network interfaces is a rather non-atomic affair; we * need to select an index before we are ready to expose the interface for * use, so will use this pointer value to indicate reservation. */ #define IFNET_HOLD (void *)(uintptr_t)(-1) static if_com_alloc_t *if_com_alloc[256]; static if_com_free_t *if_com_free[256]; static MALLOC_DEFINE(M_IFNET, "ifnet", "interface internals"); MALLOC_DEFINE(M_IFADDR, "ifaddr", "interface address"); MALLOC_DEFINE(M_IFMADDR, "ether_multi", "link-level multicast address"); struct ifnet * ifnet_byindex_locked(u_short idx) { if (idx > V_if_index) return (NULL); if (V_ifindex_table[idx] == IFNET_HOLD) return (NULL); return (V_ifindex_table[idx]); } struct ifnet * ifnet_byindex(u_short idx) { struct ifnet *ifp; ifp = ifnet_byindex_locked(idx); return (ifp); } struct ifnet * ifnet_byindex_ref(u_short idx) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); if (ifp == NULL || (ifp->if_flags & IFF_DYING)) { IFNET_RUNLOCK_NOSLEEP(); return (NULL); } if_ref(ifp); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } /* * Allocate an ifindex array entry; return 0 on success or an error on * failure. */ static u_short ifindex_alloc(void **old) { u_short idx; IFNET_WLOCK_ASSERT(); /* * Try to find an empty slot below V_if_index. If we fail, take the * next slot. */ for (idx = 1; idx <= V_if_index; idx++) { if (V_ifindex_table[idx] == NULL) break; } /* Catch if_index overflow. */ if (idx >= V_if_indexlim) { *old = if_grow(); return (USHRT_MAX); } if (idx > V_if_index) V_if_index = idx; return (idx); } static void ifindex_free_locked(u_short idx) { IFNET_WLOCK_ASSERT(); V_ifindex_table[idx] = NULL; while (V_if_index > 0 && V_ifindex_table[V_if_index] == NULL) V_if_index--; } static void ifindex_free(u_short idx) { IFNET_WLOCK(); ifindex_free_locked(idx); IFNET_WUNLOCK(); } static void ifnet_setbyindex(u_short idx, struct ifnet *ifp) { V_ifindex_table[idx] = ifp; } struct ifaddr * ifaddr_byindex(u_short idx) { struct ifnet *ifp; struct ifaddr *ifa = NULL; IFNET_RLOCK_NOSLEEP(); ifp = ifnet_byindex_locked(idx); if (ifp != NULL && (ifa = ifp->if_addr) != NULL) ifa_ref(ifa); IFNET_RUNLOCK_NOSLEEP(); return (ifa); } /* * Network interface utility routines. * * Routines with ifa_ifwith* names take sockaddr *'s as * parameters. */ static void vnet_if_init(const void *unused __unused) { void *old; CK_STAILQ_INIT(&V_ifnet); CK_STAILQ_INIT(&V_ifg_head); IFNET_WLOCK(); old = if_grow(); /* create initial table */ IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); vnet_if_clone_init(); } VNET_SYSINIT(vnet_if_init, SI_SUB_INIT_IF, SI_ORDER_SECOND, vnet_if_init, NULL); #ifdef VIMAGE static void vnet_if_uninit(const void *unused __unused) { VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifnet), ("%s:%d tailq &V_ifnet=%p " "not empty", __func__, __LINE__, &V_ifnet)); VNET_ASSERT(CK_STAILQ_EMPTY(&V_ifg_head), ("%s:%d tailq &V_ifg_head=%p " "not empty", __func__, __LINE__, &V_ifg_head)); free((caddr_t)V_ifindex_table, M_IFNET); } VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_uninit, NULL); static void vnet_if_return(const void *unused __unused) { struct ifnet *ifp, *nifp; /* Return all inherited interfaces to their parent vnets. */ CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { if (ifp->if_home_vnet != ifp->if_vnet) if_vmove(ifp, ifp->if_home_vnet); } } VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY, vnet_if_return, NULL); #endif static void * if_grow(void) { int oldlim; u_int n; struct ifnet **e; void *old; old = NULL; IFNET_WLOCK_ASSERT(); oldlim = V_if_indexlim; IFNET_WUNLOCK(); n = (oldlim << 1) * sizeof(*e); e = malloc(n, M_IFNET, M_WAITOK | M_ZERO); IFNET_WLOCK(); if (V_if_indexlim != oldlim) { free(e, M_IFNET); return (NULL); } if (V_ifindex_table != NULL) { memcpy((caddr_t)e, (caddr_t)V_ifindex_table, n/2); old = V_ifindex_table; } V_if_indexlim <<= 1; V_ifindex_table = e; return (old); } /* * Allocate a struct ifnet and an index for an interface. A layer 2 * common structure will also be allocated if an allocation routine is * registered for the passed type. */ struct ifnet * if_alloc(u_char type) { struct ifnet *ifp; u_short idx; void *old; ifp = malloc(sizeof(struct ifnet), M_IFNET, M_WAITOK|M_ZERO); restart: IFNET_WLOCK(); idx = ifindex_alloc(&old); if (__predict_false(idx == USHRT_MAX)) { IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); goto restart; } ifnet_setbyindex(idx, IFNET_HOLD); IFNET_WUNLOCK(); ifp->if_index = idx; ifp->if_type = type; ifp->if_alloctype = type; #ifdef VIMAGE ifp->if_vnet = curvnet; #endif if (if_com_alloc[type] != NULL) { ifp->if_l2com = if_com_alloc[type](type, ifp); if (ifp->if_l2com == NULL) { free(ifp, M_IFNET); ifindex_free(idx); return (NULL); } } IF_ADDR_LOCK_INIT(ifp); TASK_INIT(&ifp->if_linktask, 0, do_link_state_change, ifp); ifp->if_afdata_initialized = 0; IF_AFDATA_LOCK_INIT(ifp); CK_STAILQ_INIT(&ifp->if_addrhead); CK_STAILQ_INIT(&ifp->if_multiaddrs); CK_STAILQ_INIT(&ifp->if_groups); #ifdef MAC mac_ifnet_init(ifp); #endif ifq_init(&ifp->if_snd, ifp); refcount_init(&ifp->if_refcount, 1); /* Index reference. */ for (int i = 0; i < IFCOUNTERS; i++) ifp->if_counters[i] = counter_u64_alloc(M_WAITOK); ifp->if_get_counter = if_get_counter_default; ifp->if_pcp = IFNET_PCP_NONE; ifnet_setbyindex(ifp->if_index, ifp); return (ifp); } /* * Do the actual work of freeing a struct ifnet, and layer 2 common * structure. This call is made when the last reference to an * interface is released. */ static void if_free_internal(struct ifnet *ifp) { KASSERT((ifp->if_flags & IFF_DYING), ("if_free_internal: interface not dying")); if (if_com_free[ifp->if_alloctype] != NULL) if_com_free[ifp->if_alloctype](ifp->if_l2com, ifp->if_alloctype); #ifdef MAC mac_ifnet_destroy(ifp); #endif /* MAC */ if (ifp->if_description != NULL) free(ifp->if_description, M_IFDESCR); IF_AFDATA_DESTROY(ifp); IF_ADDR_LOCK_DESTROY(ifp); ifq_delete(&ifp->if_snd); for (int i = 0; i < IFCOUNTERS; i++) counter_u64_free(ifp->if_counters[i]); free(ifp, M_IFNET); } static void if_destroy(epoch_context_t ctx) { struct ifnet *ifp; ifp = __containerof(ctx, struct ifnet, if_epoch_ctx); if_free_internal(ifp); } /* * Deregister an interface and free the associated storage. */ void if_free(struct ifnet *ifp) { ifp->if_flags |= IFF_DYING; /* XXX: Locking */ CURVNET_SET_QUIET(ifp->if_vnet); IFNET_WLOCK(); KASSERT(ifp == ifnet_byindex_locked(ifp->if_index), ("%s: freeing unallocated ifnet", ifp->if_xname)); ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); if (refcount_release(&ifp->if_refcount)) epoch_call(net_epoch_preempt, &ifp->if_epoch_ctx, if_destroy); CURVNET_RESTORE(); } /* * Interfaces to keep an ifnet type-stable despite the possibility of the * driver calling if_free(). If there are additional references, we defer * freeing the underlying data structure. */ void if_ref(struct ifnet *ifp) { /* We don't assert the ifnet list lock here, but arguably should. */ refcount_acquire(&ifp->if_refcount); } void if_rele(struct ifnet *ifp) { if (!refcount_release(&ifp->if_refcount)) return; epoch_call(net_epoch_preempt, &ifp->if_epoch_ctx, if_destroy); } void ifq_init(struct ifaltq *ifq, struct ifnet *ifp) { mtx_init(&ifq->ifq_mtx, ifp->if_xname, "if send queue", MTX_DEF); if (ifq->ifq_maxlen == 0) ifq->ifq_maxlen = ifqmaxlen; ifq->altq_type = 0; ifq->altq_disc = NULL; ifq->altq_flags &= ALTQF_CANTCHANGE; ifq->altq_tbr = NULL; ifq->altq_ifp = ifp; } void ifq_delete(struct ifaltq *ifq) { mtx_destroy(&ifq->ifq_mtx); } /* * Perform generic interface initialization tasks and attach the interface * to the list of "active" interfaces. If vmove flag is set on entry * to if_attach_internal(), perform only a limited subset of initialization * tasks, given that we are moving from one vnet to another an ifnet which * has already been fully initialized. * * Note that if_detach_internal() removes group membership unconditionally * even when vmove flag is set, and if_attach_internal() adds only IFG_ALL. * Thus, when if_vmove() is applied to a cloned interface, group membership * is lost while a cloned one always joins a group whose name is * ifc->ifc_name. To recover this after if_detach_internal() and * if_attach_internal(), the cloner should be specified to * if_attach_internal() via ifc. If it is non-NULL, if_attach_internal() * attempts to join a group whose name is ifc->ifc_name. * * XXX: * - The decision to return void and thus require this function to * succeed is questionable. * - We should probably do more sanity checking. For instance we don't * do anything to insure if_xname is unique or non-empty. */ void if_attach(struct ifnet *ifp) { if_attach_internal(ifp, 0, NULL); } /* * Compute the least common TSO limit. */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *pmax) { /* * 1) If there is no limit currently, take the limit from * the network adapter. * * 2) If the network adapter has a limit below the current * limit, apply it. */ if (pmax->tsomaxbytes == 0 || (ifp->if_hw_tsomax != 0 && ifp->if_hw_tsomax < pmax->tsomaxbytes)) { pmax->tsomaxbytes = ifp->if_hw_tsomax; } if (pmax->tsomaxsegcount == 0 || (ifp->if_hw_tsomaxsegcount != 0 && ifp->if_hw_tsomaxsegcount < pmax->tsomaxsegcount)) { pmax->tsomaxsegcount = ifp->if_hw_tsomaxsegcount; } if (pmax->tsomaxsegsize == 0 || (ifp->if_hw_tsomaxsegsize != 0 && ifp->if_hw_tsomaxsegsize < pmax->tsomaxsegsize)) { pmax->tsomaxsegsize = ifp->if_hw_tsomaxsegsize; } } /* * Update TSO limit of a network adapter. * * Returns zero if no change. Else non-zero. */ int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *pmax) { int retval = 0; if (ifp->if_hw_tsomax != pmax->tsomaxbytes) { ifp->if_hw_tsomax = pmax->tsomaxbytes; retval++; } if (ifp->if_hw_tsomaxsegsize != pmax->tsomaxsegsize) { ifp->if_hw_tsomaxsegsize = pmax->tsomaxsegsize; retval++; } if (ifp->if_hw_tsomaxsegcount != pmax->tsomaxsegcount) { ifp->if_hw_tsomaxsegcount = pmax->tsomaxsegcount; retval++; } return (retval); } static void if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc) { unsigned socksize, ifasize; int namelen, masklen; struct sockaddr_dl *sdl; struct ifaddr *ifa; if (ifp->if_index == 0 || ifp != ifnet_byindex(ifp->if_index)) panic ("%s: BUG: if_attach called without if_alloc'd input()\n", ifp->if_xname); #ifdef VIMAGE ifp->if_vnet = curvnet; if (ifp->if_home_vnet == NULL) ifp->if_home_vnet = curvnet; #endif if_addgroup(ifp, IFG_ALL); /* Restore group membership for cloned interfaces. */ if (vmove && ifc != NULL) if_clone_addgroup(ifp, ifc); getmicrotime(&ifp->if_lastchange); ifp->if_epoch = time_uptime; KASSERT((ifp->if_transmit == NULL && ifp->if_qflush == NULL) || (ifp->if_transmit != NULL && ifp->if_qflush != NULL), ("transmit and qflush must both either be set or both be NULL")); if (ifp->if_transmit == NULL) { ifp->if_transmit = if_transmit; ifp->if_qflush = if_qflush; } if (ifp->if_input == NULL) ifp->if_input = if_input_default; if (ifp->if_requestencap == NULL) ifp->if_requestencap = if_requestencap_default; if (!vmove) { #ifdef MAC mac_ifnet_create(ifp); #endif /* * Create a Link Level name for this device. */ namelen = strlen(ifp->if_xname); /* * Always save enough space for any possiable name so we * can do a rename in place later. */ masklen = offsetof(struct sockaddr_dl, sdl_data[0]) + IFNAMSIZ; socksize = masklen + ifp->if_addrlen; if (socksize < sizeof(*sdl)) socksize = sizeof(*sdl); socksize = roundup2(socksize, sizeof(long)); ifasize = sizeof(*ifa) + 2 * socksize; ifa = ifa_alloc(ifasize, M_WAITOK); sdl = (struct sockaddr_dl *)(ifa + 1); sdl->sdl_len = socksize; sdl->sdl_family = AF_LINK; bcopy(ifp->if_xname, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl->sdl_index = ifp->if_index; sdl->sdl_type = ifp->if_type; ifp->if_addr = ifa; ifa->ifa_ifp = ifp; ifa->ifa_rtrequest = link_rtrequest; ifa->ifa_addr = (struct sockaddr *)sdl; sdl = (struct sockaddr_dl *)(socksize + (caddr_t)sdl); ifa->ifa_netmask = (struct sockaddr *)sdl; sdl->sdl_len = masklen; while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; CK_STAILQ_INSERT_HEAD(&ifp->if_addrhead, ifa, ifa_link); /* Reliably crash if used uninitialized. */ ifp->if_broadcastaddr = NULL; if (ifp->if_type == IFT_ETHER) { ifp->if_hw_addr = malloc(ifp->if_addrlen, M_IFADDR, M_WAITOK | M_ZERO); } #if defined(INET) || defined(INET6) /* Use defaults for TSO, if nothing is set */ if (ifp->if_hw_tsomax == 0 && ifp->if_hw_tsomaxsegcount == 0 && ifp->if_hw_tsomaxsegsize == 0) { /* * The TSO defaults needs to be such that an * NFS mbuf list of 35 mbufs totalling just * below 64K works and that a chain of mbufs * can be defragged into at most 32 segments: */ ifp->if_hw_tsomax = min(IP_MAXPACKET, (32 * MCLBYTES) - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)); ifp->if_hw_tsomaxsegcount = 35; ifp->if_hw_tsomaxsegsize = 2048; /* 2K */ /* XXX some drivers set IFCAP_TSO after ethernet attach */ if (ifp->if_capabilities & IFCAP_TSO) { if_printf(ifp, "Using defaults for TSO: %u/%u/%u\n", ifp->if_hw_tsomax, ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize); } } #endif } #ifdef VIMAGE else { /* * Update the interface index in the link layer address * of the interface. */ for (ifa = ifp->if_addr; ifa != NULL; ifa = CK_STAILQ_NEXT(ifa, ifa_link)) { if (ifa->ifa_addr->sa_family == AF_LINK) { sdl = (struct sockaddr_dl *)ifa->ifa_addr; sdl->sdl_index = ifp->if_index; } } } #endif IFNET_WLOCK(); CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); #ifdef VIMAGE curvnet->vnet_ifcnt++; #endif IFNET_WUNLOCK(); if (domain_init_status >= 2) if_attachdomain1(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "ATTACH", NULL); /* Announce the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); } static void if_epochalloc(void *dummy __unused) { net_epoch_preempt = epoch_alloc(EPOCH_PREEMPT); net_epoch = epoch_alloc(0); } SYSINIT(ifepochalloc, SI_SUB_TASKQ + 1, SI_ORDER_ANY, if_epochalloc, NULL); static void if_attachdomain(void *dummy) { struct ifnet *ifp; CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) if_attachdomain1(ifp); } SYSINIT(domainifattach, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_SECOND, if_attachdomain, NULL); static void if_attachdomain1(struct ifnet *ifp) { struct domain *dp; /* * Since dp->dom_ifattach calls malloc() with M_WAITOK, we * cannot lock ifp->if_afdata initialization, entirely. */ IF_AFDATA_LOCK(ifp); if (ifp->if_afdata_initialized >= domain_init_status) { IF_AFDATA_UNLOCK(ifp); log(LOG_WARNING, "%s called more than once on %s\n", __func__, ifp->if_xname); return; } ifp->if_afdata_initialized = domain_init_status; IF_AFDATA_UNLOCK(ifp); /* address family dependent data region */ bzero(ifp->if_afdata, sizeof(ifp->if_afdata)); for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_ifattach) ifp->if_afdata[dp->dom_family] = (*dp->dom_ifattach)(ifp); } } /* * Remove any unicast or broadcast network addresses from an interface. */ void if_purgeaddrs(struct ifnet *ifp) { struct ifaddr *ifa, *next; NET_EPOCH_ENTER(); CK_STAILQ_FOREACH_SAFE(ifa, &ifp->if_addrhead, ifa_link, next) { if (ifa->ifa_addr->sa_family == AF_LINK) continue; #ifdef INET /* XXX: Ugly!! ad hoc just for INET */ if (ifa->ifa_addr->sa_family == AF_INET) { struct ifaliasreq ifr; bzero(&ifr, sizeof(ifr)); ifr.ifra_addr = *ifa->ifa_addr; if (ifa->ifa_dstaddr) ifr.ifra_broadaddr = *ifa->ifa_dstaddr; if (in_control(NULL, SIOCDIFADDR, (caddr_t)&ifr, ifp, NULL) == 0) continue; } #endif /* INET */ #ifdef INET6 if (ifa->ifa_addr->sa_family == AF_INET6) { in6_purgeaddr(ifa); /* ifp_addrhead is already updated */ continue; } #endif /* INET6 */ IF_ADDR_WLOCK(ifp); CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } NET_EPOCH_EXIT(); } /* * Remove any multicast network addresses from an interface when an ifnet * is going away. */ static void if_purgemaddrs(struct ifnet *ifp) { struct ifmultiaddr *ifma; IF_ADDR_WLOCK(ifp); while (!CK_STAILQ_EMPTY(&ifp->if_multiaddrs)) { ifma = CK_STAILQ_FIRST(&ifp->if_multiaddrs); CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); if_delmulti_locked(ifp, ifma, 1); } IF_ADDR_WUNLOCK(ifp); } /* * Detach an interface, removing it from the list of "active" interfaces. * If vmove flag is set on entry to if_detach_internal(), perform only a * limited subset of cleanup tasks, given that we are moving an ifnet from * one vnet to another, where it must be fully operational. * * XXXRW: There are some significant questions about event ordering, and * how to prevent things from starting to use the interface during detach. */ void if_detach(struct ifnet *ifp) { CURVNET_SET_QUIET(ifp->if_vnet); if_detach_internal(ifp, 0, NULL); CURVNET_RESTORE(); } /* * The vmove flag, if set, indicates that we are called from a callpath * that is moving an interface to a different vnet instance. * * The shutdown flag, if set, indicates that we are called in the * process of shutting down a vnet instance. Currently only the * vnet_if_return SYSUNINIT function sets it. Note: we can be called * on a vnet instance shutdown without this flag being set, e.g., when * the cloned interfaces are destoyed as first thing of teardown. */ static int if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) { struct ifaddr *ifa; int i; struct domain *dp; struct ifnet *iter; int found = 0; #ifdef VIMAGE int shutdown; shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; #endif IFNET_WLOCK(); CK_STAILQ_FOREACH(iter, &V_ifnet, if_link) if (iter == ifp) { CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link); found = 1; break; } IFNET_WUNLOCK(); if (!found) { /* * While we would want to panic here, we cannot * guarantee that the interface is indeed still on * the list given we don't hold locks all the way. */ return (ENOENT); #if 0 if (vmove) panic("%s: ifp=%p not on the ifnet tailq %p", __func__, ifp, &V_ifnet); else return; /* XXX this should panic as well? */ #endif } /* * At this point we know the interface still was on the ifnet list * and we removed it so we are in a stable state. */ #ifdef VIMAGE curvnet->vnet_ifcnt--; #endif epoch_wait_preempt(net_epoch_preempt); /* * In any case (destroy or vmove) detach us from the groups * and remove/wait for pending events on the taskq. * XXX-BZ in theory an interface could still enqueue a taskq change? */ if_delgroups(ifp); taskqueue_drain(taskqueue_swi, &ifp->if_linktask); /* * Check if this is a cloned interface or not. Must do even if * shutting down as a if_vmove_reclaim() would move the ifp and * the if_clone_addgroup() will have a corrupted string overwise * from a gibberish pointer. */ if (vmove && ifcp != NULL) *ifcp = if_clone_findifc(ifp); if_down(ifp); #ifdef VIMAGE /* * On VNET shutdown abort here as the stack teardown will do all * the work top-down for us. */ if (shutdown) { /* * In case of a vmove we are done here without error. * If we would signal an error it would lead to the same * abort as if we did not find the ifnet anymore. * if_detach() calls us in void context and does not care * about an early abort notification, so life is splendid :) */ goto finish_vnet_shutdown; } #endif /* * At this point we are not tearing down a VNET and are either * going to destroy or vmove the interface and have to cleanup * accordingly. */ /* * Remove routes and flush queues. */ #ifdef ALTQ if (ALTQ_IS_ENABLED(&ifp->if_snd)) altq_disable(&ifp->if_snd); if (ALTQ_IS_ATTACHED(&ifp->if_snd)) altq_detach(&ifp->if_snd); #endif if_purgeaddrs(ifp); #ifdef INET in_ifdetach(ifp); #endif #ifdef INET6 /* * Remove all IPv6 kernel structs related to ifp. This should be done * before removing routing entries below, since IPv6 interface direct * routes are expected to be removed by the IPv6-specific kernel API. * Otherwise, the kernel will detect some inconsistency and bark it. */ in6_ifdetach(ifp); #endif if_purgemaddrs(ifp); /* Announce that the interface is gone. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, "DETACH", NULL); if (!vmove) { /* * Prevent further calls into the device driver via ifnet. */ if_dead(ifp); /* * Remove link ifaddr pointer and maybe decrement if_index. * Clean up all addresses. */ free(ifp->if_hw_addr, M_IFADDR); ifp->if_hw_addr = NULL; ifp->if_addr = NULL; /* We can now free link ifaddr. */ IF_ADDR_WLOCK(ifp); if (!CK_STAILQ_EMPTY(&ifp->if_addrhead)) { ifa = CK_STAILQ_FIRST(&ifp->if_addrhead); CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link); IF_ADDR_WUNLOCK(ifp); ifa_free(ifa); } else IF_ADDR_WUNLOCK(ifp); } rt_flushifroutes(ifp); #ifdef VIMAGE finish_vnet_shutdown: #endif /* * We cannot hold the lock over dom_ifdetach calls as they might * sleep, for example trying to drain a callout, thus open up the * theoretical race with re-attaching. */ IF_AFDATA_LOCK(ifp); i = ifp->if_afdata_initialized; ifp->if_afdata_initialized = 0; IF_AFDATA_UNLOCK(ifp); for (dp = domains; i > 0 && dp; dp = dp->dom_next) { if (dp->dom_ifdetach && ifp->if_afdata[dp->dom_family]) { (*dp->dom_ifdetach)(ifp, ifp->if_afdata[dp->dom_family]); ifp->if_afdata[dp->dom_family] = NULL; } } return (0); } #ifdef VIMAGE /* * if_vmove() performs a limited version of if_detach() in current * vnet and if_attach()es the ifnet to the vnet specified as 2nd arg. * An attempt is made to shrink if_index in current vnet, find an * unused if_index in target vnet and calls if_grow() if necessary, * and finally find an unused if_xname for the target vnet. */ static void if_vmove(struct ifnet *ifp, struct vnet *new_vnet) { struct if_clone *ifc; u_int bif_dlt, bif_hdrlen; void *old; int rc; /* * if_detach_internal() will call the eventhandler to notify * interface departure. That will detach if_bpf. We need to * safe the dlt and hdrlen so we can re-attach it later. */ bpf_get_bp_params(ifp->if_bpf, &bif_dlt, &bif_hdrlen); /* * Detach from current vnet, but preserve LLADDR info, do not * mark as dead etc. so that the ifnet can be reattached later. * If we cannot find it, we lost the race to someone else. */ rc = if_detach_internal(ifp, 1, &ifc); if (rc != 0) return; /* * Unlink the ifnet from ifindex_table[] in current vnet, and shrink * the if_index for that vnet if possible. * * NOTE: IFNET_WLOCK/IFNET_WUNLOCK() are assumed to be unvirtualized, * or we'd lock on one vnet and unlock on another. */ IFNET_WLOCK(); ifindex_free_locked(ifp->if_index); IFNET_WUNLOCK(); /* * Perform interface-specific reassignment tasks, if provided by * the driver. */ if (ifp->if_reassign != NULL) ifp->if_reassign(ifp, new_vnet, NULL); /* * Switch to the context of the target vnet. */ CURVNET_SET_QUIET(new_vnet); restart: IFNET_WLOCK(); ifp->if_index = ifindex_alloc(&old); if (__predict_false(ifp->if_index == USHRT_MAX)) { IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(old, M_IFNET); goto restart; } ifnet_setbyindex(ifp->if_index, ifp); IFNET_WUNLOCK(); if_attach_internal(ifp, 1, ifc); if (ifp->if_bpf == NULL) bpfattach(ifp, bif_dlt, bif_hdrlen); CURVNET_RESTORE(); } /* * Move an ifnet to or from another child prison/vnet, specified by the jail id. */ static int if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) { struct prison *pr; struct ifnet *difp; int shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Do not try to move the iface from and to the same prison. */ if (pr->pr_vnet == ifp->if_vnet) { prison_free(pr); return (EEXIST); } /* Make sure the named iface does not exists in the dst. prison/vnet. */ /* XXX Lock interfaces to avoid races. */ CURVNET_SET_QUIET(pr->pr_vnet); difp = ifunit(ifname); if (difp != NULL) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; if (shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } CURVNET_RESTORE(); /* Move the interface into the child jail/vnet. */ if_vmove(ifp, pr->pr_vnet); /* Report the new if_xname back to the userland. */ sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (0); } static int if_vmove_reclaim(struct thread *td, char *ifname, int jid) { struct prison *pr; struct vnet *vnet_dst; struct ifnet *ifp; int shutdown; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); pr = prison_find_child(td->td_ucred->cr_prison, jid); sx_sunlock(&allprison_lock); if (pr == NULL) return (ENXIO); prison_hold_locked(pr); mtx_unlock(&pr->pr_mtx); /* Make sure the named iface exists in the source prison/vnet. */ CURVNET_SET(pr->pr_vnet); ifp = ifunit(ifname); /* XXX Lock to avoid races. */ if (ifp == NULL) { CURVNET_RESTORE(); prison_free(pr); return (ENXIO); } /* Do not try to move the iface from and to the same prison. */ vnet_dst = TD_TO_VNET(td); if (vnet_dst == ifp->if_vnet) { CURVNET_RESTORE(); prison_free(pr); return (EEXIST); } /* Make sure the VNET is stable. */ shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; if (shutdown) { CURVNET_RESTORE(); prison_free(pr); return (EBUSY); } /* Get interface back from child jail/vnet. */ if_vmove(ifp, vnet_dst); CURVNET_RESTORE(); /* Report the new if_xname back to the userland. */ sprintf(ifname, "%s", ifp->if_xname); prison_free(pr); return (0); } #endif /* VIMAGE */ /* * Add a group to an interface */ int if_addgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_group *ifg = NULL; struct ifg_member *ifgm; int new = 0; if (groupname[0] && groupname[strlen(groupname) - 1] >= '0' && groupname[strlen(groupname) - 1] <= '9') return (EINVAL); IFNET_WLOCK(); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) { IFNET_WUNLOCK(); return (EEXIST); } if ((ifgl = (struct ifg_list *)malloc(sizeof(struct ifg_list), M_TEMP, M_NOWAIT)) == NULL) { IFNET_WUNLOCK(); return (ENOMEM); } if ((ifgm = (struct ifg_member *)malloc(sizeof(struct ifg_member), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, groupname)) break; if (ifg == NULL) { if ((ifg = (struct ifg_group *)malloc(sizeof(struct ifg_group), M_TEMP, M_NOWAIT)) == NULL) { free(ifgl, M_TEMP); free(ifgm, M_TEMP); IFNET_WUNLOCK(); return (ENOMEM); } strlcpy(ifg->ifg_group, groupname, sizeof(ifg->ifg_group)); ifg->ifg_refcnt = 0; CK_STAILQ_INIT(&ifg->ifg_members); CK_STAILQ_INSERT_TAIL(&V_ifg_head, ifg, ifg_next); new = 1; } ifg->ifg_refcnt++; ifgl->ifgl_group = ifg; ifgm->ifgm_ifp = ifp; IF_ADDR_WLOCK(ifp); CK_STAILQ_INSERT_TAIL(&ifg->ifg_members, ifgm, ifgm_next); CK_STAILQ_INSERT_TAIL(&ifp->if_groups, ifgl, ifgl_next); IF_ADDR_WUNLOCK(ifp); IFNET_WUNLOCK(); if (new) EVENTHANDLER_INVOKE(group_attach_event, ifg); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Remove a group from an interface */ int if_delgroup(struct ifnet *ifp, const char *groupname) { struct ifg_list *ifgl; struct ifg_member *ifgm; int freeifgl; IFNET_WLOCK(); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) if (!strcmp(ifgl->ifgl_group->ifg_group, groupname)) break; if (ifgl == NULL) { IFNET_WUNLOCK(); return (ENOENT); } freeifgl = 0; IF_ADDR_WLOCK(ifp); CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next); IF_ADDR_WUNLOCK(ifp); CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) if (ifgm->ifgm_ifp == ifp) break; if (ifgm != NULL) CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifg_member, ifgm_next); if (--ifgl->ifgl_group->ifg_refcnt == 0) { CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group, ifg_next); freeifgl = 1; } IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); if (freeifgl) { EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } free(ifgm, M_TEMP); free(ifgl, M_TEMP); EVENTHANDLER_INVOKE(group_change_event, groupname); return (0); } /* * Remove an interface from all groups */ static void if_delgroups(struct ifnet *ifp) { struct ifg_list *ifgl; struct ifg_member *ifgm; char groupname[IFNAMSIZ]; int ifglfree; IFNET_WLOCK(); while (!CK_STAILQ_EMPTY(&ifp->if_groups)) { ifgl = CK_STAILQ_FIRST(&ifp->if_groups); strlcpy(groupname, ifgl->ifgl_group->ifg_group, IFNAMSIZ); IF_ADDR_WLOCK(ifp); CK_STAILQ_REMOVE(&ifp->if_groups, ifgl, ifg_list, ifgl_next); IF_ADDR_WUNLOCK(ifp); CK_STAILQ_FOREACH(ifgm, &ifgl->ifgl_group->ifg_members, ifgm_next) if (ifgm->ifgm_ifp == ifp) break; if (ifgm != NULL) CK_STAILQ_REMOVE(&ifgl->ifgl_group->ifg_members, ifgm, ifg_member, ifgm_next); ifglfree = 0; if (--ifgl->ifgl_group->ifg_refcnt == 0) { CK_STAILQ_REMOVE(&V_ifg_head, ifgl->ifgl_group, ifg_group, ifg_next); ifglfree = 1; } IFNET_WUNLOCK(); epoch_wait_preempt(net_epoch_preempt); free(ifgm, M_TEMP); if (ifglfree) { EVENTHANDLER_INVOKE(group_detach_event, ifgl->ifgl_group); free(ifgl->ifgl_group, M_TEMP); } EVENTHANDLER_INVOKE(group_change_event, groupname); IFNET_WLOCK(); } IFNET_WUNLOCK(); } static char * ifgr_group_get(void *ifgrp) { union ifgroupreq_union *ifgrup; ifgrup = ifgrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return (&ifgrup->ifgr32.ifgr_ifgru.ifgru_group[0]); #endif return (&ifgrup->ifgr.ifgr_ifgru.ifgru_group[0]); } static struct ifg_req * ifgr_groups_get(void *ifgrp) { union ifgroupreq_union *ifgrup; ifgrup = ifgrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((struct ifg_req *)(uintptr_t) ifgrup->ifgr32.ifgr_ifgru.ifgru_groups); #endif return (ifgrup->ifgr.ifgr_ifgru.ifgru_groups); } /* * Stores all groups from an interface in memory pointed to by ifgr. */ static int if_getgroup(struct ifgroupreq *ifgr, struct ifnet *ifp) { int len, error; struct ifg_list *ifgl; struct ifg_req ifgrq, *ifgp; if (ifgr->ifgr_len == 0) { IF_ADDR_RLOCK(ifp); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) ifgr->ifgr_len += sizeof(struct ifg_req); IF_ADDR_RUNLOCK(ifp); return (0); } len = ifgr->ifgr_len; ifgp = ifgr_groups_get(ifgr); /* XXX: wire */ IF_ADDR_RLOCK(ifp); CK_STAILQ_FOREACH(ifgl, &ifp->if_groups, ifgl_next) { if (len < sizeof(ifgrq)) { IF_ADDR_RUNLOCK(ifp); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_group, ifgl->ifgl_group->ifg_group, sizeof(ifgrq.ifgrq_group)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IF_ADDR_RUNLOCK(ifp); return (error); } len -= sizeof(ifgrq); ifgp++; } IF_ADDR_RUNLOCK(ifp); return (0); } /* * Stores all members of a group in memory pointed to by igfr */ static int if_getgroupmembers(struct ifgroupreq *ifgr) { struct ifg_group *ifg; struct ifg_member *ifgm; struct ifg_req ifgrq, *ifgp; int len, error; IFNET_RLOCK(); CK_STAILQ_FOREACH(ifg, &V_ifg_head, ifg_next) if (!strcmp(ifg->ifg_group, ifgr->ifgr_name)) break; if (ifg == NULL) { IFNET_RUNLOCK(); return (ENOENT); } if (ifgr->ifgr_len == 0) { CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) ifgr->ifgr_len += sizeof(ifgrq); IFNET_RUNLOCK(); return (0); } len = ifgr->ifgr_len; ifgp = ifgr_groups_get(ifgr); CK_STAILQ_FOREACH(ifgm, &ifg->ifg_members, ifgm_next) { if (len < sizeof(ifgrq)) { IFNET_RUNLOCK(); return (EINVAL); } bzero(&ifgrq, sizeof ifgrq); strlcpy(ifgrq.ifgrq_member, ifgm->ifgm_ifp->if_xname, sizeof(ifgrq.ifgrq_member)); if ((error = copyout(&ifgrq, ifgp, sizeof(struct ifg_req)))) { IFNET_RUNLOCK(); return (error); } len -= sizeof(ifgrq); ifgp++; } IFNET_RUNLOCK(); return (0); } /* * Return counter values from counter(9)s stored in ifnet. */ uint64_t if_get_counter_default(struct ifnet *ifp, ift_counter cnt) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); return (counter_u64_fetch(ifp->if_counters[cnt])); } /* * Increase an ifnet counter. Usually used for counters shared * between the stack and a driver, but function supports them all. */ void if_inc_counter(struct ifnet *ifp, ift_counter cnt, int64_t inc) { KASSERT(cnt < IFCOUNTERS, ("%s: invalid cnt %d", __func__, cnt)); counter_u64_add(ifp->if_counters[cnt], inc); } /* * Copy data from ifnet to userland API structure if_data. */ void if_data_copy(struct ifnet *ifp, struct if_data *ifd) { ifd->ifi_type = ifp->if_type; ifd->ifi_physical = 0; ifd->ifi_addrlen = ifp->if_addrlen; ifd->ifi_hdrlen = ifp->if_hdrlen; ifd->ifi_link_state = ifp->if_link_state; ifd->ifi_vhid = 0; ifd->ifi_datalen = sizeof(struct if_data); ifd->ifi_mtu = ifp->if_mtu; ifd->ifi_metric = ifp->if_metric; ifd->ifi_baudrate = ifp->if_baudrate; ifd->ifi_hwassist = ifp->if_hwassist; ifd->ifi_epoch = ifp->if_epoch; ifd->ifi_lastchange = ifp->if_lastchange; ifd->ifi_ipackets = ifp->if_get_counter(ifp, IFCOUNTER_IPACKETS); ifd->ifi_ierrors = ifp->if_get_counter(ifp, IFCOUNTER_IERRORS); ifd->ifi_opackets = ifp->if_get_counter(ifp, IFCOUNTER_OPACKETS); ifd->ifi_oerrors = ifp->if_get_counter(ifp, IFCOUNTER_OERRORS); ifd->ifi_collisions = ifp->if_get_counter(ifp, IFCOUNTER_COLLISIONS); ifd->ifi_ibytes = ifp->if_get_counter(ifp, IFCOUNTER_IBYTES); ifd->ifi_obytes = ifp->if_get_counter(ifp, IFCOUNTER_OBYTES); ifd->ifi_imcasts = ifp->if_get_counter(ifp, IFCOUNTER_IMCASTS); ifd->ifi_omcasts = ifp->if_get_counter(ifp, IFCOUNTER_OMCASTS); ifd->ifi_iqdrops = ifp->if_get_counter(ifp, IFCOUNTER_IQDROPS); ifd->ifi_oqdrops = ifp->if_get_counter(ifp, IFCOUNTER_OQDROPS); ifd->ifi_noproto = ifp->if_get_counter(ifp, IFCOUNTER_NOPROTO); } /* * Wrapper functions for struct ifnet address list locking macros. These are * used by kernel modules to avoid encoding programming interface or binary * interface assumptions that may be violated when kernel-internal locking * approaches change. */ void if_addr_rlock(struct ifnet *ifp) { MPASS(*(uint64_t *)&ifp->if_addr_et == 0); epoch_enter_preempt(net_epoch_preempt, &ifp->if_addr_et); } void if_addr_runlock(struct ifnet *ifp) { epoch_exit_preempt(net_epoch_preempt, &ifp->if_addr_et); #ifdef INVARIANTS bzero(&ifp->if_addr_et, sizeof(struct epoch_tracker)); #endif } void if_maddr_rlock(if_t ifp) { MPASS(*(uint64_t *)&ifp->if_maddr_et == 0); epoch_enter_preempt(net_epoch_preempt, &ifp->if_maddr_et); } void if_maddr_runlock(if_t ifp) { epoch_exit_preempt(net_epoch_preempt, &ifp->if_maddr_et); #ifdef INVARIANTS bzero(&ifp->if_maddr_et, sizeof(struct epoch_tracker)); #endif } /* * Initialization, destruction and refcounting functions for ifaddrs. */ struct ifaddr * ifa_alloc(size_t size, int flags) { struct ifaddr *ifa; KASSERT(size >= sizeof(struct ifaddr), ("%s: invalid size %zu", __func__, size)); ifa = malloc(size, M_IFADDR, M_ZERO | flags); if (ifa == NULL) return (NULL); if ((ifa->ifa_opackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ipackets = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_obytes = counter_u64_alloc(flags)) == NULL) goto fail; if ((ifa->ifa_ibytes = counter_u64_alloc(flags)) == NULL) goto fail; refcount_init(&ifa->ifa_refcnt, 1); return (ifa); fail: /* free(NULL) is okay */ counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); return (NULL); } void ifa_ref(struct ifaddr *ifa) { refcount_acquire(&ifa->ifa_refcnt); } static void ifa_destroy(epoch_context_t ctx) { struct ifaddr *ifa; ifa = __containerof(ctx, struct ifaddr, ifa_epoch_ctx); counter_u64_free(ifa->ifa_opackets); counter_u64_free(ifa->ifa_ipackets); counter_u64_free(ifa->ifa_obytes); counter_u64_free(ifa->ifa_ibytes); free(ifa, M_IFADDR); } void ifa_free(struct ifaddr *ifa) { if (refcount_release(&ifa->ifa_refcnt)) epoch_call(net_epoch_preempt, &ifa->ifa_epoch_ctx, ifa_destroy); } static int ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa, struct sockaddr *ia) { int error; struct rt_addrinfo info; struct sockaddr_dl null_sdl; struct ifnet *ifp; ifp = ifa->ifa_ifp; bzero(&info, sizeof(info)); if (cmd != RTM_DELETE) info.rti_ifp = V_loif; info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED; info.rti_info[RTAX_DST] = ia; info.rti_info[RTAX_GATEWAY] = (struct sockaddr *)&null_sdl; link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type); error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib); if (error != 0 && !(cmd == RTM_ADD && error == EEXIST) && !(cmd == RTM_DELETE && error == ENOENT)) if_printf(ifp, "%s failed: %d\n", otype, error); return (error); } int ifa_add_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_ADD, "insertion", ifa, ia)); } int ifa_del_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_DELETE, "deletion", ifa, ia)); } int ifa_switch_loopback_route(struct ifaddr *ifa, struct sockaddr *ia) { return (ifa_maintain_loopback_route(RTM_CHANGE, "switch", ifa, ia)); } /* * XXX: Because sockaddr_dl has deeper structure than the sockaddr * structs used to represent other address families, it is necessary * to perform a different comparison. */ #define sa_dl_equal(a1, a2) \ ((((const struct sockaddr_dl *)(a1))->sdl_len == \ ((const struct sockaddr_dl *)(a2))->sdl_len) && \ (bcmp(CLLADDR((const struct sockaddr_dl *)(a1)), \ CLLADDR((const struct sockaddr_dl *)(a2)), \ ((const struct sockaddr_dl *)(a1))->sdl_alen) == 0)) /* * Locate an interface based on a complete address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithaddr(const struct sockaddr *addr) { struct ifnet *ifp; struct ifaddr *ifa; MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (sa_equal(addr, ifa->ifa_addr)) { goto done; } /* IP6 doesn't have broadcast */ if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { goto done; } } } ifa = NULL; done: return (ifa); } int ifa_ifwithaddr_check(const struct sockaddr *addr) { int rc; NET_EPOCH_ENTER(); rc = (ifa_ifwithaddr(addr) != NULL); NET_EPOCH_EXIT(); return (rc); } /* * Locate an interface based on the broadcast address. */ /* ARGSUSED */ struct ifaddr * ifa_ifwithbroadaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if ((ifp->if_flags & IFF_BROADCAST) && ifa->ifa_broadaddr && ifa->ifa_broadaddr->sa_len != 0 && sa_equal(ifa->ifa_broadaddr, addr)) { goto done; } } } ifa = NULL; done: return (ifa); } /* * Locate the point to point interface with a given destination address. */ /*ARGSUSED*/ struct ifaddr * ifa_ifwithdstaddr(const struct sockaddr *addr, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((ifp->if_flags & IFF_POINTOPOINT) == 0) continue; if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != addr->sa_family) continue; if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { goto done; } } } ifa = NULL; done: return (ifa); } /* * Find an interface on a specific network. If many, choice * is most specific found. */ struct ifaddr * ifa_ifwithnet(const struct sockaddr *addr, int ignore_ptp, int fibnum) { struct ifnet *ifp; struct ifaddr *ifa; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; const char *addr_data = addr->sa_data, *cplim; MPASS(in_epoch(net_epoch_preempt)); /* * AF_LINK addresses can be looked up directly by their index number, * so do that if we can. */ if (af == AF_LINK) { const struct sockaddr_dl *sdl = (const struct sockaddr_dl *)addr; if (sdl->sdl_index && sdl->sdl_index <= V_if_index) return (ifaddr_byindex(sdl->sdl_index)); } /* * Scan though each interface, looking for ones that have addresses * in this address family and the requested fib. Maintain a reference * on ifa_maybe once we find one, as we release the IF_ADDR_RLOCK() that * kept it stable when we move onto the next interface. */ CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if ((fibnum != RT_ALL_FIBS) && (ifp->if_fib != fibnum)) continue; CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { const char *cp, *cp2, *cp3; if (ifa->ifa_addr->sa_family != af) next: continue; if (af == AF_INET && ifp->if_flags & IFF_POINTOPOINT && !ignore_ptp) { /* * This is a bit broken as it doesn't * take into account that the remote end may * be a single node in the network we are * looking for. * The trouble is that we don't know the * netmask for the remote end. */ if (ifa->ifa_dstaddr != NULL && sa_equal(addr, ifa->ifa_dstaddr)) { goto done; } } else { /* * Scan all the bits in the ifa's address. * If a bit dissagrees with what we are * looking for, mask it with the netmask * to see if it really matters. * (A byte at a time) */ if (ifa->ifa_netmask == 0) continue; cp = addr_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; while (cp3 < cplim) if ((*cp++ ^ *cp2++) & *cp3++) goto next; /* next address! */ /* * If the netmask of what we just found * is more specific than what we had before * (if we had one), or if the virtual status * of new prefix is better than of the old one, * then remember the new one before continuing * to search for an even better one. */ if (ifa_maybe == NULL || ifa_preferred(ifa_maybe, ifa) || rn_refines((caddr_t)ifa->ifa_netmask, (caddr_t)ifa_maybe->ifa_netmask)) { ifa_maybe = ifa; } } } } ifa = ifa_maybe; ifa_maybe = NULL; done: return (ifa); } /* * Find an interface address specific to an interface best matching * a given address. */ struct ifaddr * ifaof_ifpforaddr(const struct sockaddr *addr, struct ifnet *ifp) { struct ifaddr *ifa; const char *cp, *cp2, *cp3; char *cplim; struct ifaddr *ifa_maybe = NULL; u_int af = addr->sa_family; if (af >= AF_MAX) return (NULL); MPASS(in_epoch(net_epoch_preempt)); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != af) continue; if (ifa_maybe == NULL) ifa_maybe = ifa; if (ifa->ifa_netmask == 0) { if (sa_equal(addr, ifa->ifa_addr) || (ifa->ifa_dstaddr && sa_equal(addr, ifa->ifa_dstaddr))) goto done; continue; } if (ifp->if_flags & IFF_POINTOPOINT) { if (sa_equal(addr, ifa->ifa_dstaddr)) goto done; } else { cp = addr->sa_data; cp2 = ifa->ifa_addr->sa_data; cp3 = ifa->ifa_netmask->sa_data; cplim = ifa->ifa_netmask->sa_len + (char *)ifa->ifa_netmask; for (; cp3 < cplim; cp3++) if ((*cp++ ^ *cp2++) & *cp3) break; if (cp3 == cplim) goto done; } } ifa = ifa_maybe; done: return (ifa); } /* * See whether new ifa is better than current one: * 1) A non-virtual one is preferred over virtual. * 2) A virtual in master state preferred over any other state. * * Used in several address selecting functions. */ int ifa_preferred(struct ifaddr *cur, struct ifaddr *next) { return (cur->ifa_carp && (!next->ifa_carp || ((*carp_master_p)(next) && !(*carp_master_p)(cur)))); } #include /* * Default action when installing a route with a Link Level gateway. * Lookup an appropriate real ifa to point to. * This should be moved to /sys/net/link.c eventually. */ static void link_rtrequest(int cmd, struct rtentry *rt, struct rt_addrinfo *info) { struct ifaddr *ifa, *oifa; struct sockaddr *dst; struct ifnet *ifp; if (cmd != RTM_ADD || ((ifa = rt->rt_ifa) == NULL) || ((ifp = ifa->ifa_ifp) == NULL) || ((dst = rt_key(rt)) == NULL)) return; NET_EPOCH_ENTER(); ifa = ifaof_ifpforaddr(dst, ifp); if (ifa) { oifa = rt->rt_ifa; if (oifa != ifa) { ifa_free(oifa); ifa_ref(ifa); } rt->rt_ifa = ifa; if (ifa->ifa_rtrequest && ifa->ifa_rtrequest != link_rtrequest) ifa->ifa_rtrequest(cmd, rt, info); } NET_EPOCH_EXIT(); } struct sockaddr_dl * link_alloc_sdl(size_t size, int flags) { return (malloc(size, M_TEMP, flags)); } void link_free_sdl(struct sockaddr *sa) { free(sa, M_TEMP); } /* * Fills in given sdl with interface basic info. * Returns pointer to filled sdl. */ struct sockaddr_dl * link_init_sdl(struct ifnet *ifp, struct sockaddr *paddr, u_char iftype) { struct sockaddr_dl *sdl; sdl = (struct sockaddr_dl *)paddr; memset(sdl, 0, sizeof(struct sockaddr_dl)); sdl->sdl_len = sizeof(struct sockaddr_dl); sdl->sdl_family = AF_LINK; sdl->sdl_index = ifp->if_index; sdl->sdl_type = iftype; return (sdl); } /* * Mark an interface down and notify protocols of * the transition. */ static void if_unroute(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_unroute: flag != IFF_UP")); ifp->if_flags &= ~flag; getmicrotime(&ifp->if_lastchange); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFDOWN, ifa->ifa_addr); ifp->if_qflush(ifp); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); } /* * Mark an interface up and notify protocols of * the transition. */ static void if_route(struct ifnet *ifp, int flag, int fam) { struct ifaddr *ifa; KASSERT(flag == IFF_UP, ("if_route: flag != IFF_UP")); ifp->if_flags |= flag; getmicrotime(&ifp->if_lastchange); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) if (fam == PF_UNSPEC || (fam == ifa->ifa_addr->sa_family)) pfctlinput(PRC_IFUP, ifa->ifa_addr); if (ifp->if_carp) (*carp_linkstate_p)(ifp); rt_ifmsg(ifp); #ifdef INET6 in6_if_up(ifp); #endif } void (*vlan_link_state_p)(struct ifnet *); /* XXX: private from if_vlan */ void (*vlan_trunk_cap_p)(struct ifnet *); /* XXX: private from if_vlan */ struct ifnet *(*vlan_trunkdev_p)(struct ifnet *); struct ifnet *(*vlan_devat_p)(struct ifnet *, uint16_t); int (*vlan_tag_p)(struct ifnet *, uint16_t *); int (*vlan_pcp_p)(struct ifnet *, uint16_t *); int (*vlan_setcookie_p)(struct ifnet *, void *); void *(*vlan_cookie_p)(struct ifnet *); /* * Handle a change in the interface link state. To avoid LORs * between driver lock and upper layer locks, as well as possible * recursions, we post event to taskqueue, and all job * is done in static do_link_state_change(). */ void if_link_state_change(struct ifnet *ifp, int link_state) { /* Return if state hasn't changed. */ if (ifp->if_link_state == link_state) return; ifp->if_link_state = link_state; taskqueue_enqueue(taskqueue_swi, &ifp->if_linktask); } static void do_link_state_change(void *arg, int pending) { struct ifnet *ifp = (struct ifnet *)arg; int link_state = ifp->if_link_state; CURVNET_SET(ifp->if_vnet); /* Notify that the link state has changed. */ rt_ifmsg(ifp); if (ifp->if_vlantrunk != NULL) (*vlan_link_state_p)(ifp); if ((ifp->if_type == IFT_ETHER || ifp->if_type == IFT_L2VLAN) && ifp->if_l2com != NULL) (*ng_ether_link_state_p)(ifp, link_state); if (ifp->if_carp) (*carp_linkstate_p)(ifp); if (ifp->if_bridge) ifp->if_bridge_linkstate(ifp); if (ifp->if_lagg) (*lagg_linkstate_p)(ifp, link_state); if (IS_DEFAULT_VNET(curvnet)) devctl_notify("IFNET", ifp->if_xname, (link_state == LINK_STATE_UP) ? "LINK_UP" : "LINK_DOWN", NULL); if (pending > 1) if_printf(ifp, "%d link states coalesced\n", pending); if (log_link_state_change) if_printf(ifp, "link state changed to %s\n", (link_state == LINK_STATE_UP) ? "UP" : "DOWN" ); EVENTHANDLER_INVOKE(ifnet_link_event, ifp, link_state); CURVNET_RESTORE(); } /* * Mark an interface down and notify protocols of * the transition. */ void if_down(struct ifnet *ifp) { EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_DOWN); if_unroute(ifp, IFF_UP, AF_UNSPEC); } /* * Mark an interface up and notify protocols of * the transition. */ void if_up(struct ifnet *ifp) { if_route(ifp, IFF_UP, AF_UNSPEC); EVENTHANDLER_INVOKE(ifnet_event, ifp, IFNET_EVENT_UP); } /* * Flush an interface queue. */ void if_qflush(struct ifnet *ifp) { struct mbuf *m, *n; struct ifaltq *ifq; ifq = &ifp->if_snd; IFQ_LOCK(ifq); #ifdef ALTQ if (ALTQ_IS_ENABLED(ifq)) ALTQ_PURGE(ifq); #endif n = ifq->ifq_head; while ((m = n) != NULL) { n = m->m_nextpkt; m_freem(m); } ifq->ifq_head = 0; ifq->ifq_tail = 0; ifq->ifq_len = 0; IFQ_UNLOCK(ifq); } /* * Map interface name to interface structure pointer, with or without * returning a reference. */ struct ifnet * ifunit_ref(const char *name) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0 && !(ifp->if_flags & IFF_DYING)) break; } if (ifp != NULL) if_ref(ifp); IFNET_RUNLOCK_NOSLEEP(); return (ifp); } struct ifnet * ifunit(const char *name) { struct ifnet *ifp; IFNET_RLOCK_NOSLEEP(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { if (strncmp(name, ifp->if_xname, IFNAMSIZ) == 0) break; } IFNET_RUNLOCK_NOSLEEP(); return (ifp); } static void * ifr_buffer_get_buffer(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.buffer); } static void ifr_buffer_set_buffer_null(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.buffer = 0; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.buffer = NULL; } static size_t ifr_buffer_get_length(void *data) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return (ifrup->ifr32.ifr_ifru.ifru_buffer.length); #endif return (ifrup->ifr.ifr_ifru.ifru_buffer.length); } static void ifr_buffer_set_length(void *data, size_t len) { union ifreq_union *ifrup; ifrup = data; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) ifrup->ifr32.ifr_ifru.ifru_buffer.length = len; else #endif ifrup->ifr.ifr_ifru.ifru_buffer.length = len; } void * ifr_data_get_ptr(void *ifrp) { union ifreq_union *ifrup; ifrup = ifrp; #ifdef COMPAT_FREEBSD32 if (SV_CURPROC_FLAG(SV_ILP32)) return ((void *)(uintptr_t) ifrup->ifr32.ifr_ifru.ifru_data); #endif return (ifrup->ifr.ifr_ifru.ifru_data); } /* * Hardware specific interface ioctls. */ -static int +int ifhwioctl(u_long cmd, struct ifnet *ifp, caddr_t data, struct thread *td) { struct ifreq *ifr; int error = 0, do_ifup = 0; int new_flags, temp_flags; size_t namelen, onamelen; size_t descrlen; char *descrbuf, *odescrbuf; char new_name[IFNAMSIZ]; struct ifaddr *ifa; struct sockaddr_dl *sdl; ifr = (struct ifreq *)data; switch (cmd) { case SIOCGIFINDEX: ifr->ifr_index = ifp->if_index; break; case SIOCGIFFLAGS: temp_flags = ifp->if_flags | ifp->if_drv_flags; ifr->ifr_flags = temp_flags & 0xffff; ifr->ifr_flagshigh = temp_flags >> 16; break; case SIOCGIFCAP: ifr->ifr_reqcap = ifp->if_capabilities; ifr->ifr_curcap = ifp->if_capenable; break; #ifdef MAC case SIOCGIFMAC: error = mac_ifnet_ioctl_get(td->td_ucred, ifr, ifp); break; #endif case SIOCGIFMETRIC: ifr->ifr_metric = ifp->if_metric; break; case SIOCGIFMTU: ifr->ifr_mtu = ifp->if_mtu; break; case SIOCGIFPHYS: /* XXXGL: did this ever worked? */ ifr->ifr_phys = 0; break; case SIOCGIFDESCR: error = 0; sx_slock(&ifdescr_sx); if (ifp->if_description == NULL) error = ENOMSG; else { /* space for terminating nul */ descrlen = strlen(ifp->if_description) + 1; if (ifr_buffer_get_length(ifr) < descrlen) ifr_buffer_set_buffer_null(ifr); else error = copyout(ifp->if_description, ifr_buffer_get_buffer(ifr), descrlen); ifr_buffer_set_length(ifr, descrlen); } sx_sunlock(&ifdescr_sx); break; case SIOCSIFDESCR: error = priv_check(td, PRIV_NET_SETIFDESCR); if (error) return (error); /* * Copy only (length-1) bytes to make sure that * if_description is always nul terminated. The * length parameter is supposed to count the * terminating nul in. */ if (ifr_buffer_get_length(ifr) > ifdescr_maxlen) return (ENAMETOOLONG); else if (ifr_buffer_get_length(ifr) == 0) descrbuf = NULL; else { descrbuf = malloc(ifr_buffer_get_length(ifr), M_IFDESCR, M_WAITOK | M_ZERO); error = copyin(ifr_buffer_get_buffer(ifr), descrbuf, ifr_buffer_get_length(ifr) - 1); if (error) { free(descrbuf, M_IFDESCR); break; } } sx_xlock(&ifdescr_sx); odescrbuf = ifp->if_description; ifp->if_description = descrbuf; sx_xunlock(&ifdescr_sx); getmicrotime(&ifp->if_lastchange); free(odescrbuf, M_IFDESCR); break; case SIOCGIFFIB: ifr->ifr_fib = ifp->if_fib; break; case SIOCSIFFIB: error = priv_check(td, PRIV_NET_SETIFFIB); if (error) return (error); if (ifr->ifr_fib >= rt_numfibs) return (EINVAL); ifp->if_fib = ifr->ifr_fib; break; case SIOCSIFFLAGS: error = priv_check(td, PRIV_NET_SETIFFLAGS); if (error) return (error); /* * Currently, no driver owned flags pass the IFF_CANTCHANGE * check, so we don't need special handling here yet. */ new_flags = (ifr->ifr_flags & 0xffff) | (ifr->ifr_flagshigh << 16); if (ifp->if_flags & IFF_UP && (new_flags & IFF_UP) == 0) { if_down(ifp); } else if (new_flags & IFF_UP && (ifp->if_flags & IFF_UP) == 0) { do_ifup = 1; } /* See if permanently promiscuous mode bit is about to flip */ if ((ifp->if_flags ^ new_flags) & IFF_PPROMISC) { if (new_flags & IFF_PPROMISC) ifp->if_flags |= IFF_PROMISC; else if (ifp->if_pcount == 0) ifp->if_flags &= ~IFF_PROMISC; if (log_promisc_mode_change) if_printf(ifp, "permanently promiscuous mode %s\n", ((new_flags & IFF_PPROMISC) ? "enabled" : "disabled")); } ifp->if_flags = (ifp->if_flags & IFF_CANTCHANGE) | (new_flags &~ IFF_CANTCHANGE); if (ifp->if_ioctl) { (void) (*ifp->if_ioctl)(ifp, cmd, data); } if (do_ifup) if_up(ifp); getmicrotime(&ifp->if_lastchange); break; case SIOCSIFCAP: error = priv_check(td, PRIV_NET_SETIFCAP); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); if (ifr->ifr_reqcap & ~ifp->if_capabilities) return (EINVAL); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; #ifdef MAC case SIOCSIFMAC: error = mac_ifnet_ioctl_set(td->td_ucred, ifr, ifp); break; #endif case SIOCSIFNAME: error = priv_check(td, PRIV_NET_SETIFNAME); if (error) return (error); error = copyinstr(ifr_data_get_ptr(ifr), new_name, IFNAMSIZ, NULL); if (error != 0) return (error); if (new_name[0] == '\0') return (EINVAL); if (new_name[IFNAMSIZ-1] != '\0') { new_name[IFNAMSIZ-1] = '\0'; if (strlen(new_name) == IFNAMSIZ-1) return (EINVAL); } if (ifunit(new_name) != NULL) return (EEXIST); /* * XXX: Locking. Nothing else seems to lock if_flags, * and there are numerous other races with the * ifunit() checks not being atomic with namespace * changes (renames, vmoves, if_attach, etc). */ ifp->if_flags |= IFF_RENAMING; /* Announce the departure of the interface. */ rt_ifannouncemsg(ifp, IFAN_DEPARTURE); EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); if_printf(ifp, "changing name to '%s'\n", new_name); IF_ADDR_WLOCK(ifp); strlcpy(ifp->if_xname, new_name, sizeof(ifp->if_xname)); ifa = ifp->if_addr; sdl = (struct sockaddr_dl *)ifa->ifa_addr; namelen = strlen(new_name); onamelen = sdl->sdl_nlen; /* * Move the address if needed. This is safe because we * allocate space for a name of length IFNAMSIZ when we * create this in if_attach(). */ if (namelen != onamelen) { bcopy(sdl->sdl_data + onamelen, sdl->sdl_data + namelen, sdl->sdl_alen); } bcopy(new_name, sdl->sdl_data, namelen); sdl->sdl_nlen = namelen; sdl = (struct sockaddr_dl *)ifa->ifa_netmask; bzero(sdl->sdl_data, onamelen); while (namelen != 0) sdl->sdl_data[--namelen] = 0xff; IF_ADDR_WUNLOCK(ifp); EVENTHANDLER_INVOKE(ifnet_arrival_event, ifp); /* Announce the return of the interface. */ rt_ifannouncemsg(ifp, IFAN_ARRIVAL); ifp->if_flags &= ~IFF_RENAMING; break; #ifdef VIMAGE case SIOCSIFVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error) return (error); error = if_vmove_loan(td, ifp, ifr->ifr_name, ifr->ifr_jid); break; #endif case SIOCSIFMETRIC: error = priv_check(td, PRIV_NET_SETIFMETRIC); if (error) return (error); ifp->if_metric = ifr->ifr_metric; getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYS: error = priv_check(td, PRIV_NET_SETIFPHYS); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFMTU: { u_long oldmtu = ifp->if_mtu; error = priv_check(td, PRIV_NET_SETIFMTU); if (error) return (error); if (ifr->ifr_mtu < IF_MINMTU || ifr->ifr_mtu > IF_MAXMTU) return (EINVAL); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) { getmicrotime(&ifp->if_lastchange); rt_ifmsg(ifp); #ifdef INET NETDUMP_REINIT(ifp); #endif } /* * If the link MTU changed, do network layer specific procedure. */ if (ifp->if_mtu != oldmtu) { #ifdef INET6 nd6_setmtu(ifp); #endif rt_updatemtu(ifp); } break; } case SIOCADDMULTI: case SIOCDELMULTI: if (cmd == SIOCADDMULTI) error = priv_check(td, PRIV_NET_ADDMULTI); else error = priv_check(td, PRIV_NET_DELMULTI); if (error) return (error); /* Don't allow group membership on non-multicast interfaces. */ if ((ifp->if_flags & IFF_MULTICAST) == 0) return (EOPNOTSUPP); /* Don't let users screw up protocols' entries. */ if (ifr->ifr_addr.sa_family != AF_LINK) return (EINVAL); if (cmd == SIOCADDMULTI) { struct ifmultiaddr *ifma; /* * Userland is only permitted to join groups once * via the if_addmulti() KPI, because it cannot hold * struct ifmultiaddr * between calls. It may also * lose a race while we check if the membership * already exists. */ IF_ADDR_RLOCK(ifp); ifma = if_findmulti(ifp, &ifr->ifr_addr); IF_ADDR_RUNLOCK(ifp); if (ifma != NULL) error = EADDRINUSE; else error = if_addmulti(ifp, &ifr->ifr_addr, &ifma); } else { error = if_delmulti(ifp, &ifr->ifr_addr); } if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCSIFPHYADDR: case SIOCDIFPHYADDR: #ifdef INET6 case SIOCSIFPHYADDR_IN6: #endif case SIOCSIFMEDIA: case SIOCSIFGENERIC: error = priv_check(td, PRIV_NET_HWIOCTL); if (error) return (error); if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); if (error == 0) getmicrotime(&ifp->if_lastchange); break; case SIOCGIFSTATUS: case SIOCGIFPSRCADDR: case SIOCGIFPDSTADDR: case SIOCGIFMEDIA: case SIOCGIFXMEDIA: case SIOCGIFGENERIC: case SIOCGIFRSSKEY: case SIOCGIFRSSHASH: if (ifp->if_ioctl == NULL) return (EOPNOTSUPP); error = (*ifp->if_ioctl)(ifp, cmd, data); break; case SIOCSIFLLADDR: error = priv_check(td, PRIV_NET_SETLLADDR); if (error) return (error); error = if_setlladdr(ifp, ifr->ifr_addr.sa_data, ifr->ifr_addr.sa_len); break; case SIOCGHWADDR: error = if_gethwaddr(ifp, ifr); break; CASE_IOC_IFGROUPREQ(SIOCAIFGROUP): error = priv_check(td, PRIV_NET_ADDIFGROUP); if (error) return (error); if ((error = if_addgroup(ifp, ifgr_group_get((struct ifgroupreq *)data)))) return (error); break; CASE_IOC_IFGROUPREQ(SIOCGIFGROUP): if ((error = if_getgroup((struct ifgroupreq *)data, ifp))) return (error); break; CASE_IOC_IFGROUPREQ(SIOCDIFGROUP): error = priv_check(td, PRIV_NET_DELIFGROUP); if (error) return (error); if ((error = if_delgroup(ifp, ifgr_group_get((struct ifgroupreq *)data)))) return (error); break; default: error = ENOIOCTL; break; } return (error); } #ifdef COMPAT_FREEBSD32 struct ifconf32 { int32_t ifc_len; union { uint32_t ifcu_buf; uint32_t ifcu_req; } ifc_ifcu; }; #define SIOCGIFCONF32 _IOWR('i', 36, struct ifconf32) #endif #ifdef COMPAT_FREEBSD32 static void ifmr_init(struct ifmediareq *ifmr, caddr_t data) { struct ifmediareq32 *ifmr32; ifmr32 = (struct ifmediareq32 *)data; memcpy(ifmr->ifm_name, ifmr32->ifm_name, sizeof(ifmr->ifm_name)); ifmr->ifm_current = ifmr32->ifm_current; ifmr->ifm_mask = ifmr32->ifm_mask; ifmr->ifm_status = ifmr32->ifm_status; ifmr->ifm_active = ifmr32->ifm_active; ifmr->ifm_count = ifmr32->ifm_count; ifmr->ifm_ulist = (int *)(uintptr_t)ifmr32->ifm_ulist; } static void ifmr_update(const struct ifmediareq *ifmr, caddr_t data) { struct ifmediareq32 *ifmr32; ifmr32 = (struct ifmediareq32 *)data; ifmr32->ifm_current = ifmr->ifm_current; ifmr32->ifm_mask = ifmr->ifm_mask; ifmr32->ifm_status = ifmr->ifm_status; ifmr32->ifm_active = ifmr->ifm_active; ifmr32->ifm_count = ifmr->ifm_count; } #endif /* * Interface ioctls. */ int ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) { #ifdef COMPAT_FREEBSD32 caddr_t saved_data = NULL; struct ifmediareq ifmr; struct ifmediareq *ifmrp; #endif struct ifnet *ifp; struct ifreq *ifr; int error; int oif_flags; #ifdef VIMAGE int shutdown; #endif CURVNET_SET(so->so_vnet); #ifdef VIMAGE /* Make sure the VNET is stable. */ shutdown = (so->so_vnet->vnet_state > SI_SUB_VNET && so->so_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; if (shutdown) { CURVNET_RESTORE(); return (EBUSY); } #endif switch (cmd) { case SIOCGIFCONF: error = ifconf(cmd, data); CURVNET_RESTORE(); return (error); #ifdef COMPAT_FREEBSD32 case SIOCGIFCONF32: { struct ifconf32 *ifc32; struct ifconf ifc; ifc32 = (struct ifconf32 *)data; ifc.ifc_len = ifc32->ifc_len; ifc.ifc_buf = PTRIN(ifc32->ifc_buf); error = ifconf(SIOCGIFCONF, (void *)&ifc); CURVNET_RESTORE(); if (error == 0) ifc32->ifc_len = ifc.ifc_len; return (error); } #endif } #ifdef COMPAT_FREEBSD32 ifmrp = NULL; switch (cmd) { case SIOCGIFMEDIA32: case SIOCGIFXMEDIA32: ifmrp = &ifmr; ifmr_init(ifmrp, data); cmd = _IOC_NEWTYPE(cmd, struct ifmediareq); saved_data = data; data = (caddr_t)ifmrp; } #endif ifr = (struct ifreq *)data; switch (cmd) { #ifdef VIMAGE case SIOCSIFRVNET: error = priv_check(td, PRIV_NET_SETIFVNET); if (error == 0) error = if_vmove_reclaim(td, ifr->ifr_name, ifr->ifr_jid); goto out_noref; #endif case SIOCIFCREATE: case SIOCIFCREATE2: error = priv_check(td, PRIV_NET_IFCREATE); if (error == 0) error = if_clone_create(ifr->ifr_name, sizeof(ifr->ifr_name), cmd == SIOCIFCREATE2 ? ifr_data_get_ptr(ifr) : NULL); goto out_noref; case SIOCIFDESTROY: error = priv_check(td, PRIV_NET_IFDESTROY); if (error == 0) error = if_clone_destroy(ifr->ifr_name); goto out_noref; case SIOCIFGCLONERS: error = if_clone_list((struct if_clonereq *)data); goto out_noref; CASE_IOC_IFGROUPREQ(SIOCGIFGMEMB): error = if_getgroupmembers((struct ifgroupreq *)data); goto out_noref; #if defined(INET) || defined(INET6) case SIOCSVH: case SIOCGVH: if (carp_ioctl_p == NULL) error = EPROTONOSUPPORT; else error = (*carp_ioctl_p)(ifr, cmd, td); goto out_noref; #endif } ifp = ifunit_ref(ifr->ifr_name); if (ifp == NULL) { error = ENXIO; goto out_noref; } error = ifhwioctl(cmd, ifp, data, td); if (error != ENOIOCTL) goto out_ref; oif_flags = ifp->if_flags; if (so->so_proto == NULL) { error = EOPNOTSUPP; goto out_ref; } /* * Pass the request on to the socket control method, and if the * latter returns EOPNOTSUPP, directly to the interface. * * Make an exception for the legacy SIOCSIF* requests. Drivers * trust SIOCSIFADDR et al to come from an already privileged * layer, and do not perform any credentials checks or input * validation. */ error = ((*so->so_proto->pr_usrreqs->pru_control)(so, cmd, data, ifp, td)); if (error == EOPNOTSUPP && ifp != NULL && ifp->if_ioctl != NULL && cmd != SIOCSIFADDR && cmd != SIOCSIFBRDADDR && cmd != SIOCSIFDSTADDR && cmd != SIOCSIFNETMASK) error = (*ifp->if_ioctl)(ifp, cmd, data); if ((oif_flags ^ ifp->if_flags) & IFF_UP) { #ifdef INET6 if (ifp->if_flags & IFF_UP) in6_if_up(ifp); #endif } out_ref: if_rele(ifp); out_noref: #ifdef COMPAT_FREEBSD32 if (ifmrp != NULL) { KASSERT((cmd == SIOCGIFMEDIA || cmd == SIOCGIFXMEDIA), ("ifmrp non-NULL, but cmd is not an ifmedia req 0x%lx", cmd)); data = saved_data; ifmr_update(ifmrp, data); } #endif CURVNET_RESTORE(); return (error); } /* * The code common to handling reference counted flags, * e.g., in ifpromisc() and if_allmulti(). * The "pflag" argument can specify a permanent mode flag to check, * such as IFF_PPROMISC for promiscuous mode; should be 0 if none. * * Only to be used on stack-owned flags, not driver-owned flags. */ static int if_setflag(struct ifnet *ifp, int flag, int pflag, int *refcount, int onswitch) { struct ifreq ifr; int error; int oldflags, oldcount; /* Sanity checks to catch programming errors */ KASSERT((flag & (IFF_DRV_OACTIVE|IFF_DRV_RUNNING)) == 0, ("%s: setting driver-owned flag %d", __func__, flag)); if (onswitch) KASSERT(*refcount >= 0, ("%s: increment negative refcount %d for flag %d", __func__, *refcount, flag)); else KASSERT(*refcount > 0, ("%s: decrement non-positive refcount %d for flag %d", __func__, *refcount, flag)); /* In case this mode is permanent, just touch refcount */ if (ifp->if_flags & pflag) { *refcount += onswitch ? 1 : -1; return (0); } /* Save ifnet parameters for if_ioctl() may fail */ oldcount = *refcount; oldflags = ifp->if_flags; /* * See if we aren't the only and touching refcount is enough. * Actually toggle interface flag if we are the first or last. */ if (onswitch) { if ((*refcount)++) return (0); ifp->if_flags |= flag; } else { if (--(*refcount)) return (0); ifp->if_flags &= ~flag; } /* Call down the driver since we've changed interface flags */ if (ifp->if_ioctl == NULL) { error = EOPNOTSUPP; goto recover; } ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; error = (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); if (error) goto recover; /* Notify userland that interface flags have changed */ rt_ifmsg(ifp); return (0); recover: /* Recover after driver error */ *refcount = oldcount; ifp->if_flags = oldflags; return (error); } /* * Set/clear promiscuous mode on interface ifp based on the truth value * of pswitch. The calls are reference counted so that only the first * "on" request actually has an effect, as does the final "off" request. * Results are undefined if the "off" and "on" requests are not matched. */ int ifpromisc(struct ifnet *ifp, int pswitch) { int error; int oldflags = ifp->if_flags; error = if_setflag(ifp, IFF_PROMISC, IFF_PPROMISC, &ifp->if_pcount, pswitch); /* If promiscuous mode status has changed, log a message */ if (error == 0 && ((ifp->if_flags ^ oldflags) & IFF_PROMISC) && log_promisc_mode_change) if_printf(ifp, "promiscuous mode %s\n", (ifp->if_flags & IFF_PROMISC) ? "enabled" : "disabled"); return (error); } /* * Return interface configuration * of system. List may be used * in later ioctl's (above) to get * other information. */ /*ARGSUSED*/ static int ifconf(u_long cmd, caddr_t data) { struct ifconf *ifc = (struct ifconf *)data; struct ifnet *ifp; struct ifaddr *ifa; struct ifreq ifr; struct sbuf *sb; int error, full = 0, valid_len, max_len; /* Limit initial buffer size to MAXPHYS to avoid DoS from userspace. */ max_len = MAXPHYS - 1; /* Prevent hostile input from being able to crash the system */ if (ifc->ifc_len <= 0) return (EINVAL); again: if (ifc->ifc_len <= max_len) { max_len = ifc->ifc_len; full = 1; } sb = sbuf_new(NULL, NULL, max_len + 1, SBUF_FIXEDLEN); max_len = 0; valid_len = 0; IFNET_RLOCK(); CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) { int addrs; /* * Zero the ifr to make sure we don't disclose the contents * of the stack. */ memset(&ifr, 0, sizeof(ifr)); if (strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)) >= sizeof(ifr.ifr_name)) { sbuf_delete(sb); IFNET_RUNLOCK(); return (ENAMETOOLONG); } addrs = 0; IF_ADDR_RLOCK(ifp); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { struct sockaddr *sa = ifa->ifa_addr; if (prison_if(curthread->td_ucred, sa) != 0) continue; addrs++; if (sa->sa_len <= sizeof(*sa)) { if (sa->sa_len < sizeof(*sa)) { memset(&ifr.ifr_ifru.ifru_addr, 0, sizeof(ifr.ifr_ifru.ifru_addr)); memcpy(&ifr.ifr_ifru.ifru_addr, sa, sa->sa_len); } else ifr.ifr_ifru.ifru_addr = *sa; sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); } else { sbuf_bcat(sb, &ifr, offsetof(struct ifreq, ifr_addr)); max_len += offsetof(struct ifreq, ifr_addr); sbuf_bcat(sb, sa, sa->sa_len); max_len += sa->sa_len; } if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } IF_ADDR_RUNLOCK(ifp); if (addrs == 0) { sbuf_bcat(sb, &ifr, sizeof(ifr)); max_len += sizeof(ifr); if (sbuf_error(sb) == 0) valid_len = sbuf_len(sb); } } IFNET_RUNLOCK(); /* * If we didn't allocate enough space (uncommon), try again. If * we have already allocated as much space as we are allowed, * return what we've got. */ if (valid_len != max_len && !full) { sbuf_delete(sb); goto again; } ifc->ifc_len = valid_len; sbuf_finish(sb); error = copyout(sbuf_data(sb), ifc->ifc_req, ifc->ifc_len); sbuf_delete(sb); return (error); } /* * Just like ifpromisc(), but for all-multicast-reception mode. */ int if_allmulti(struct ifnet *ifp, int onswitch) { return (if_setflag(ifp, IFF_ALLMULTI, 0, &ifp->if_amcount, onswitch)); } struct ifmultiaddr * if_findmulti(struct ifnet *ifp, const struct sockaddr *sa) { struct ifmultiaddr *ifma; IF_ADDR_LOCK_ASSERT(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (sa->sa_family == AF_LINK) { if (sa_dl_equal(ifma->ifma_addr, sa)) break; } else { if (sa_equal(ifma->ifma_addr, sa)) break; } } return ifma; } /* * Allocate a new ifmultiaddr and initialize based on passed arguments. We * make copies of passed sockaddrs. The ifmultiaddr will not be added to * the ifnet multicast address list here, so the caller must do that and * other setup work (such as notifying the device driver). The reference * count is initialized to 1. */ static struct ifmultiaddr * if_allocmulti(struct ifnet *ifp, struct sockaddr *sa, struct sockaddr *llsa, int mflags) { struct ifmultiaddr *ifma; struct sockaddr *dupsa; ifma = malloc(sizeof *ifma, M_IFMADDR, mflags | M_ZERO); if (ifma == NULL) return (NULL); dupsa = malloc(sa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma, M_IFMADDR); return (NULL); } bcopy(sa, dupsa, sa->sa_len); ifma->ifma_addr = dupsa; ifma->ifma_ifp = ifp; ifma->ifma_refcount = 1; ifma->ifma_protospec = NULL; if (llsa == NULL) { ifma->ifma_lladdr = NULL; return (ifma); } dupsa = malloc(llsa->sa_len, M_IFMADDR, mflags); if (dupsa == NULL) { free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); return (NULL); } bcopy(llsa, dupsa, llsa->sa_len); ifma->ifma_lladdr = dupsa; return (ifma); } /* * if_freemulti: free ifmultiaddr structure and possibly attached related * addresses. The caller is responsible for implementing reference * counting, notifying the driver, handling routing messages, and releasing * any dependent link layer state. */ #ifdef MCAST_VERBOSE extern void kdb_backtrace(void); #endif static void if_freemulti_internal(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti: refcount %d", ifma->ifma_refcount)); if (ifma->ifma_lladdr != NULL) free(ifma->ifma_lladdr, M_IFMADDR); #ifdef MCAST_VERBOSE kdb_backtrace(); printf("%s freeing ifma: %p\n", __func__, ifma); #endif free(ifma->ifma_addr, M_IFMADDR); free(ifma, M_IFMADDR); } static void if_destroymulti(epoch_context_t ctx) { struct ifmultiaddr *ifma; ifma = __containerof(ctx, struct ifmultiaddr, ifma_epoch_ctx); if_freemulti_internal(ifma); } void if_freemulti(struct ifmultiaddr *ifma) { KASSERT(ifma->ifma_refcount == 0, ("if_freemulti_epoch: refcount %d", ifma->ifma_refcount)); epoch_call(net_epoch_preempt, &ifma->ifma_epoch_ctx, if_destroymulti); } /* * Register an additional multicast address with a network interface. * * - If the address is already present, bump the reference count on the * address and return. * - If the address is not link-layer, look up a link layer address. * - Allocate address structures for one or both addresses, and attach to the * multicast address list on the interface. If automatically adding a link * layer address, the protocol address will own a reference to the link * layer address, to be freed when it is freed. * - Notify the network device driver of an addition to the multicast address * list. * * 'sa' points to caller-owned memory with the desired multicast address. * * 'retifma' will be used to return a pointer to the resulting multicast * address reference, if desired. */ int if_addmulti(struct ifnet *ifp, struct sockaddr *sa, struct ifmultiaddr **retifma) { struct ifmultiaddr *ifma, *ll_ifma; struct sockaddr *llsa; struct sockaddr_dl sdl; int error; #ifdef INET IN_MULTI_LIST_UNLOCK_ASSERT(); #endif #ifdef INET6 IN6_MULTI_LIST_UNLOCK_ASSERT(); #endif /* * If the address is already present, return a new reference to it; * otherwise, allocate storage and set up a new address. */ IF_ADDR_WLOCK(ifp); ifma = if_findmulti(ifp, sa); if (ifma != NULL) { ifma->ifma_refcount++; if (retifma != NULL) *retifma = ifma; IF_ADDR_WUNLOCK(ifp); return (0); } /* * The address isn't already present; resolve the protocol address * into a link layer address, and then look that up, bump its * refcount or allocate an ifma for that also. * Most link layer resolving functions returns address data which * fits inside default sockaddr_dl structure. However callback * can allocate another sockaddr structure, in that case we need to * free it later. */ llsa = NULL; ll_ifma = NULL; if (ifp->if_resolvemulti != NULL) { /* Provide called function with buffer size information */ sdl.sdl_len = sizeof(sdl); llsa = (struct sockaddr *)&sdl; error = ifp->if_resolvemulti(ifp, &llsa, sa); if (error) goto unlock_out; } /* * Allocate the new address. Don't hook it up yet, as we may also * need to allocate a link layer multicast address. */ ifma = if_allocmulti(ifp, sa, llsa, M_NOWAIT); if (ifma == NULL) { error = ENOMEM; goto free_llsa_out; } /* * If a link layer address is found, we'll need to see if it's * already present in the address list, or allocate is as well. * When this block finishes, the link layer address will be on the * list. */ if (llsa != NULL) { ll_ifma = if_findmulti(ifp, llsa); if (ll_ifma == NULL) { ll_ifma = if_allocmulti(ifp, llsa, NULL, M_NOWAIT); if (ll_ifma == NULL) { --ifma->ifma_refcount; if_freemulti(ifma); error = ENOMEM; goto free_llsa_out; } ll_ifma->ifma_flags |= IFMA_F_ENQUEUED; CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ll_ifma, ifma_link); } else ll_ifma->ifma_refcount++; ifma->ifma_llifma = ll_ifma; } /* * We now have a new multicast address, ifma, and possibly a new or * referenced link layer address. Add the primary address to the * ifnet address list. */ ifma->ifma_flags |= IFMA_F_ENQUEUED; CK_STAILQ_INSERT_HEAD(&ifp->if_multiaddrs, ifma, ifma_link); if (retifma != NULL) *retifma = ifma; /* * Must generate the message while holding the lock so that 'ifma' * pointer is still valid. */ rt_newmaddrmsg(RTM_NEWMADDR, ifma); IF_ADDR_WUNLOCK(ifp); /* * We are certain we have added something, so call down to the * interface to let them know about it. */ if (ifp->if_ioctl != NULL) { (void) (*ifp->if_ioctl)(ifp, SIOCADDMULTI, 0); } if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); return (0); free_llsa_out: if ((llsa != NULL) && (llsa != (struct sockaddr *)&sdl)) link_free_sdl(llsa); unlock_out: IF_ADDR_WUNLOCK(ifp); return (error); } /* * Delete a multicast group membership by network-layer group address. * * Returns ENOENT if the entry could not be found. If ifp no longer * exists, results are undefined. This entry point should only be used * from subsystems which do appropriate locking to hold ifp for the * duration of the call. * Network-layer protocol domains must use if_delmulti_ifma(). */ int if_delmulti(struct ifnet *ifp, struct sockaddr *sa) { struct ifmultiaddr *ifma; int lastref; #ifdef INVARIANTS struct ifnet *oifp; IFNET_RLOCK_NOSLEEP(); CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; if (ifp != oifp) ifp = NULL; IFNET_RUNLOCK_NOSLEEP(); KASSERT(ifp != NULL, ("%s: ifnet went away", __func__)); #endif if (ifp == NULL) return (ENOENT); IF_ADDR_WLOCK(ifp); lastref = 0; ifma = if_findmulti(ifp, sa); if (ifma != NULL) lastref = if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); if (ifma == NULL) return (ENOENT); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } return (0); } /* * Delete all multicast group membership for an interface. * Should be used to quickly flush all multicast filters. */ void if_delallmulti(struct ifnet *ifp) { struct ifmultiaddr *ifma; struct ifmultiaddr *next; IF_ADDR_WLOCK(ifp); CK_STAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, next) if_delmulti_locked(ifp, ifma, 0); IF_ADDR_WUNLOCK(ifp); } void if_delmulti_ifma(struct ifmultiaddr *ifma) { if_delmulti_ifma_flags(ifma, 0); } /* * Delete a multicast group membership by group membership pointer. * Network-layer protocol domains must use this routine. * * It is safe to call this routine if the ifp disappeared. */ void if_delmulti_ifma_flags(struct ifmultiaddr *ifma, int flags) { struct ifnet *ifp; int lastref; MCDPRINTF("%s freeing ifma: %p\n", __func__, ifma); #ifdef INET IN_MULTI_LIST_UNLOCK_ASSERT(); #endif ifp = ifma->ifma_ifp; #ifdef DIAGNOSTIC if (ifp == NULL) { printf("%s: ifma_ifp seems to be detached\n", __func__); } else { struct ifnet *oifp; IFNET_RLOCK_NOSLEEP(); CK_STAILQ_FOREACH(oifp, &V_ifnet, if_link) if (ifp == oifp) break; if (ifp != oifp) ifp = NULL; IFNET_RUNLOCK_NOSLEEP(); } #endif /* * If and only if the ifnet instance exists: Acquire the address lock. */ if (ifp != NULL) IF_ADDR_WLOCK(ifp); lastref = if_delmulti_locked(ifp, ifma, flags); if (ifp != NULL) { /* * If and only if the ifnet instance exists: * Release the address lock. * If the group was left: update the hardware hash filter. */ IF_ADDR_WUNLOCK(ifp); if (lastref && ifp->if_ioctl != NULL) { (void)(*ifp->if_ioctl)(ifp, SIOCDELMULTI, 0); } } } /* * Perform deletion of network-layer and/or link-layer multicast address. * * Return 0 if the reference count was decremented. * Return 1 if the final reference was released, indicating that the * hardware hash filter should be reprogrammed. */ static int if_delmulti_locked(struct ifnet *ifp, struct ifmultiaddr *ifma, int detaching) { struct ifmultiaddr *ll_ifma; if (ifp != NULL && ifma->ifma_ifp != NULL) { KASSERT(ifma->ifma_ifp == ifp, ("%s: inconsistent ifp %p", __func__, ifp)); IF_ADDR_WLOCK_ASSERT(ifp); } ifp = ifma->ifma_ifp; MCDPRINTF("%s freeing %p from %s \n", __func__, ifma, ifp ? ifp->if_xname : ""); /* * If the ifnet is detaching, null out references to ifnet, * so that upper protocol layers will notice, and not attempt * to obtain locks for an ifnet which no longer exists. The * routing socket announcement must happen before the ifnet * instance is detached from the system. */ if (detaching) { #ifdef DIAGNOSTIC printf("%s: detaching ifnet instance %p\n", __func__, ifp); #endif /* * ifp may already be nulled out if we are being reentered * to delete the ll_ifma. */ if (ifp != NULL) { rt_newmaddrmsg(RTM_DELMADDR, ifma); ifma->ifma_ifp = NULL; } } if (--ifma->ifma_refcount > 0) return 0; if (ifp != NULL && detaching == 0 && (ifma->ifma_flags & IFMA_F_ENQUEUED)) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } /* * If this ifma is a network-layer ifma, a link-layer ifma may * have been associated with it. Release it first if so. */ ll_ifma = ifma->ifma_llifma; if (ll_ifma != NULL) { KASSERT(ifma->ifma_lladdr != NULL, ("%s: llifma w/o lladdr", __func__)); if (detaching) ll_ifma->ifma_ifp = NULL; /* XXX */ if (--ll_ifma->ifma_refcount == 0) { if (ifp != NULL) { if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; } } if_freemulti(ll_ifma); } } #ifdef INVARIANTS if (ifp) { struct ifmultiaddr *ifmatmp; CK_STAILQ_FOREACH(ifmatmp, &ifp->if_multiaddrs, ifma_link) MPASS(ifma != ifmatmp); } #endif if_freemulti(ifma); /* * The last reference to this instance of struct ifmultiaddr * was released; the hardware should be notified of this change. */ return 1; } /* * Set the link layer address on an interface. * * At this time we only support certain types of interfaces, * and we don't allow the length of the address to change. * * Set noinline to be dtrace-friendly */ __noinline int if_setlladdr(struct ifnet *ifp, const u_char *lladdr, int len) { struct sockaddr_dl *sdl; struct ifaddr *ifa; struct ifreq ifr; int rc; rc = 0; NET_EPOCH_ENTER(); ifa = ifp->if_addr; if (ifa == NULL) { rc = EINVAL; goto out; } sdl = (struct sockaddr_dl *)ifa->ifa_addr; if (sdl == NULL) { rc = EINVAL; goto out; } if (len != sdl->sdl_alen) { /* don't allow length to change */ rc = EINVAL; goto out; } switch (ifp->if_type) { case IFT_ETHER: case IFT_XETHER: case IFT_L2VLAN: case IFT_BRIDGE: case IFT_IEEE8023ADLAG: bcopy(lladdr, LLADDR(sdl), len); break; default: rc = ENODEV; goto out; } /* * If the interface is already up, we need * to re-init it in order to reprogram its * address filter. */ NET_EPOCH_EXIT(); if ((ifp->if_flags & IFF_UP) != 0) { if (ifp->if_ioctl) { ifp->if_flags &= ~IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); ifp->if_flags |= IFF_UP; ifr.ifr_flags = ifp->if_flags & 0xffff; ifr.ifr_flagshigh = ifp->if_flags >> 16; (*ifp->if_ioctl)(ifp, SIOCSIFFLAGS, (caddr_t)&ifr); } } EVENTHANDLER_INVOKE(iflladdr_event, ifp); return (0); out: NET_EPOCH_EXIT(); return (rc); } /* * Compat function for handling basic encapsulation requests. * Not converted stacks (FDDI, IB, ..) supports traditional * output model: ARP (and other similar L2 protocols) are handled * inside output routine, arpresolve/nd6_resolve() returns MAC * address instead of full prepend. * * This function creates calculated header==MAC for IPv4/IPv6 and * returns EAFNOSUPPORT (which is then handled in ARP code) for other * address families. */ static int if_requestencap_default(struct ifnet *ifp, struct if_encap_req *req) { if (req->rtype != IFENCAP_LL) return (EOPNOTSUPP); if (req->bufsize < req->lladdr_len) return (ENOMEM); switch (req->family) { case AF_INET: case AF_INET6: break; default: return (EAFNOSUPPORT); } /* Copy lladdr to storage as is */ memmove(req->buf, req->lladdr, req->lladdr_len); req->bufsize = req->lladdr_len; req->lladdr_off = 0; return (0); } /* * Tunnel interfaces can nest, also they may cause infinite recursion * calls when misconfigured. We'll prevent this by detecting loops. * High nesting level may cause stack exhaustion. We'll prevent this * by introducing upper limit. * * Return 0, if tunnel nesting count is equal or less than limit. */ int if_tunnel_check_nesting(struct ifnet *ifp, struct mbuf *m, uint32_t cookie, int limit) { struct m_tag *mtag; int count; count = 1; mtag = NULL; while ((mtag = m_tag_locate(m, cookie, 0, mtag)) != NULL) { if (*(struct ifnet **)(mtag + 1) == ifp) { log(LOG_NOTICE, "%s: loop detected\n", if_name(ifp)); return (EIO); } count++; } if (count > limit) { log(LOG_NOTICE, "%s: if_output recursively called too many times(%d)\n", if_name(ifp), count); return (EIO); } mtag = m_tag_alloc(cookie, 0, sizeof(struct ifnet *), M_NOWAIT); if (mtag == NULL) return (ENOMEM); *(struct ifnet **)(mtag + 1) = ifp; m_tag_prepend(m, mtag); return (0); } /* * Get the link layer address that was read from the hardware at attach. * * This is only set by Ethernet NICs (IFT_ETHER), but laggX interfaces re-type * their component interfaces as IFT_IEEE8023ADLAG. */ int if_gethwaddr(struct ifnet *ifp, struct ifreq *ifr) { if (ifp->if_hw_addr == NULL) return (ENODEV); switch (ifp->if_type) { case IFT_ETHER: case IFT_IEEE8023ADLAG: bcopy(ifp->if_hw_addr, ifr->ifr_addr.sa_data, ifp->if_addrlen); return (0); default: return (ENODEV); } } /* * The name argument must be a pointer to storage which will last as * long as the interface does. For physical devices, the result of * device_get_name(dev) is a good choice and for pseudo-devices a * static string works well. */ void if_initname(struct ifnet *ifp, const char *name, int unit) { ifp->if_dname = name; ifp->if_dunit = unit; if (unit != IF_DUNIT_NONE) snprintf(ifp->if_xname, IFNAMSIZ, "%s%d", name, unit); else strlcpy(ifp->if_xname, name, IFNAMSIZ); } int if_printf(struct ifnet *ifp, const char *fmt, ...) { char if_fmt[256]; va_list ap; snprintf(if_fmt, sizeof(if_fmt), "%s: %s", ifp->if_xname, fmt); va_start(ap, fmt); vlog(LOG_INFO, if_fmt, ap); va_end(ap); return (0); } void if_start(struct ifnet *ifp) { (*(ifp)->if_start)(ifp); } /* * Backwards compatibility interface for drivers * that have not implemented it */ static int if_transmit(struct ifnet *ifp, struct mbuf *m) { int error; IFQ_HANDOFF(ifp, m, error); return (error); } static void if_input_default(struct ifnet *ifp __unused, struct mbuf *m) { m_freem(m); } int if_handoff(struct ifqueue *ifq, struct mbuf *m, struct ifnet *ifp, int adjust) { int active = 0; IF_LOCK(ifq); if (_IF_QFULL(ifq)) { IF_UNLOCK(ifq); if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1); m_freem(m); return (0); } if (ifp != NULL) { if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len + adjust); if (m->m_flags & (M_BCAST|M_MCAST)) if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1); active = ifp->if_drv_flags & IFF_DRV_OACTIVE; } _IF_ENQUEUE(ifq, m); IF_UNLOCK(ifq); if (ifp != NULL && !active) (*(ifp)->if_start)(ifp); return (1); } void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f) { KASSERT(if_com_alloc[type] == NULL, ("if_register_com_alloc: %d already registered", type)); KASSERT(if_com_free[type] == NULL, ("if_register_com_alloc: %d free already registered", type)); if_com_alloc[type] = a; if_com_free[type] = f; } void if_deregister_com_alloc(u_char type) { KASSERT(if_com_alloc[type] != NULL, ("if_deregister_com_alloc: %d not registered", type)); KASSERT(if_com_free[type] != NULL, ("if_deregister_com_alloc: %d free not registered", type)); if_com_alloc[type] = NULL; if_com_free[type] = NULL; } /* API for driver access to network stack owned ifnet.*/ uint64_t if_setbaudrate(struct ifnet *ifp, uint64_t baudrate) { uint64_t oldbrate; oldbrate = ifp->if_baudrate; ifp->if_baudrate = baudrate; return (oldbrate); } uint64_t if_getbaudrate(if_t ifp) { return (((struct ifnet *)ifp)->if_baudrate); } int if_setcapabilities(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capabilities = capabilities; return (0); } int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit) { ((struct ifnet *)ifp)->if_capabilities |= setbit; ((struct ifnet *)ifp)->if_capabilities &= ~clearbit; return (0); } int if_getcapabilities(if_t ifp) { return ((struct ifnet *)ifp)->if_capabilities; } int if_setcapenable(if_t ifp, int capabilities) { ((struct ifnet *)ifp)->if_capenable = capabilities; return (0); } int if_setcapenablebit(if_t ifp, int setcap, int clearcap) { if(setcap) ((struct ifnet *)ifp)->if_capenable |= setcap; if(clearcap) ((struct ifnet *)ifp)->if_capenable &= ~clearcap; return (0); } const char * if_getdname(if_t ifp) { return ((struct ifnet *)ifp)->if_dname; } int if_togglecapenable(if_t ifp, int togglecap) { ((struct ifnet *)ifp)->if_capenable ^= togglecap; return (0); } int if_getcapenable(if_t ifp) { return ((struct ifnet *)ifp)->if_capenable; } /* * This is largely undesirable because it ties ifnet to a device, but does * provide flexiblity for an embedded product vendor. Should be used with * the understanding that it violates the interface boundaries, and should be * a last resort only. */ int if_setdev(if_t ifp, void *dev) { return (0); } int if_setdrvflagbits(if_t ifp, int set_flags, int clear_flags) { ((struct ifnet *)ifp)->if_drv_flags |= set_flags; ((struct ifnet *)ifp)->if_drv_flags &= ~clear_flags; return (0); } int if_getdrvflags(if_t ifp) { return ((struct ifnet *)ifp)->if_drv_flags; } int if_setdrvflags(if_t ifp, int flags) { ((struct ifnet *)ifp)->if_drv_flags = flags; return (0); } int if_setflags(if_t ifp, int flags) { ((struct ifnet *)ifp)->if_flags = flags; return (0); } int if_setflagbits(if_t ifp, int set, int clear) { ((struct ifnet *)ifp)->if_flags |= set; ((struct ifnet *)ifp)->if_flags &= ~clear; return (0); } int if_getflags(if_t ifp) { return ((struct ifnet *)ifp)->if_flags; } int if_clearhwassist(if_t ifp) { ((struct ifnet *)ifp)->if_hwassist = 0; return (0); } int if_sethwassistbits(if_t ifp, int toset, int toclear) { ((struct ifnet *)ifp)->if_hwassist |= toset; ((struct ifnet *)ifp)->if_hwassist &= ~toclear; return (0); } int if_sethwassist(if_t ifp, int hwassist_bit) { ((struct ifnet *)ifp)->if_hwassist = hwassist_bit; return (0); } int if_gethwassist(if_t ifp) { return ((struct ifnet *)ifp)->if_hwassist; } int if_setmtu(if_t ifp, int mtu) { ((struct ifnet *)ifp)->if_mtu = mtu; return (0); } int if_getmtu(if_t ifp) { return ((struct ifnet *)ifp)->if_mtu; } int if_getmtu_family(if_t ifp, int family) { struct domain *dp; for (dp = domains; dp; dp = dp->dom_next) { if (dp->dom_family == family && dp->dom_ifmtu != NULL) return (dp->dom_ifmtu((struct ifnet *)ifp)); } return (((struct ifnet *)ifp)->if_mtu); } int if_setsoftc(if_t ifp, void *softc) { ((struct ifnet *)ifp)->if_softc = softc; return (0); } void * if_getsoftc(if_t ifp) { return ((struct ifnet *)ifp)->if_softc; } void if_setrcvif(struct mbuf *m, if_t ifp) { m->m_pkthdr.rcvif = (struct ifnet *)ifp; } void if_setvtag(struct mbuf *m, uint16_t tag) { m->m_pkthdr.ether_vtag = tag; } uint16_t if_getvtag(struct mbuf *m) { return (m->m_pkthdr.ether_vtag); } int if_sendq_empty(if_t ifp) { return IFQ_DRV_IS_EMPTY(&((struct ifnet *)ifp)->if_snd); } struct ifaddr * if_getifaddr(if_t ifp) { return ((struct ifnet *)ifp)->if_addr; } int if_getamcount(if_t ifp) { return ((struct ifnet *)ifp)->if_amcount; } int if_setsendqready(if_t ifp) { IFQ_SET_READY(&((struct ifnet *)ifp)->if_snd); return (0); } int if_setsendqlen(if_t ifp, int tx_desc_count) { IFQ_SET_MAXLEN(&((struct ifnet *)ifp)->if_snd, tx_desc_count); ((struct ifnet *)ifp)->if_snd.ifq_drv_maxlen = tx_desc_count; return (0); } int if_vlantrunkinuse(if_t ifp) { return ((struct ifnet *)ifp)->if_vlantrunk != NULL?1:0; } int if_input(if_t ifp, struct mbuf* sendmp) { (*((struct ifnet *)ifp)->if_input)((struct ifnet *)ifp, sendmp); return (0); } /* XXX */ #ifndef ETH_ADDR_LEN #define ETH_ADDR_LEN 6 #endif int if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max) { struct ifmultiaddr *ifma; uint8_t *lmta = (uint8_t *)mta; int mcnt = 0; CK_STAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; if (mcnt == max) break; bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &lmta[mcnt * ETH_ADDR_LEN], ETH_ADDR_LEN); mcnt++; } *cnt = mcnt; return (0); } int if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max) { int error; if_maddr_rlock(ifp); error = if_setupmultiaddr(ifp, mta, cnt, max); if_maddr_runlock(ifp); return (error); } int if_multiaddr_count(if_t ifp, int max) { struct ifmultiaddr *ifma; int count; count = 0; if_maddr_rlock(ifp); CK_STAILQ_FOREACH(ifma, &((struct ifnet *)ifp)->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; count++; if (count == max) break; } if_maddr_runlock(ifp); return (count); } int if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg) { struct ifmultiaddr *ifma; int cnt = 0; if_maddr_rlock(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) cnt += filter(arg, ifma, cnt); if_maddr_runlock(ifp); return (cnt); } struct mbuf * if_dequeue(if_t ifp) { struct mbuf *m; IFQ_DRV_DEQUEUE(&((struct ifnet *)ifp)->if_snd, m); return (m); } int if_sendq_prepend(if_t ifp, struct mbuf *m) { IFQ_DRV_PREPEND(&((struct ifnet *)ifp)->if_snd, m); return (0); } int if_setifheaderlen(if_t ifp, int len) { ((struct ifnet *)ifp)->if_hdrlen = len; return (0); } caddr_t if_getlladdr(if_t ifp) { return (IF_LLADDR((struct ifnet *)ifp)); } void * if_gethandle(u_char type) { return (if_alloc(type)); } void if_bpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; BPF_MTAP(ifp, m); } void if_etherbpfmtap(if_t ifh, struct mbuf *m) { struct ifnet *ifp = (struct ifnet *)ifh; ETHER_BPF_MTAP(ifp, m); } void if_vlancap(if_t ifh) { struct ifnet *ifp = (struct ifnet *)ifh; VLAN_CAPABILITIES(ifp); } int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax) { ((struct ifnet *)ifp)->if_hw_tsomax = if_hw_tsomax; return (0); } int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount) { ((struct ifnet *)ifp)->if_hw_tsomaxsegcount = if_hw_tsomaxsegcount; return (0); } int if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize) { ((struct ifnet *)ifp)->if_hw_tsomaxsegsize = if_hw_tsomaxsegsize; return (0); } u_int if_gethwtsomax(if_t ifp) { return (((struct ifnet *)ifp)->if_hw_tsomax); } u_int if_gethwtsomaxsegcount(if_t ifp) { return (((struct ifnet *)ifp)->if_hw_tsomaxsegcount); } u_int if_gethwtsomaxsegsize(if_t ifp) { return (((struct ifnet *)ifp)->if_hw_tsomaxsegsize); } void if_setinitfn(if_t ifp, void (*init_fn)(void *)) { ((struct ifnet *)ifp)->if_init = init_fn; } void if_setioctlfn(if_t ifp, int (*ioctl_fn)(if_t, u_long, caddr_t)) { ((struct ifnet *)ifp)->if_ioctl = (void *)ioctl_fn; } void if_setstartfn(if_t ifp, void (*start_fn)(if_t)) { ((struct ifnet *)ifp)->if_start = (void *)start_fn; } void if_settransmitfn(if_t ifp, if_transmit_fn_t start_fn) { ((struct ifnet *)ifp)->if_transmit = start_fn; } void if_setqflushfn(if_t ifp, if_qflush_fn_t flush_fn) { ((struct ifnet *)ifp)->if_qflush = flush_fn; } void if_setgetcounterfn(if_t ifp, if_get_counter_t fn) { ifp->if_get_counter = fn; } /* Revisit these - These are inline functions originally. */ int drbr_inuse_drv(if_t ifh, struct buf_ring *br) { return drbr_inuse(ifh, br); } struct mbuf* drbr_dequeue_drv(if_t ifh, struct buf_ring *br) { return drbr_dequeue(ifh, br); } int drbr_needs_enqueue_drv(if_t ifh, struct buf_ring *br) { return drbr_needs_enqueue(ifh, br); } int drbr_enqueue_drv(if_t ifh, struct buf_ring *br, struct mbuf *m) { return drbr_enqueue(ifh, br, m); } Index: projects/clang700-import/sys/net/if_tap.c =================================================================== --- projects/clang700-import/sys/net/if_tap.c (revision 339014) +++ projects/clang700-import/sys/net/if_tap.c (revision 339015) @@ -1,1114 +1,1127 @@ /*- * SPDX-License-Identifier: BSD-2-Clause-FreeBSD * * Copyright (C) 1999-2000 by Maksim Yevmenkin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * BASED ON: * ------------------------------------------------------------------------- * * Copyright (c) 1988, Julian Onions * Nottingham University 1987. */ /* * $FreeBSD$ * $Id: if_tap.c,v 0.21 2000/07/23 21:46:02 max Exp $ */ #include "opt_inet.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define CDEV_NAME "tap" #define TAPDEBUG if (tapdebug) printf static const char tapname[] = "tap"; static const char vmnetname[] = "vmnet"; #define TAPMAXUNIT 0x7fff #define VMNET_DEV_MASK CLONE_FLAG0 /* module */ static int tapmodevent(module_t, int, void *); /* device */ static void tapclone(void *, struct ucred *, char *, int, struct cdev **); static void tapcreate(struct cdev *); /* network interface */ static void tapifstart(struct ifnet *); static int tapifioctl(struct ifnet *, u_long, caddr_t); static void tapifinit(void *); static int tap_clone_create(struct if_clone *, int, caddr_t); static void tap_clone_destroy(struct ifnet *); static struct if_clone *tap_cloner; static int vmnet_clone_create(struct if_clone *, int, caddr_t); static void vmnet_clone_destroy(struct ifnet *); static struct if_clone *vmnet_cloner; /* character device */ static d_open_t tapopen; static d_close_t tapclose; static d_read_t tapread; static d_write_t tapwrite; static d_ioctl_t tapioctl; static d_poll_t tappoll; static d_kqfilter_t tapkqfilter; /* kqueue(2) */ static int tapkqread(struct knote *, long); static int tapkqwrite(struct knote *, long); static void tapkqdetach(struct knote *); static struct filterops tap_read_filterops = { .f_isfd = 1, .f_attach = NULL, .f_detach = tapkqdetach, .f_event = tapkqread, }; static struct filterops tap_write_filterops = { .f_isfd = 1, .f_attach = NULL, .f_detach = tapkqdetach, .f_event = tapkqwrite, }; static struct cdevsw tap_cdevsw = { .d_version = D_VERSION, .d_flags = D_NEEDMINOR, .d_open = tapopen, .d_close = tapclose, .d_read = tapread, .d_write = tapwrite, .d_ioctl = tapioctl, .d_poll = tappoll, .d_name = CDEV_NAME, .d_kqfilter = tapkqfilter, }; /* * All global variables in if_tap.c are locked with tapmtx, with the * exception of tapdebug, which is accessed unlocked; tapclones is * static at runtime. */ static struct mtx tapmtx; static int tapdebug = 0; /* debug flag */ static int tapuopen = 0; /* allow user open() */ static int tapuponopen = 0; /* IFF_UP on open() */ static int tapdclone = 1; /* enable devfs cloning */ static SLIST_HEAD(, tap_softc) taphead; /* first device */ static struct clonedevs *tapclones; MALLOC_DECLARE(M_TAP); MALLOC_DEFINE(M_TAP, CDEV_NAME, "Ethernet tunnel interface"); SYSCTL_INT(_debug, OID_AUTO, if_tap_debug, CTLFLAG_RW, &tapdebug, 0, ""); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, OID_AUTO, tap, CTLFLAG_RW, 0, "Ethernet tunnel software network interface"); SYSCTL_INT(_net_link_tap, OID_AUTO, user_open, CTLFLAG_RW, &tapuopen, 0, "Allow user to open /dev/tap (based on node permissions)"); SYSCTL_INT(_net_link_tap, OID_AUTO, up_on_open, CTLFLAG_RW, &tapuponopen, 0, "Bring interface up when /dev/tap is opened"); SYSCTL_INT(_net_link_tap, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tapdclone, 0, "Enable legacy devfs interface creation"); SYSCTL_INT(_net_link_tap, OID_AUTO, debug, CTLFLAG_RW, &tapdebug, 0, ""); DEV_MODULE(if_tap, tapmodevent, NULL); static int tap_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct cdev *dev; int i; /* Find any existing device, or allocate new unit number. */ i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, 0); if (i) { dev = make_dev(&tap_cdevsw, unit, UID_ROOT, GID_WHEEL, 0600, "%s%d", tapname, unit); } tapcreate(dev); return (0); } /* vmnet devices are tap devices in disguise */ static int vmnet_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct cdev *dev; int i; /* Find any existing device, or allocate new unit number. */ i = clone_create(&tapclones, &tap_cdevsw, &unit, &dev, VMNET_DEV_MASK); if (i) { dev = make_dev(&tap_cdevsw, unit | VMNET_DEV_MASK, UID_ROOT, GID_WHEEL, 0600, "%s%d", vmnetname, unit); } tapcreate(dev); return (0); } static void tap_destroy(struct tap_softc *tp) { struct ifnet *ifp = tp->tap_ifp; CURVNET_SET(ifp->if_vnet); destroy_dev(tp->tap_dev); seldrain(&tp->tap_rsel); knlist_clear(&tp->tap_rsel.si_note, 0); knlist_destroy(&tp->tap_rsel.si_note); ether_ifdetach(ifp); if_free(ifp); mtx_destroy(&tp->tap_mtx); free(tp, M_TAP); CURVNET_RESTORE(); } static void tap_clone_destroy(struct ifnet *ifp) { struct tap_softc *tp = ifp->if_softc; mtx_lock(&tapmtx); SLIST_REMOVE(&taphead, tp, tap_softc, tap_next); mtx_unlock(&tapmtx); tap_destroy(tp); } /* vmnet devices are tap devices in disguise */ static void vmnet_clone_destroy(struct ifnet *ifp) { tap_clone_destroy(ifp); } /* * tapmodevent * * module event handler */ static int tapmodevent(module_t mod, int type, void *data) { static eventhandler_tag eh_tag = NULL; struct tap_softc *tp = NULL; struct ifnet *ifp = NULL; switch (type) { case MOD_LOAD: /* intitialize device */ mtx_init(&tapmtx, "tapmtx", NULL, MTX_DEF); SLIST_INIT(&taphead); clone_setup(&tapclones); eh_tag = EVENTHANDLER_REGISTER(dev_clone, tapclone, 0, 1000); if (eh_tag == NULL) { clone_cleanup(&tapclones); mtx_destroy(&tapmtx); return (ENOMEM); } tap_cloner = if_clone_simple(tapname, tap_clone_create, tap_clone_destroy, 0); vmnet_cloner = if_clone_simple(vmnetname, vmnet_clone_create, vmnet_clone_destroy, 0); return (0); case MOD_UNLOAD: /* * The EBUSY algorithm here can't quite atomically * guarantee that this is race-free since we have to * release the tap mtx to deregister the clone handler. */ mtx_lock(&tapmtx); SLIST_FOREACH(tp, &taphead, tap_next) { mtx_lock(&tp->tap_mtx); if (tp->tap_flags & TAP_OPEN) { mtx_unlock(&tp->tap_mtx); mtx_unlock(&tapmtx); return (EBUSY); } mtx_unlock(&tp->tap_mtx); } mtx_unlock(&tapmtx); EVENTHANDLER_DEREGISTER(dev_clone, eh_tag); if_clone_detach(tap_cloner); if_clone_detach(vmnet_cloner); drain_dev_clone_events(); mtx_lock(&tapmtx); while ((tp = SLIST_FIRST(&taphead)) != NULL) { SLIST_REMOVE_HEAD(&taphead, tap_next); mtx_unlock(&tapmtx); ifp = tp->tap_ifp; TAPDEBUG("detaching %s\n", ifp->if_xname); tap_destroy(tp); mtx_lock(&tapmtx); } mtx_unlock(&tapmtx); clone_cleanup(&tapclones); mtx_destroy(&tapmtx); break; default: return (EOPNOTSUPP); } return (0); } /* tapmodevent */ /* * DEVFS handler * * We need to support two kind of devices - tap and vmnet */ static void tapclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) { char devname[SPECNAMELEN + 1]; int i, unit, append_unit; int extra; if (*dev != NULL) return; if (!tapdclone || (!tapuopen && priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0)) return; unit = 0; append_unit = 0; extra = 0; /* We're interested in only tap/vmnet devices. */ if (strcmp(name, tapname) == 0) { unit = -1; } else if (strcmp(name, vmnetname) == 0) { unit = -1; extra = VMNET_DEV_MASK; } else if (dev_stdclone(name, NULL, tapname, &unit) != 1) { if (dev_stdclone(name, NULL, vmnetname, &unit) != 1) { return; } else { extra = VMNET_DEV_MASK; } } if (unit == -1) append_unit = 1; CURVNET_SET(CRED_TO_VNET(cred)); /* find any existing device, or allocate new unit number */ i = clone_create(&tapclones, &tap_cdevsw, &unit, dev, extra); if (i) { if (append_unit) { /* * We were passed 'tun' or 'tap', with no unit specified * so we'll need to append it now. */ namelen = snprintf(devname, sizeof(devname), "%s%d", name, unit); name = devname; } *dev = make_dev_credf(MAKEDEV_REF, &tap_cdevsw, unit | extra, cred, UID_ROOT, GID_WHEEL, 0600, "%s", name); } if_clone_create(name, namelen, NULL); CURVNET_RESTORE(); } /* tapclone */ /* * tapcreate * * to create interface */ static void tapcreate(struct cdev *dev) { struct ifnet *ifp = NULL; struct tap_softc *tp = NULL; unsigned short macaddr_hi; uint32_t macaddr_mid; int unit; const char *name = NULL; u_char eaddr[6]; /* allocate driver storage and create device */ tp = malloc(sizeof(*tp), M_TAP, M_WAITOK | M_ZERO); mtx_init(&tp->tap_mtx, "tap_mtx", NULL, MTX_DEF); mtx_lock(&tapmtx); SLIST_INSERT_HEAD(&taphead, tp, tap_next); mtx_unlock(&tapmtx); unit = dev2unit(dev); /* select device: tap or vmnet */ if (unit & VMNET_DEV_MASK) { name = vmnetname; tp->tap_flags |= TAP_VMNET; } else name = tapname; unit &= TAPMAXUNIT; TAPDEBUG("tapcreate(%s%d). minor = %#x\n", name, unit, dev2unit(dev)); /* generate fake MAC address: 00 bd xx xx xx unit_no */ macaddr_hi = htons(0x00bd); macaddr_mid = (uint32_t) ticks; bcopy(&macaddr_hi, eaddr, sizeof(short)); bcopy(&macaddr_mid, &eaddr[2], sizeof(uint32_t)); eaddr[5] = (u_char)unit; /* fill the rest and attach interface */ ifp = tp->tap_ifp = if_alloc(IFT_ETHER); if (ifp == NULL) panic("%s%d: can not if_alloc()", name, unit); ifp->if_softc = tp; if_initname(ifp, name, unit); ifp->if_init = tapifinit; ifp->if_start = tapifstart; ifp->if_ioctl = tapifioctl; ifp->if_mtu = ETHERMTU; ifp->if_flags = (IFF_BROADCAST|IFF_SIMPLEX|IFF_MULTICAST); IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_capabilities |= IFCAP_LINKSTATE; ifp->if_capenable |= IFCAP_LINKSTATE; dev->si_drv1 = tp; tp->tap_dev = dev; ether_ifattach(ifp, eaddr); mtx_lock(&tp->tap_mtx); tp->tap_flags |= TAP_INITED; mtx_unlock(&tp->tap_mtx); knlist_init_mtx(&tp->tap_rsel.si_note, &tp->tap_mtx); TAPDEBUG("interface %s is created. minor = %#x\n", ifp->if_xname, dev2unit(dev)); } /* tapcreate */ /* * tapopen * * to open tunnel. must be superuser */ static int tapopen(struct cdev *dev, int flag, int mode, struct thread *td) { struct tap_softc *tp = NULL; struct ifnet *ifp = NULL; int error; if (tapuopen == 0) { error = priv_check(td, PRIV_NET_TAP); if (error) return (error); } if ((dev2unit(dev) & CLONE_UNITMASK) > TAPMAXUNIT) return (ENXIO); tp = dev->si_drv1; mtx_lock(&tp->tap_mtx); if (tp->tap_flags & TAP_OPEN) { mtx_unlock(&tp->tap_mtx); return (EBUSY); } bcopy(IF_LLADDR(tp->tap_ifp), tp->ether_addr, sizeof(tp->ether_addr)); tp->tap_pid = td->td_proc->p_pid; tp->tap_flags |= TAP_OPEN; ifp = tp->tap_ifp; ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; if (tapuponopen) ifp->if_flags |= IFF_UP; if_link_state_change(ifp, LINK_STATE_UP); mtx_unlock(&tp->tap_mtx); TAPDEBUG("%s is open. minor = %#x\n", ifp->if_xname, dev2unit(dev)); return (0); } /* tapopen */ /* * tapclose * * close the device - mark i/f down & delete routing info */ static int tapclose(struct cdev *dev, int foo, int bar, struct thread *td) { struct ifaddr *ifa; struct tap_softc *tp = dev->si_drv1; struct ifnet *ifp = tp->tap_ifp; /* junk all pending output */ mtx_lock(&tp->tap_mtx); CURVNET_SET(ifp->if_vnet); IF_DRAIN(&ifp->if_snd); /* * Do not bring the interface down, and do not anything with * interface, if we are in VMnet mode. Just close the device. */ if (((tp->tap_flags & TAP_VMNET) == 0) && (ifp->if_flags & (IFF_UP | IFF_LINK0)) == IFF_UP) { mtx_unlock(&tp->tap_mtx); if_down(ifp); mtx_lock(&tp->tap_mtx); if (ifp->if_drv_flags & IFF_DRV_RUNNING) { ifp->if_drv_flags &= ~IFF_DRV_RUNNING; mtx_unlock(&tp->tap_mtx); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { rtinit(ifa, (int)RTM_DELETE, 0); } if_purgeaddrs(ifp); mtx_lock(&tp->tap_mtx); } } if_link_state_change(ifp, LINK_STATE_DOWN); CURVNET_RESTORE(); funsetown(&tp->tap_sigio); selwakeuppri(&tp->tap_rsel, PZERO+1); KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); tp->tap_flags &= ~TAP_OPEN; tp->tap_pid = 0; mtx_unlock(&tp->tap_mtx); TAPDEBUG("%s is closed. minor = %#x\n", ifp->if_xname, dev2unit(dev)); return (0); } /* tapclose */ /* * tapifinit * * network interface initialization function */ static void tapifinit(void *xtp) { struct tap_softc *tp = (struct tap_softc *)xtp; struct ifnet *ifp = tp->tap_ifp; TAPDEBUG("initializing %s\n", ifp->if_xname); mtx_lock(&tp->tap_mtx); ifp->if_drv_flags |= IFF_DRV_RUNNING; ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; mtx_unlock(&tp->tap_mtx); /* attempt to start output */ tapifstart(ifp); } /* tapifinit */ /* * tapifioctl * * Process an ioctl request on network interface */ static int tapifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct tap_softc *tp = ifp->if_softc; struct ifreq *ifr = (struct ifreq *)data; struct ifstat *ifs = NULL; struct ifmediareq *ifmr = NULL; int dummy, error = 0; switch (cmd) { case SIOCSIFFLAGS: /* XXX -- just like vmnet does */ case SIOCADDMULTI: case SIOCDELMULTI: break; case SIOCGIFMEDIA: ifmr = (struct ifmediareq *)data; dummy = ifmr->ifm_count; ifmr->ifm_count = 1; ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; if (tp->tap_flags & TAP_OPEN) ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_current = ifmr->ifm_active; if (dummy >= 1) { int media = IFM_ETHER; error = copyout(&media, ifmr->ifm_ulist, sizeof(int)); } break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; break; case SIOCGIFSTATUS: ifs = (struct ifstat *)data; mtx_lock(&tp->tap_mtx); if (tp->tap_pid != 0) snprintf(ifs->ascii, sizeof(ifs->ascii), "\tOpened by PID %d\n", tp->tap_pid); else ifs->ascii[0] = '\0'; mtx_unlock(&tp->tap_mtx); break; default: error = ether_ioctl(ifp, cmd, data); break; } return (error); } /* tapifioctl */ /* * tapifstart * * queue packets from higher level ready to put out */ static void tapifstart(struct ifnet *ifp) { struct tap_softc *tp = ifp->if_softc; TAPDEBUG("%s starting\n", ifp->if_xname); /* * do not junk pending output if we are in VMnet mode. * XXX: can this do any harm because of queue overflow? */ mtx_lock(&tp->tap_mtx); if (((tp->tap_flags & TAP_VMNET) == 0) && ((tp->tap_flags & TAP_READY) != TAP_READY)) { struct mbuf *m; /* Unlocked read. */ TAPDEBUG("%s not ready, tap_flags = 0x%x\n", ifp->if_xname, tp->tap_flags); for (;;) { IF_DEQUEUE(&ifp->if_snd, m); if (m != NULL) { m_freem(m); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); } else break; } mtx_unlock(&tp->tap_mtx); return; } ifp->if_drv_flags |= IFF_DRV_OACTIVE; if (!IFQ_IS_EMPTY(&ifp->if_snd)) { if (tp->tap_flags & TAP_RWAIT) { tp->tap_flags &= ~TAP_RWAIT; wakeup(tp); } if ((tp->tap_flags & TAP_ASYNC) && (tp->tap_sigio != NULL)) { mtx_unlock(&tp->tap_mtx); pgsigio(&tp->tap_sigio, SIGIO, 0); mtx_lock(&tp->tap_mtx); } selwakeuppri(&tp->tap_rsel, PZERO+1); KNOTE_LOCKED(&tp->tap_rsel.si_note, 0); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); /* obytes are counted in ether_output */ } ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; mtx_unlock(&tp->tap_mtx); } /* tapifstart */ /* * tapioctl * * the cdevsw interface is now pretty minimal */ static int tapioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) { + struct ifreq ifr; struct tap_softc *tp = dev->si_drv1; struct ifnet *ifp = tp->tap_ifp; struct tapinfo *tapp = NULL; int f; + int error; #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) int ival; #endif switch (cmd) { case TAPSIFINFO: tapp = (struct tapinfo *)data; if (ifp->if_type != tapp->type) return (EPROTOTYPE); mtx_lock(&tp->tap_mtx); - ifp->if_mtu = tapp->mtu; + if (ifp->if_mtu != tapp->mtu) { + strncpy(ifr.ifr_name, if_name(ifp), IFNAMSIZ); + ifr.ifr_mtu = tapp->mtu; + CURVNET_SET(ifp->if_vnet); + error = ifhwioctl(SIOCSIFMTU, ifp, + (caddr_t)&ifr, td); + CURVNET_RESTORE(); + if (error) { + mtx_unlock(&tp->tap_mtx); + return (error); + } + } ifp->if_baudrate = tapp->baudrate; mtx_unlock(&tp->tap_mtx); break; case TAPGIFINFO: tapp = (struct tapinfo *)data; mtx_lock(&tp->tap_mtx); tapp->mtu = ifp->if_mtu; tapp->type = ifp->if_type; tapp->baudrate = ifp->if_baudrate; mtx_unlock(&tp->tap_mtx); break; case TAPSDEBUG: tapdebug = *(int *)data; break; case TAPGDEBUG: *(int *)data = tapdebug; break; case TAPGIFNAME: { struct ifreq *ifr = (struct ifreq *) data; strlcpy(ifr->ifr_name, ifp->if_xname, IFNAMSIZ); } break; case FIONBIO: break; case FIOASYNC: mtx_lock(&tp->tap_mtx); if (*(int *)data) tp->tap_flags |= TAP_ASYNC; else tp->tap_flags &= ~TAP_ASYNC; mtx_unlock(&tp->tap_mtx); break; case FIONREAD: if (!IFQ_IS_EMPTY(&ifp->if_snd)) { struct mbuf *mb; IFQ_LOCK(&ifp->if_snd); IFQ_POLL_NOLOCK(&ifp->if_snd, mb); for (*(int *)data = 0; mb != NULL; mb = mb->m_next) *(int *)data += mb->m_len; IFQ_UNLOCK(&ifp->if_snd); } else *(int *)data = 0; break; case FIOSETOWN: return (fsetown(*(int *)data, &tp->tap_sigio)); case FIOGETOWN: *(int *)data = fgetown(&tp->tap_sigio); return (0); /* this is deprecated, FIOSETOWN should be used instead */ case TIOCSPGRP: return (fsetown(-(*(int *)data), &tp->tap_sigio)); /* this is deprecated, FIOGETOWN should be used instead */ case TIOCGPGRP: *(int *)data = -fgetown(&tp->tap_sigio); return (0); /* VMware/VMnet port ioctl's */ #if defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD4) case _IO('V', 0): ival = IOCPARM_IVAL(data); data = (caddr_t)&ival; /* FALLTHROUGH */ #endif case VMIO_SIOCSIFFLAGS: /* VMware/VMnet SIOCSIFFLAGS */ f = *(int *)data; f &= 0x0fff; f &= ~IFF_CANTCHANGE; f |= IFF_UP; mtx_lock(&tp->tap_mtx); ifp->if_flags = f | (ifp->if_flags & IFF_CANTCHANGE); mtx_unlock(&tp->tap_mtx); break; case SIOCGIFADDR: /* get MAC address of the remote side */ mtx_lock(&tp->tap_mtx); bcopy(tp->ether_addr, data, sizeof(tp->ether_addr)); mtx_unlock(&tp->tap_mtx); break; case SIOCSIFADDR: /* set MAC address of the remote side */ mtx_lock(&tp->tap_mtx); bcopy(data, tp->ether_addr, sizeof(tp->ether_addr)); mtx_unlock(&tp->tap_mtx); break; default: return (ENOTTY); } return (0); } /* tapioctl */ /* * tapread * * the cdevsw read interface - reads a packet at a time, or at * least as much of a packet as can be read */ static int tapread(struct cdev *dev, struct uio *uio, int flag) { struct tap_softc *tp = dev->si_drv1; struct ifnet *ifp = tp->tap_ifp; struct mbuf *m = NULL; int error = 0, len; TAPDEBUG("%s reading, minor = %#x\n", ifp->if_xname, dev2unit(dev)); mtx_lock(&tp->tap_mtx); if ((tp->tap_flags & TAP_READY) != TAP_READY) { mtx_unlock(&tp->tap_mtx); /* Unlocked read. */ TAPDEBUG("%s not ready. minor = %#x, tap_flags = 0x%x\n", ifp->if_xname, dev2unit(dev), tp->tap_flags); return (EHOSTDOWN); } tp->tap_flags &= ~TAP_RWAIT; /* sleep until we get a packet */ do { IF_DEQUEUE(&ifp->if_snd, m); if (m == NULL) { if (flag & O_NONBLOCK) { mtx_unlock(&tp->tap_mtx); return (EWOULDBLOCK); } tp->tap_flags |= TAP_RWAIT; error = mtx_sleep(tp, &tp->tap_mtx, PCATCH | (PZERO + 1), "taprd", 0); if (error) { mtx_unlock(&tp->tap_mtx); return (error); } } } while (m == NULL); mtx_unlock(&tp->tap_mtx); /* feed packet to bpf */ BPF_MTAP(ifp, m); /* xfer packet to user space */ while ((m != NULL) && (uio->uio_resid > 0) && (error == 0)) { len = min(uio->uio_resid, m->m_len); if (len == 0) break; error = uiomove(mtod(m, void *), len, uio); m = m_free(m); } if (m != NULL) { TAPDEBUG("%s dropping mbuf, minor = %#x\n", ifp->if_xname, dev2unit(dev)); m_freem(m); } return (error); } /* tapread */ /* * tapwrite * * the cdevsw write interface - an atomic write is a packet - or else! */ static int tapwrite(struct cdev *dev, struct uio *uio, int flag) { struct ether_header *eh; struct tap_softc *tp = dev->si_drv1; struct ifnet *ifp = tp->tap_ifp; struct mbuf *m; TAPDEBUG("%s writing, minor = %#x\n", ifp->if_xname, dev2unit(dev)); if (uio->uio_resid == 0) return (0); if ((uio->uio_resid < 0) || (uio->uio_resid > TAPMRU)) { TAPDEBUG("%s invalid packet len = %zd, minor = %#x\n", ifp->if_xname, uio->uio_resid, dev2unit(dev)); return (EIO); } if ((m = m_uiotombuf(uio, M_NOWAIT, 0, ETHER_ALIGN, M_PKTHDR)) == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return (ENOBUFS); } m->m_pkthdr.rcvif = ifp; /* * Only pass a unicast frame to ether_input(), if it would actually * have been received by non-virtual hardware. */ if (m->m_len < sizeof(struct ether_header)) { m_freem(m); return (0); } eh = mtod(m, struct ether_header *); if (eh && (ifp->if_flags & IFF_PROMISC) == 0 && !ETHER_IS_MULTICAST(eh->ether_dhost) && bcmp(eh->ether_dhost, IF_LLADDR(ifp), ETHER_ADDR_LEN) != 0) { m_freem(m); return (0); } /* Pass packet up to parent. */ CURVNET_SET(ifp->if_vnet); (*ifp->if_input)(ifp, m); CURVNET_RESTORE(); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); /* ibytes are counted in parent */ return (0); } /* tapwrite */ /* * tappoll * * the poll interface, this is only useful on reads * really. the write detect always returns true, write never blocks * anyway, it either accepts the packet or drops it */ static int tappoll(struct cdev *dev, int events, struct thread *td) { struct tap_softc *tp = dev->si_drv1; struct ifnet *ifp = tp->tap_ifp; int revents = 0; TAPDEBUG("%s polling, minor = %#x\n", ifp->if_xname, dev2unit(dev)); if (events & (POLLIN | POLLRDNORM)) { IFQ_LOCK(&ifp->if_snd); if (!IFQ_IS_EMPTY(&ifp->if_snd)) { TAPDEBUG("%s have data in queue. len = %d, " \ "minor = %#x\n", ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); revents |= (events & (POLLIN | POLLRDNORM)); } else { TAPDEBUG("%s waiting for data, minor = %#x\n", ifp->if_xname, dev2unit(dev)); selrecord(td, &tp->tap_rsel); } IFQ_UNLOCK(&ifp->if_snd); } if (events & (POLLOUT | POLLWRNORM)) revents |= (events & (POLLOUT | POLLWRNORM)); return (revents); } /* tappoll */ /* * tap_kqfilter * * support for kevent() system call */ static int tapkqfilter(struct cdev *dev, struct knote *kn) { struct tap_softc *tp = dev->si_drv1; struct ifnet *ifp = tp->tap_ifp; switch (kn->kn_filter) { case EVFILT_READ: TAPDEBUG("%s kqfilter: EVFILT_READ, minor = %#x\n", ifp->if_xname, dev2unit(dev)); kn->kn_fop = &tap_read_filterops; break; case EVFILT_WRITE: TAPDEBUG("%s kqfilter: EVFILT_WRITE, minor = %#x\n", ifp->if_xname, dev2unit(dev)); kn->kn_fop = &tap_write_filterops; break; default: TAPDEBUG("%s kqfilter: invalid filter, minor = %#x\n", ifp->if_xname, dev2unit(dev)); return (EINVAL); /* NOT REACHED */ } kn->kn_hook = tp; knlist_add(&tp->tap_rsel.si_note, kn, 0); return (0); } /* tapkqfilter */ /* * tap_kqread * * Return true if there is data in the interface queue */ static int tapkqread(struct knote *kn, long hint) { int ret; struct tap_softc *tp = kn->kn_hook; struct cdev *dev = tp->tap_dev; struct ifnet *ifp = tp->tap_ifp; if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { TAPDEBUG("%s have data in queue. len = %d, minor = %#x\n", ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); ret = 1; } else { TAPDEBUG("%s waiting for data, minor = %#x\n", ifp->if_xname, dev2unit(dev)); ret = 0; } return (ret); } /* tapkqread */ /* * tap_kqwrite * * Always can write. Return the MTU in kn->data */ static int tapkqwrite(struct knote *kn, long hint) { struct tap_softc *tp = kn->kn_hook; struct ifnet *ifp = tp->tap_ifp; kn->kn_data = ifp->if_mtu; return (1); } /* tapkqwrite */ static void tapkqdetach(struct knote *kn) { struct tap_softc *tp = kn->kn_hook; knlist_remove(&tp->tap_rsel.si_note, kn, 0); } /* tapkqdetach */ Index: projects/clang700-import/sys/net/if_tun.c =================================================================== --- projects/clang700-import/sys/net/if_tun.c (revision 339014) +++ projects/clang700-import/sys/net/if_tun.c (revision 339015) @@ -1,1036 +1,1041 @@ /* $NetBSD: if_tun.c,v 1.14 1994/06/29 06:36:25 cgd Exp $ */ /*- * Copyright (c) 1988, Julian Onions * Nottingham University 1987. * * This source may be freely distributed, however I would be interested * in any changes that are made. * * This driver takes packets off the IP i/f and hands them up to a * user process to have its wicked way with. This driver has it's * roots in a similar driver written by Phil Cockcroft (formerly) at * UCL. This driver is based much more on read/write/poll mode of * operation though. * * $FreeBSD$ */ #include "opt_inet.h" #include "opt_inet6.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef INET #include #endif #include #include #include #include #include /* * tun_list is protected by global tunmtx. Other mutable fields are * protected by tun->tun_mtx, or by their owning subsystem. tun_dev is * static for the duration of a tunnel interface. */ struct tun_softc { TAILQ_ENTRY(tun_softc) tun_list; struct cdev *tun_dev; u_short tun_flags; /* misc flags */ #define TUN_OPEN 0x0001 #define TUN_INITED 0x0002 #define TUN_RCOLL 0x0004 #define TUN_IASET 0x0008 #define TUN_DSTADDR 0x0010 #define TUN_LMODE 0x0020 #define TUN_RWAIT 0x0040 #define TUN_ASYNC 0x0080 #define TUN_IFHEAD 0x0100 #define TUN_READY (TUN_OPEN | TUN_INITED) /* * XXXRW: tun_pid is used to exclusively lock /dev/tun. Is this * actually needed? Can we just return EBUSY if already open? * Problem is that this involved inherent races when a tun device * is handed off from one process to another, as opposed to just * being slightly stale informationally. */ pid_t tun_pid; /* owning pid */ struct ifnet *tun_ifp; /* the interface */ struct sigio *tun_sigio; /* information for async I/O */ struct selinfo tun_rsel; /* read select */ struct mtx tun_mtx; /* protect mutable softc fields */ struct cv tun_cv; /* protect against ref'd dev destroy */ }; #define TUN2IFP(sc) ((sc)->tun_ifp) #define TUNDEBUG if (tundebug) if_printf /* * All mutable global variables in if_tun are locked using tunmtx, with * the exception of tundebug, which is used unlocked, and tunclones, * which is static after setup. */ static struct mtx tunmtx; static const char tunname[] = "tun"; static MALLOC_DEFINE(M_TUN, tunname, "Tunnel Interface"); static int tundebug = 0; static int tundclone = 1; static struct clonedevs *tunclones; static TAILQ_HEAD(,tun_softc) tunhead = TAILQ_HEAD_INITIALIZER(tunhead); SYSCTL_INT(_debug, OID_AUTO, if_tun_debug, CTLFLAG_RW, &tundebug, 0, ""); SYSCTL_DECL(_net_link); static SYSCTL_NODE(_net_link, OID_AUTO, tun, CTLFLAG_RW, 0, "IP tunnel software network interface."); SYSCTL_INT(_net_link_tun, OID_AUTO, devfs_cloning, CTLFLAG_RWTUN, &tundclone, 0, "Enable legacy devfs interface creation."); static void tunclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev); static void tuncreate(const char *name, struct cdev *dev); static int tunifioctl(struct ifnet *, u_long, caddr_t); static void tuninit(struct ifnet *); static int tunmodevent(module_t, int, void *); static int tunoutput(struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *ro); static void tunstart(struct ifnet *); static int tun_clone_create(struct if_clone *, int, caddr_t); static void tun_clone_destroy(struct ifnet *); static struct if_clone *tun_cloner; static d_open_t tunopen; static d_close_t tunclose; static d_read_t tunread; static d_write_t tunwrite; static d_ioctl_t tunioctl; static d_poll_t tunpoll; static d_kqfilter_t tunkqfilter; static int tunkqread(struct knote *, long); static int tunkqwrite(struct knote *, long); static void tunkqdetach(struct knote *); static struct filterops tun_read_filterops = { .f_isfd = 1, .f_attach = NULL, .f_detach = tunkqdetach, .f_event = tunkqread, }; static struct filterops tun_write_filterops = { .f_isfd = 1, .f_attach = NULL, .f_detach = tunkqdetach, .f_event = tunkqwrite, }; static struct cdevsw tun_cdevsw = { .d_version = D_VERSION, .d_flags = D_NEEDMINOR, .d_open = tunopen, .d_close = tunclose, .d_read = tunread, .d_write = tunwrite, .d_ioctl = tunioctl, .d_poll = tunpoll, .d_kqfilter = tunkqfilter, .d_name = tunname, }; static int tun_clone_create(struct if_clone *ifc, int unit, caddr_t params) { struct cdev *dev; int i; /* find any existing device, or allocate new unit number */ i = clone_create(&tunclones, &tun_cdevsw, &unit, &dev, 0); if (i) { /* No preexisting struct cdev *, create one */ dev = make_dev(&tun_cdevsw, unit, UID_UUCP, GID_DIALER, 0600, "%s%d", tunname, unit); } tuncreate(tunname, dev); return (0); } static void tunclone(void *arg, struct ucred *cred, char *name, int namelen, struct cdev **dev) { char devname[SPECNAMELEN + 1]; int u, i, append_unit; if (*dev != NULL) return; /* * If tun cloning is enabled, only the superuser can create an * interface. */ if (!tundclone || priv_check_cred(cred, PRIV_NET_IFCREATE, 0) != 0) return; if (strcmp(name, tunname) == 0) { u = -1; } else if (dev_stdclone(name, NULL, tunname, &u) != 1) return; /* Don't recognise the name */ if (u != -1 && u > IF_MAXUNIT) return; /* Unit number too high */ if (u == -1) append_unit = 1; else append_unit = 0; CURVNET_SET(CRED_TO_VNET(cred)); /* find any existing device, or allocate new unit number */ i = clone_create(&tunclones, &tun_cdevsw, &u, dev, 0); if (i) { if (append_unit) { namelen = snprintf(devname, sizeof(devname), "%s%d", name, u); name = devname; } /* No preexisting struct cdev *, create one */ *dev = make_dev_credf(MAKEDEV_REF, &tun_cdevsw, u, cred, UID_UUCP, GID_DIALER, 0600, "%s", name); } if_clone_create(name, namelen, NULL); CURVNET_RESTORE(); } static void tun_destroy(struct tun_softc *tp) { struct cdev *dev; mtx_lock(&tp->tun_mtx); if ((tp->tun_flags & TUN_OPEN) != 0) cv_wait_unlock(&tp->tun_cv, &tp->tun_mtx); else mtx_unlock(&tp->tun_mtx); CURVNET_SET(TUN2IFP(tp)->if_vnet); dev = tp->tun_dev; bpfdetach(TUN2IFP(tp)); if_detach(TUN2IFP(tp)); if_free(TUN2IFP(tp)); destroy_dev(dev); seldrain(&tp->tun_rsel); knlist_clear(&tp->tun_rsel.si_note, 0); knlist_destroy(&tp->tun_rsel.si_note); mtx_destroy(&tp->tun_mtx); cv_destroy(&tp->tun_cv); free(tp, M_TUN); CURVNET_RESTORE(); } static void tun_clone_destroy(struct ifnet *ifp) { struct tun_softc *tp = ifp->if_softc; mtx_lock(&tunmtx); TAILQ_REMOVE(&tunhead, tp, tun_list); mtx_unlock(&tunmtx); tun_destroy(tp); } static int tunmodevent(module_t mod, int type, void *data) { static eventhandler_tag tag; struct tun_softc *tp; switch (type) { case MOD_LOAD: mtx_init(&tunmtx, "tunmtx", NULL, MTX_DEF); clone_setup(&tunclones); tag = EVENTHANDLER_REGISTER(dev_clone, tunclone, 0, 1000); if (tag == NULL) return (ENOMEM); tun_cloner = if_clone_simple(tunname, tun_clone_create, tun_clone_destroy, 0); break; case MOD_UNLOAD: if_clone_detach(tun_cloner); EVENTHANDLER_DEREGISTER(dev_clone, tag); drain_dev_clone_events(); mtx_lock(&tunmtx); while ((tp = TAILQ_FIRST(&tunhead)) != NULL) { TAILQ_REMOVE(&tunhead, tp, tun_list); mtx_unlock(&tunmtx); tun_destroy(tp); mtx_lock(&tunmtx); } mtx_unlock(&tunmtx); clone_cleanup(&tunclones); mtx_destroy(&tunmtx); break; default: return EOPNOTSUPP; } return 0; } static moduledata_t tun_mod = { "if_tun", tunmodevent, 0 }; DECLARE_MODULE(if_tun, tun_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); MODULE_VERSION(if_tun, 1); static void tunstart(struct ifnet *ifp) { struct tun_softc *tp = ifp->if_softc; struct mbuf *m; TUNDEBUG(ifp,"%s starting\n", ifp->if_xname); if (ALTQ_IS_ENABLED(&ifp->if_snd)) { IFQ_LOCK(&ifp->if_snd); IFQ_POLL_NOLOCK(&ifp->if_snd, m); if (m == NULL) { IFQ_UNLOCK(&ifp->if_snd); return; } IFQ_UNLOCK(&ifp->if_snd); } mtx_lock(&tp->tun_mtx); if (tp->tun_flags & TUN_RWAIT) { tp->tun_flags &= ~TUN_RWAIT; wakeup(tp); } selwakeuppri(&tp->tun_rsel, PZERO + 1); KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); if (tp->tun_flags & TUN_ASYNC && tp->tun_sigio) { mtx_unlock(&tp->tun_mtx); pgsigio(&tp->tun_sigio, SIGIO, 0); } else mtx_unlock(&tp->tun_mtx); } /* XXX: should return an error code so it can fail. */ static void tuncreate(const char *name, struct cdev *dev) { struct tun_softc *sc; struct ifnet *ifp; sc = malloc(sizeof(*sc), M_TUN, M_WAITOK | M_ZERO); mtx_init(&sc->tun_mtx, "tun_mtx", NULL, MTX_DEF); cv_init(&sc->tun_cv, "tun_condvar"); sc->tun_flags = TUN_INITED; sc->tun_dev = dev; mtx_lock(&tunmtx); TAILQ_INSERT_TAIL(&tunhead, sc, tun_list); mtx_unlock(&tunmtx); ifp = sc->tun_ifp = if_alloc(IFT_PPP); if (ifp == NULL) panic("%s%d: failed to if_alloc() interface.\n", name, dev2unit(dev)); if_initname(ifp, name, dev2unit(dev)); ifp->if_mtu = TUNMTU; ifp->if_ioctl = tunifioctl; ifp->if_output = tunoutput; ifp->if_start = tunstart; ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST; ifp->if_softc = sc; IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen); ifp->if_snd.ifq_drv_maxlen = 0; IFQ_SET_READY(&ifp->if_snd); knlist_init_mtx(&sc->tun_rsel.si_note, &sc->tun_mtx); ifp->if_capabilities |= IFCAP_LINKSTATE; ifp->if_capenable |= IFCAP_LINKSTATE; if_attach(ifp); bpfattach(ifp, DLT_NULL, sizeof(u_int32_t)); dev->si_drv1 = sc; TUNDEBUG(ifp, "interface %s is created, minor = %#x\n", ifp->if_xname, dev2unit(dev)); } static int tunopen(struct cdev *dev, int flag, int mode, struct thread *td) { struct ifnet *ifp; struct tun_softc *tp; /* * XXXRW: Non-atomic test and set of dev->si_drv1 requires * synchronization. */ tp = dev->si_drv1; if (!tp) { tuncreate(tunname, dev); tp = dev->si_drv1; } /* * XXXRW: This use of tun_pid is subject to error due to the * fact that a reference to the tunnel can live beyond the * death of the process that created it. Can we replace this * with a simple busy flag? */ mtx_lock(&tp->tun_mtx); if (tp->tun_pid != 0 && tp->tun_pid != td->td_proc->p_pid) { mtx_unlock(&tp->tun_mtx); return (EBUSY); } tp->tun_pid = td->td_proc->p_pid; tp->tun_flags |= TUN_OPEN; ifp = TUN2IFP(tp); if_link_state_change(ifp, LINK_STATE_UP); TUNDEBUG(ifp, "open\n"); mtx_unlock(&tp->tun_mtx); return (0); } /* * tunclose - close the device - mark i/f down & delete * routing info */ static int tunclose(struct cdev *dev, int foo, int bar, struct thread *td) { struct tun_softc *tp; struct ifnet *ifp; tp = dev->si_drv1; ifp = TUN2IFP(tp); mtx_lock(&tp->tun_mtx); tp->tun_flags &= ~TUN_OPEN; tp->tun_pid = 0; /* * junk all pending output */ CURVNET_SET(ifp->if_vnet); IFQ_PURGE(&ifp->if_snd); if (ifp->if_flags & IFF_UP) { mtx_unlock(&tp->tun_mtx); if_down(ifp); mtx_lock(&tp->tun_mtx); } /* Delete all addresses and routes which reference this interface. */ if (ifp->if_drv_flags & IFF_DRV_RUNNING) { struct ifaddr *ifa; ifp->if_drv_flags &= ~IFF_DRV_RUNNING; mtx_unlock(&tp->tun_mtx); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { /* deal w/IPv4 PtP destination; unlocked read */ if (ifa->ifa_addr->sa_family == AF_INET) { rtinit(ifa, (int)RTM_DELETE, tp->tun_flags & TUN_DSTADDR ? RTF_HOST : 0); } else { rtinit(ifa, (int)RTM_DELETE, 0); } } if_purgeaddrs(ifp); mtx_lock(&tp->tun_mtx); } if_link_state_change(ifp, LINK_STATE_DOWN); CURVNET_RESTORE(); funsetown(&tp->tun_sigio); selwakeuppri(&tp->tun_rsel, PZERO + 1); KNOTE_LOCKED(&tp->tun_rsel.si_note, 0); TUNDEBUG (ifp, "closed\n"); cv_broadcast(&tp->tun_cv); mtx_unlock(&tp->tun_mtx); return (0); } static void tuninit(struct ifnet *ifp) { struct tun_softc *tp = ifp->if_softc; #ifdef INET struct ifaddr *ifa; #endif TUNDEBUG(ifp, "tuninit\n"); mtx_lock(&tp->tun_mtx); ifp->if_flags |= IFF_UP; ifp->if_drv_flags |= IFF_DRV_RUNNING; getmicrotime(&ifp->if_lastchange); #ifdef INET if_addr_rlock(ifp); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family == AF_INET) { struct sockaddr_in *si; si = (struct sockaddr_in *)ifa->ifa_addr; if (si->sin_addr.s_addr) tp->tun_flags |= TUN_IASET; si = (struct sockaddr_in *)ifa->ifa_dstaddr; if (si && si->sin_addr.s_addr) tp->tun_flags |= TUN_DSTADDR; } } if_addr_runlock(ifp); #endif mtx_unlock(&tp->tun_mtx); } /* * Process an ioctl request. */ static int tunifioctl(struct ifnet *ifp, u_long cmd, caddr_t data) { struct ifreq *ifr = (struct ifreq *)data; struct tun_softc *tp = ifp->if_softc; struct ifstat *ifs; int error = 0; switch(cmd) { case SIOCGIFSTATUS: ifs = (struct ifstat *)data; mtx_lock(&tp->tun_mtx); if (tp->tun_pid) snprintf(ifs->ascii, sizeof(ifs->ascii), "\tOpened by PID %d\n", tp->tun_pid); else ifs->ascii[0] = '\0'; mtx_unlock(&tp->tun_mtx); break; case SIOCSIFADDR: tuninit(ifp); TUNDEBUG(ifp, "address set\n"); break; case SIOCSIFMTU: ifp->if_mtu = ifr->ifr_mtu; TUNDEBUG(ifp, "mtu set\n"); break; case SIOCSIFFLAGS: case SIOCADDMULTI: case SIOCDELMULTI: break; default: error = EINVAL; } return (error); } /* * tunoutput - queue packets from higher level ready to put out. */ static int tunoutput(struct ifnet *ifp, struct mbuf *m0, const struct sockaddr *dst, struct route *ro) { struct tun_softc *tp = ifp->if_softc; u_short cached_tun_flags; int error; u_int32_t af; TUNDEBUG (ifp, "tunoutput\n"); #ifdef MAC error = mac_ifnet_check_transmit(ifp, m0); if (error) { m_freem(m0); return (error); } #endif /* Could be unlocked read? */ mtx_lock(&tp->tun_mtx); cached_tun_flags = tp->tun_flags; mtx_unlock(&tp->tun_mtx); if ((cached_tun_flags & TUN_READY) != TUN_READY) { TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); m_freem (m0); return (EHOSTDOWN); } if ((ifp->if_flags & IFF_UP) != IFF_UP) { m_freem (m0); return (EHOSTDOWN); } /* BPF writes need to be handled specially. */ if (dst->sa_family == AF_UNSPEC) bcopy(dst->sa_data, &af, sizeof(af)); else af = dst->sa_family; if (bpf_peers_present(ifp->if_bpf)) bpf_mtap2(ifp->if_bpf, &af, sizeof(af), m0); /* prepend sockaddr? this may abort if the mbuf allocation fails */ if (cached_tun_flags & TUN_LMODE) { /* allocate space for sockaddr */ M_PREPEND(m0, dst->sa_len, M_NOWAIT); /* if allocation failed drop packet */ if (m0 == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOBUFS); } else { bcopy(dst, m0->m_data, dst->sa_len); } } if (cached_tun_flags & TUN_IFHEAD) { /* Prepend the address family */ M_PREPEND(m0, 4, M_NOWAIT); /* if allocation failed drop packet */ if (m0 == NULL) { if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1); if_inc_counter(ifp, IFCOUNTER_OERRORS, 1); return (ENOBUFS); } else *(u_int32_t *)m0->m_data = htonl(af); } else { #ifdef INET if (af != AF_INET) #endif { m_freem(m0); return (EAFNOSUPPORT); } } error = (ifp->if_transmit)(ifp, m0); if (error) return (ENOBUFS); if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1); return (0); } /* * the cdevsw interface is now pretty minimal. */ static int tunioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td) { - int error; + struct ifreq ifr; struct tun_softc *tp = dev->si_drv1; struct tuninfo *tunp; + int error; switch (cmd) { case TUNSIFINFO: tunp = (struct tuninfo *)data; - if (tunp->mtu < IF_MINMTU) - return (EINVAL); - if (TUN2IFP(tp)->if_mtu != tunp->mtu) { - error = priv_check(td, PRIV_NET_SETIFMTU); - if (error) - return (error); - } if (TUN2IFP(tp)->if_type != tunp->type) return (EPROTOTYPE); mtx_lock(&tp->tun_mtx); - TUN2IFP(tp)->if_mtu = tunp->mtu; + if (TUN2IFP(tp)->if_mtu != tunp->mtu) { + strncpy(ifr.ifr_name, if_name(TUN2IFP(tp)), IFNAMSIZ); + ifr.ifr_mtu = tunp->mtu; + CURVNET_SET(TUN2IFP(tp)->if_vnet); + error = ifhwioctl(SIOCSIFMTU, TUN2IFP(tp), + (caddr_t)&ifr, td); + CURVNET_RESTORE(); + if (error) { + mtx_unlock(&tp->tun_mtx); + return (error); + } + } TUN2IFP(tp)->if_baudrate = tunp->baudrate; mtx_unlock(&tp->tun_mtx); break; case TUNGIFINFO: tunp = (struct tuninfo *)data; mtx_lock(&tp->tun_mtx); tunp->mtu = TUN2IFP(tp)->if_mtu; tunp->type = TUN2IFP(tp)->if_type; tunp->baudrate = TUN2IFP(tp)->if_baudrate; mtx_unlock(&tp->tun_mtx); break; case TUNSDEBUG: tundebug = *(int *)data; break; case TUNGDEBUG: *(int *)data = tundebug; break; case TUNSLMODE: mtx_lock(&tp->tun_mtx); if (*(int *)data) { tp->tun_flags |= TUN_LMODE; tp->tun_flags &= ~TUN_IFHEAD; } else tp->tun_flags &= ~TUN_LMODE; mtx_unlock(&tp->tun_mtx); break; case TUNSIFHEAD: mtx_lock(&tp->tun_mtx); if (*(int *)data) { tp->tun_flags |= TUN_IFHEAD; tp->tun_flags &= ~TUN_LMODE; } else tp->tun_flags &= ~TUN_IFHEAD; mtx_unlock(&tp->tun_mtx); break; case TUNGIFHEAD: mtx_lock(&tp->tun_mtx); *(int *)data = (tp->tun_flags & TUN_IFHEAD) ? 1 : 0; mtx_unlock(&tp->tun_mtx); break; case TUNSIFMODE: /* deny this if UP */ if (TUN2IFP(tp)->if_flags & IFF_UP) return(EBUSY); switch (*(int *)data & ~IFF_MULTICAST) { case IFF_POINTOPOINT: case IFF_BROADCAST: mtx_lock(&tp->tun_mtx); TUN2IFP(tp)->if_flags &= ~(IFF_BROADCAST|IFF_POINTOPOINT|IFF_MULTICAST); TUN2IFP(tp)->if_flags |= *(int *)data; mtx_unlock(&tp->tun_mtx); break; default: return(EINVAL); } break; case TUNSIFPID: mtx_lock(&tp->tun_mtx); tp->tun_pid = curthread->td_proc->p_pid; mtx_unlock(&tp->tun_mtx); break; case FIONBIO: break; case FIOASYNC: mtx_lock(&tp->tun_mtx); if (*(int *)data) tp->tun_flags |= TUN_ASYNC; else tp->tun_flags &= ~TUN_ASYNC; mtx_unlock(&tp->tun_mtx); break; case FIONREAD: if (!IFQ_IS_EMPTY(&TUN2IFP(tp)->if_snd)) { struct mbuf *mb; IFQ_LOCK(&TUN2IFP(tp)->if_snd); IFQ_POLL_NOLOCK(&TUN2IFP(tp)->if_snd, mb); for (*(int *)data = 0; mb != NULL; mb = mb->m_next) *(int *)data += mb->m_len; IFQ_UNLOCK(&TUN2IFP(tp)->if_snd); } else *(int *)data = 0; break; case FIOSETOWN: return (fsetown(*(int *)data, &tp->tun_sigio)); case FIOGETOWN: *(int *)data = fgetown(&tp->tun_sigio); return (0); /* This is deprecated, FIOSETOWN should be used instead. */ case TIOCSPGRP: return (fsetown(-(*(int *)data), &tp->tun_sigio)); /* This is deprecated, FIOGETOWN should be used instead. */ case TIOCGPGRP: *(int *)data = -fgetown(&tp->tun_sigio); return (0); default: return (ENOTTY); } return (0); } /* * The cdevsw read interface - reads a packet at a time, or at * least as much of a packet as can be read. */ static int tunread(struct cdev *dev, struct uio *uio, int flag) { struct tun_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); struct mbuf *m; int error=0, len; TUNDEBUG (ifp, "read\n"); mtx_lock(&tp->tun_mtx); if ((tp->tun_flags & TUN_READY) != TUN_READY) { mtx_unlock(&tp->tun_mtx); TUNDEBUG (ifp, "not ready 0%o\n", tp->tun_flags); return (EHOSTDOWN); } tp->tun_flags &= ~TUN_RWAIT; do { IFQ_DEQUEUE(&ifp->if_snd, m); if (m == NULL) { if (flag & O_NONBLOCK) { mtx_unlock(&tp->tun_mtx); return (EWOULDBLOCK); } tp->tun_flags |= TUN_RWAIT; error = mtx_sleep(tp, &tp->tun_mtx, PCATCH | (PZERO + 1), "tunread", 0); if (error != 0) { mtx_unlock(&tp->tun_mtx); return (error); } } } while (m == NULL); mtx_unlock(&tp->tun_mtx); while (m && uio->uio_resid > 0 && error == 0) { len = min(uio->uio_resid, m->m_len); if (len != 0) error = uiomove(mtod(m, void *), len, uio); m = m_free(m); } if (m) { TUNDEBUG(ifp, "Dropping mbuf\n"); m_freem(m); } return (error); } /* * the cdevsw write interface - an atomic write is a packet - or else! */ static int tunwrite(struct cdev *dev, struct uio *uio, int flag) { struct tun_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); struct mbuf *m; uint32_t family, mru; int isr; TUNDEBUG(ifp, "tunwrite\n"); if ((ifp->if_flags & IFF_UP) != IFF_UP) /* ignore silently */ return (0); if (uio->uio_resid == 0) return (0); mru = TUNMRU; if (tp->tun_flags & TUN_IFHEAD) mru += sizeof(family); if (uio->uio_resid < 0 || uio->uio_resid > mru) { TUNDEBUG(ifp, "len=%zd!\n", uio->uio_resid); return (EIO); } if ((m = m_uiotombuf(uio, M_NOWAIT, 0, 0, M_PKTHDR)) == NULL) { if_inc_counter(ifp, IFCOUNTER_IERRORS, 1); return (ENOBUFS); } m->m_pkthdr.rcvif = ifp; #ifdef MAC mac_ifnet_create_mbuf(ifp, m); #endif /* Could be unlocked read? */ mtx_lock(&tp->tun_mtx); if (tp->tun_flags & TUN_IFHEAD) { mtx_unlock(&tp->tun_mtx); if (m->m_len < sizeof(family) && (m = m_pullup(m, sizeof(family))) == NULL) return (ENOBUFS); family = ntohl(*mtod(m, u_int32_t *)); m_adj(m, sizeof(family)); } else { mtx_unlock(&tp->tun_mtx); family = AF_INET; } BPF_MTAP2(ifp, &family, sizeof(family), m); switch (family) { #ifdef INET case AF_INET: isr = NETISR_IP; break; #endif #ifdef INET6 case AF_INET6: isr = NETISR_IPV6; break; #endif default: m_freem(m); return (EAFNOSUPPORT); } random_harvest_queue(m, sizeof(*m), RANDOM_NET_TUN); if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len); if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1); CURVNET_SET(ifp->if_vnet); M_SETFIB(m, ifp->if_fib); netisr_dispatch(isr, m); CURVNET_RESTORE(); return (0); } /* * tunpoll - the poll interface, this is only useful on reads * really. The write detect always returns true, write never blocks * anyway, it either accepts the packet or drops it. */ static int tunpoll(struct cdev *dev, int events, struct thread *td) { struct tun_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); int revents = 0; struct mbuf *m; TUNDEBUG(ifp, "tunpoll\n"); if (events & (POLLIN | POLLRDNORM)) { IFQ_LOCK(&ifp->if_snd); IFQ_POLL_NOLOCK(&ifp->if_snd, m); if (m != NULL) { TUNDEBUG(ifp, "tunpoll q=%d\n", ifp->if_snd.ifq_len); revents |= events & (POLLIN | POLLRDNORM); } else { TUNDEBUG(ifp, "tunpoll waiting\n"); selrecord(td, &tp->tun_rsel); } IFQ_UNLOCK(&ifp->if_snd); } if (events & (POLLOUT | POLLWRNORM)) revents |= events & (POLLOUT | POLLWRNORM); return (revents); } /* * tunkqfilter - support for the kevent() system call. */ static int tunkqfilter(struct cdev *dev, struct knote *kn) { struct tun_softc *tp = dev->si_drv1; struct ifnet *ifp = TUN2IFP(tp); switch(kn->kn_filter) { case EVFILT_READ: TUNDEBUG(ifp, "%s kqfilter: EVFILT_READ, minor = %#x\n", ifp->if_xname, dev2unit(dev)); kn->kn_fop = &tun_read_filterops; break; case EVFILT_WRITE: TUNDEBUG(ifp, "%s kqfilter: EVFILT_WRITE, minor = %#x\n", ifp->if_xname, dev2unit(dev)); kn->kn_fop = &tun_write_filterops; break; default: TUNDEBUG(ifp, "%s kqfilter: invalid filter, minor = %#x\n", ifp->if_xname, dev2unit(dev)); return(EINVAL); } kn->kn_hook = tp; knlist_add(&tp->tun_rsel.si_note, kn, 0); return (0); } /* * Return true of there is data in the interface queue. */ static int tunkqread(struct knote *kn, long hint) { int ret; struct tun_softc *tp = kn->kn_hook; struct cdev *dev = tp->tun_dev; struct ifnet *ifp = TUN2IFP(tp); if ((kn->kn_data = ifp->if_snd.ifq_len) > 0) { TUNDEBUG(ifp, "%s have data in the queue. Len = %d, minor = %#x\n", ifp->if_xname, ifp->if_snd.ifq_len, dev2unit(dev)); ret = 1; } else { TUNDEBUG(ifp, "%s waiting for data, minor = %#x\n", ifp->if_xname, dev2unit(dev)); ret = 0; } return (ret); } /* * Always can write, always return MTU in kn->data. */ static int tunkqwrite(struct knote *kn, long hint) { struct tun_softc *tp = kn->kn_hook; struct ifnet *ifp = TUN2IFP(tp); kn->kn_data = ifp->if_mtu; return (1); } static void tunkqdetach(struct knote *kn) { struct tun_softc *tp = kn->kn_hook; knlist_remove(&tp->tun_rsel.si_note, kn, 0); } Index: projects/clang700-import/sys/net/if_var.h =================================================================== --- projects/clang700-import/sys/net/if_var.h (revision 339014) +++ projects/clang700-import/sys/net/if_var.h (revision 339015) @@ -1,775 +1,777 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * From: @(#)if.h 8.1 (Berkeley) 6/10/93 * $FreeBSD$ */ #ifndef _NET_IF_VAR_H_ #define _NET_IF_VAR_H_ /* * Structures defining a network interface, providing a packet * transport mechanism (ala level 0 of the PUP protocols). * * Each interface accepts output datagrams of a specified maximum * length, and provides higher level routines with input datagrams * received from its medium. * * Output occurs when the routine if_output is called, with three parameters: * (*ifp->if_output)(ifp, m, dst, rt) * Here m is the mbuf chain to be sent and dst is the destination address. * The output routine encapsulates the supplied datagram if necessary, * and then transmits it on its medium. * * On input, each interface unwraps the data received by it, and either * places it on the input queue of an internetwork datagram routine * and posts the associated software interrupt, or passes the datagram to a raw * packet input routine. * * Routines exist for locating interfaces by their addresses * or for locating an interface on a certain network, as well as more general * routing and gateway routines maintaining information used to locate * interfaces. These routines live in the files if.c and route.c */ struct rtentry; /* ifa_rtrequest */ struct rt_addrinfo; /* ifa_rtrequest */ struct socket; struct carp_if; struct carp_softc; struct ifvlantrunk; struct route; /* if_output */ struct vnet; struct ifmedia; struct netmap_adapter; struct netdump_methods; #ifdef _KERNEL #include /* ifqueue only? */ #include #include #endif /* _KERNEL */ #include #include #include #include /* XXX */ #include /* struct ifqueue */ #include /* XXX */ #include /* XXX */ #include /* if_link_task */ #define IF_DUNIT_NONE -1 #include CK_STAILQ_HEAD(ifnethead, ifnet); /* we use TAILQs so that the order of */ CK_STAILQ_HEAD(ifaddrhead, ifaddr); /* instantiation is preserved in the list */ CK_STAILQ_HEAD(ifmultihead, ifmultiaddr); CK_STAILQ_HEAD(ifgrouphead, ifg_group); #ifdef _KERNEL VNET_DECLARE(struct pfil_head, link_pfil_hook); /* packet filter hooks */ #define V_link_pfil_hook VNET(link_pfil_hook) #define HHOOK_IPSEC_INET 0 #define HHOOK_IPSEC_INET6 1 #define HHOOK_IPSEC_COUNT 2 VNET_DECLARE(struct hhook_head *, ipsec_hhh_in[HHOOK_IPSEC_COUNT]); VNET_DECLARE(struct hhook_head *, ipsec_hhh_out[HHOOK_IPSEC_COUNT]); #define V_ipsec_hhh_in VNET(ipsec_hhh_in) #define V_ipsec_hhh_out VNET(ipsec_hhh_out) extern epoch_t net_epoch_preempt; extern epoch_t net_epoch; #endif /* _KERNEL */ typedef enum { IFCOUNTER_IPACKETS = 0, IFCOUNTER_IERRORS, IFCOUNTER_OPACKETS, IFCOUNTER_OERRORS, IFCOUNTER_COLLISIONS, IFCOUNTER_IBYTES, IFCOUNTER_OBYTES, IFCOUNTER_IMCASTS, IFCOUNTER_OMCASTS, IFCOUNTER_IQDROPS, IFCOUNTER_OQDROPS, IFCOUNTER_NOPROTO, IFCOUNTERS /* Array size. */ } ift_counter; typedef struct ifnet * if_t; typedef void (*if_start_fn_t)(if_t); typedef int (*if_ioctl_fn_t)(if_t, u_long, caddr_t); typedef void (*if_init_fn_t)(void *); typedef void (*if_qflush_fn_t)(if_t); typedef int (*if_transmit_fn_t)(if_t, struct mbuf *); typedef uint64_t (*if_get_counter_t)(if_t, ift_counter); struct ifnet_hw_tsomax { u_int tsomaxbytes; /* TSO total burst length limit in bytes */ u_int tsomaxsegcount; /* TSO maximum segment count */ u_int tsomaxsegsize; /* TSO maximum segment size in bytes */ }; /* Interface encap request types */ typedef enum { IFENCAP_LL = 1 /* pre-calculate link-layer header */ } ife_type; /* * The structure below allows to request various pre-calculated L2/L3 headers * for different media. Requests varies by type (rtype field). * * IFENCAP_LL type: pre-calculates link header based on address family * and destination lladdr. * * Input data fields: * buf: pointer to destination buffer * bufsize: buffer size * flags: IFENCAP_FLAG_BROADCAST if destination is broadcast * family: address family defined by AF_ constant. * lladdr: pointer to link-layer address * lladdr_len: length of link-layer address * hdata: pointer to L3 header (optional, used for ARP requests). * Output data fields: * buf: encap data is stored here * bufsize: resulting encap length is stored here * lladdr_off: offset of link-layer address from encap hdr start * hdata: L3 header may be altered if necessary */ struct if_encap_req { u_char *buf; /* Destination buffer (w) */ size_t bufsize; /* size of provided buffer (r) */ ife_type rtype; /* request type (r) */ uint32_t flags; /* Request flags (r) */ int family; /* Address family AF_* (r) */ int lladdr_off; /* offset from header start (w) */ int lladdr_len; /* lladdr length (r) */ char *lladdr; /* link-level address pointer (r) */ char *hdata; /* Upper layer header data (rw) */ }; #define IFENCAP_FLAG_BROADCAST 0x02 /* Destination is broadcast */ /* * Network interface send tag support. The storage of "struct * m_snd_tag" comes from the network driver and it is free to allocate * as much additional space as it wants for its own use. */ struct m_snd_tag; #define IF_SND_TAG_TYPE_RATE_LIMIT 0 #define IF_SND_TAG_TYPE_UNLIMITED 1 #define IF_SND_TAG_TYPE_MAX 2 struct if_snd_tag_alloc_header { uint32_t type; /* send tag type, see IF_SND_TAG_XXX */ uint32_t flowid; /* mbuf hash value */ uint32_t flowtype; /* mbuf hash type */ }; struct if_snd_tag_alloc_rate_limit { struct if_snd_tag_alloc_header hdr; uint64_t max_rate; /* in bytes/s */ }; struct if_snd_tag_rate_limit_params { uint64_t max_rate; /* in bytes/s */ uint32_t queue_level; /* 0 (empty) .. 65535 (full) */ #define IF_SND_QUEUE_LEVEL_MIN 0 #define IF_SND_QUEUE_LEVEL_MAX 65535 uint32_t reserved; /* padding */ }; union if_snd_tag_alloc_params { struct if_snd_tag_alloc_header hdr; struct if_snd_tag_alloc_rate_limit rate_limit; struct if_snd_tag_alloc_rate_limit unlimited; }; union if_snd_tag_modify_params { struct if_snd_tag_rate_limit_params rate_limit; struct if_snd_tag_rate_limit_params unlimited; }; union if_snd_tag_query_params { struct if_snd_tag_rate_limit_params rate_limit; struct if_snd_tag_rate_limit_params unlimited; }; typedef int (if_snd_tag_alloc_t)(struct ifnet *, union if_snd_tag_alloc_params *, struct m_snd_tag **); typedef int (if_snd_tag_modify_t)(struct m_snd_tag *, union if_snd_tag_modify_params *); typedef int (if_snd_tag_query_t)(struct m_snd_tag *, union if_snd_tag_query_params *); typedef void (if_snd_tag_free_t)(struct m_snd_tag *); /* * Structure defining a network interface. */ struct ifnet { /* General book keeping of interface lists. */ CK_STAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained (CK_) */ LIST_ENTRY(ifnet) if_clones; /* interfaces of a cloner */ CK_STAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if (CK_) */ /* protected by if_addr_lock */ u_char if_alloctype; /* if_type at time of allocation */ /* Driver and protocol specific information that remains stable. */ void *if_softc; /* pointer to driver state */ void *if_llsoftc; /* link layer softc */ void *if_l2com; /* pointer to protocol bits */ const char *if_dname; /* driver name */ int if_dunit; /* unit or IF_DUNIT_NONE */ u_short if_index; /* numeric abbreviation for this if */ short if_index_reserved; /* spare space to grow if_index */ char if_xname[IFNAMSIZ]; /* external name (name + unit) */ char *if_description; /* interface description */ /* Variable fields that are touched by the stack and drivers. */ int if_flags; /* up/down, broadcast, etc. */ int if_drv_flags; /* driver-managed status flags */ int if_capabilities; /* interface features & capabilities */ int if_capenable; /* enabled features & capabilities */ void *if_linkmib; /* link-type-specific MIB data */ size_t if_linkmiblen; /* length of above data */ u_int if_refcount; /* reference count */ /* These fields are shared with struct if_data. */ uint8_t if_type; /* ethernet, tokenring, etc */ uint8_t if_addrlen; /* media address length */ uint8_t if_hdrlen; /* media header length */ uint8_t if_link_state; /* current link state */ uint32_t if_mtu; /* maximum transmission unit */ uint32_t if_metric; /* routing metric (external only) */ uint64_t if_baudrate; /* linespeed */ uint64_t if_hwassist; /* HW offload capabilities, see IFCAP */ time_t if_epoch; /* uptime at attach or stat reset */ struct timeval if_lastchange; /* time of last administrative change */ struct ifaltq if_snd; /* output queue (includes altq) */ struct task if_linktask; /* task for link change events */ /* Addresses of different protocol families assigned to this if. */ struct mtx if_addr_lock; /* lock to protect address lists */ /* * if_addrhead is the list of all addresses associated to * an interface. * Some code in the kernel assumes that first element * of the list has type AF_LINK, and contains sockaddr_dl * addresses which store the link-level address and the name * of the interface. * However, access to the AF_LINK address through this * field is deprecated. Use if_addr or ifaddr_byindex() instead. */ struct ifaddrhead if_addrhead; /* linked list of addresses per if */ struct ifmultihead if_multiaddrs; /* multicast addresses configured */ int if_amcount; /* number of all-multicast requests */ struct ifaddr *if_addr; /* pointer to link-level address */ void *if_hw_addr; /* hardware link-level address */ const u_int8_t *if_broadcastaddr; /* linklevel broadcast bytestring */ struct mtx if_afdata_lock; void *if_afdata[AF_MAX]; int if_afdata_initialized; /* Additional features hung off the interface. */ u_int if_fib; /* interface FIB */ struct vnet *if_vnet; /* pointer to network stack instance */ struct vnet *if_home_vnet; /* where this ifnet originates from */ struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */ struct bpf_if *if_bpf; /* packet filter structure */ int if_pcount; /* number of promiscuous listeners */ void *if_bridge; /* bridge glue */ void *if_lagg; /* lagg glue */ void *if_pf_kif; /* pf glue */ struct carp_if *if_carp; /* carp interface structure */ struct label *if_label; /* interface MAC label */ struct netmap_adapter *if_netmap; /* netmap(4) softc */ /* Various procedures of the layer2 encapsulation and drivers. */ int (*if_output) /* output routine (enqueue) */ (struct ifnet *, struct mbuf *, const struct sockaddr *, struct route *); void (*if_input) /* input routine (from h/w driver) */ (struct ifnet *, struct mbuf *); struct mbuf *(*if_bridge_input)(struct ifnet *, struct mbuf *); int (*if_bridge_output)(struct ifnet *, struct mbuf *, struct sockaddr *, struct rtentry *); void (*if_bridge_linkstate)(struct ifnet *ifp); if_start_fn_t if_start; /* initiate output routine */ if_ioctl_fn_t if_ioctl; /* ioctl routine */ if_init_fn_t if_init; /* Init routine */ int (*if_resolvemulti) /* validate/resolve multicast */ (struct ifnet *, struct sockaddr **, struct sockaddr *); if_qflush_fn_t if_qflush; /* flush any queue */ if_transmit_fn_t if_transmit; /* initiate output routine */ void (*if_reassign) /* reassign to vnet routine */ (struct ifnet *, struct vnet *, char *); if_get_counter_t if_get_counter; /* get counter values */ int (*if_requestencap) /* make link header from request */ (struct ifnet *, struct if_encap_req *); /* Statistics. */ counter_u64_t if_counters[IFCOUNTERS]; /* Stuff that's only temporary and doesn't belong here. */ /* * Network adapter TSO limits: * =========================== * * If the "if_hw_tsomax" field is zero the maximum segment * length limit does not apply. If the "if_hw_tsomaxsegcount" * or the "if_hw_tsomaxsegsize" field is zero the TSO segment * count limit does not apply. If all three fields are zero, * there is no TSO limit. * * NOTE: The TSO limits should reflect the values used in the * BUSDMA tag a network adapter is using to load a mbuf chain * for transmission. The TCP/IP network stack will subtract * space for all linklevel and protocol level headers and * ensure that the full mbuf chain passed to the network * adapter fits within the given limits. */ u_int if_hw_tsomax; /* TSO maximum size in bytes */ u_int if_hw_tsomaxsegcount; /* TSO maximum segment count */ u_int if_hw_tsomaxsegsize; /* TSO maximum segment size in bytes */ /* * Network adapter send tag support: */ if_snd_tag_alloc_t *if_snd_tag_alloc; if_snd_tag_modify_t *if_snd_tag_modify; if_snd_tag_query_t *if_snd_tag_query; if_snd_tag_free_t *if_snd_tag_free; /* Ethernet PCP */ uint8_t if_pcp; /* * Netdump hooks to be called while dumping. */ struct netdump_methods *if_netdump_methods; struct epoch_context if_epoch_ctx; struct epoch_tracker if_addr_et; struct epoch_tracker if_maddr_et; /* * Spare fields to be added before branching a stable branch, so * that structure can be enhanced without changing the kernel * binary interface. */ int if_ispare[4]; /* general use */ }; /* for compatibility with other BSDs */ #define if_name(ifp) ((ifp)->if_xname) /* * Locks for address lists on the network interface. */ #define IF_ADDR_LOCK_INIT(if) mtx_init(&(if)->if_addr_lock, "if_addr_lock", NULL, MTX_DEF) #define IF_ADDR_LOCK_DESTROY(if) mtx_destroy(&(if)->if_addr_lock) #define IF_ADDR_RLOCK(if) struct epoch_tracker if_addr_et; epoch_enter_preempt(net_epoch_preempt, &if_addr_et); #define IF_ADDR_RUNLOCK(if) epoch_exit_preempt(net_epoch_preempt, &if_addr_et); #define IF_ADDR_WLOCK(if) mtx_lock(&(if)->if_addr_lock) #define IF_ADDR_WUNLOCK(if) mtx_unlock(&(if)->if_addr_lock) #define IF_ADDR_LOCK_ASSERT(if) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(if)->if_addr_lock)) #define IF_ADDR_WLOCK_ASSERT(if) mtx_assert(&(if)->if_addr_lock, MA_OWNED) #define NET_EPOCH_ENTER() struct epoch_tracker nep_et; epoch_enter_preempt(net_epoch_preempt, &nep_et) #define NET_EPOCH_ENTER_ET(et) epoch_enter_preempt(net_epoch_preempt, &(et)) #define NET_EPOCH_EXIT() epoch_exit_preempt(net_epoch_preempt, &nep_et) #define NET_EPOCH_EXIT_ET(et) epoch_exit_preempt(net_epoch_preempt, &(et)) #define NET_EPOCH_WAIT() epoch_wait_preempt(net_epoch_preempt) /* * Function variations on locking macros intended to be used by loadable * kernel modules in order to divorce them from the internals of address list * locking. */ void if_addr_rlock(struct ifnet *ifp); /* if_addrhead */ void if_addr_runlock(struct ifnet *ifp); /* if_addrhead */ void if_maddr_rlock(if_t ifp); /* if_multiaddrs */ void if_maddr_runlock(if_t ifp); /* if_multiaddrs */ #ifdef _KERNEL #ifdef _SYS_EVENTHANDLER_H_ /* interface link layer address change event */ typedef void (*iflladdr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(iflladdr_event, iflladdr_event_handler_t); /* interface address change event */ typedef void (*ifaddr_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifaddr_event, ifaddr_event_handler_t); /* new interface arrival event */ typedef void (*ifnet_arrival_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_arrival_event, ifnet_arrival_event_handler_t); /* interface departure event */ typedef void (*ifnet_departure_event_handler_t)(void *, struct ifnet *); EVENTHANDLER_DECLARE(ifnet_departure_event, ifnet_departure_event_handler_t); /* Interface link state change event */ typedef void (*ifnet_link_event_handler_t)(void *, struct ifnet *, int); EVENTHANDLER_DECLARE(ifnet_link_event, ifnet_link_event_handler_t); /* Interface up/down event */ #define IFNET_EVENT_UP 0 #define IFNET_EVENT_DOWN 1 #define IFNET_EVENT_PCP 2 /* priority code point, PCP */ typedef void (*ifnet_event_fn)(void *, struct ifnet *ifp, int event); EVENTHANDLER_DECLARE(ifnet_event, ifnet_event_fn); #endif /* _SYS_EVENTHANDLER_H_ */ /* * interface groups */ struct ifg_group { char ifg_group[IFNAMSIZ]; u_int ifg_refcnt; void *ifg_pf_kif; CK_STAILQ_HEAD(, ifg_member) ifg_members; /* (CK_) */ CK_STAILQ_ENTRY(ifg_group) ifg_next; /* (CK_) */ }; struct ifg_member { CK_STAILQ_ENTRY(ifg_member) ifgm_next; /* (CK_) */ struct ifnet *ifgm_ifp; }; struct ifg_list { struct ifg_group *ifgl_group; CK_STAILQ_ENTRY(ifg_list) ifgl_next; /* (CK_) */ }; #ifdef _SYS_EVENTHANDLER_H_ /* group attach event */ typedef void (*group_attach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_attach_event, group_attach_event_handler_t); /* group detach event */ typedef void (*group_detach_event_handler_t)(void *, struct ifg_group *); EVENTHANDLER_DECLARE(group_detach_event, group_detach_event_handler_t); /* group change event */ typedef void (*group_change_event_handler_t)(void *, const char *); EVENTHANDLER_DECLARE(group_change_event, group_change_event_handler_t); #endif /* _SYS_EVENTHANDLER_H_ */ #define IF_AFDATA_LOCK_INIT(ifp) \ mtx_init(&(ifp)->if_afdata_lock, "if_afdata", NULL, MTX_DEF) #define IF_AFDATA_WLOCK(ifp) mtx_lock(&(ifp)->if_afdata_lock) #define IF_AFDATA_RLOCK(ifp) struct epoch_tracker if_afdata_et; epoch_enter_preempt(net_epoch_preempt, &if_afdata_et) #define IF_AFDATA_WUNLOCK(ifp) mtx_unlock(&(ifp)->if_afdata_lock) #define IF_AFDATA_RUNLOCK(ifp) epoch_exit_preempt(net_epoch_preempt, &if_afdata_et) #define IF_AFDATA_LOCK(ifp) IF_AFDATA_WLOCK(ifp) #define IF_AFDATA_UNLOCK(ifp) IF_AFDATA_WUNLOCK(ifp) #define IF_AFDATA_TRYLOCK(ifp) mtx_trylock(&(ifp)->if_afdata_lock) #define IF_AFDATA_DESTROY(ifp) mtx_destroy(&(ifp)->if_afdata_lock) #define IF_AFDATA_LOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt) || mtx_owned(&(ifp)->if_afdata_lock)) #define IF_AFDATA_RLOCK_ASSERT(ifp) MPASS(in_epoch(net_epoch_preempt)); #define IF_AFDATA_WLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_OWNED) #define IF_AFDATA_UNLOCK_ASSERT(ifp) mtx_assert(&(ifp)->if_afdata_lock, MA_NOTOWNED) /* * 72 was chosen below because it is the size of a TCP/IP * header (40) + the minimum mss (32). */ #define IF_MINMTU 72 #define IF_MAXMTU 65535 #define TOEDEV(ifp) ((ifp)->if_llsoftc) /* * The ifaddr structure contains information about one address * of an interface. They are maintained by the different address families, * are allocated and attached when an address is set, and are linked * together so all addresses for an interface can be located. * * NOTE: a 'struct ifaddr' is always at the beginning of a larger * chunk of malloc'ed memory, where we store the three addresses * (ifa_addr, ifa_dstaddr and ifa_netmask) referenced here. */ struct ifaddr { struct sockaddr *ifa_addr; /* address of interface */ struct sockaddr *ifa_dstaddr; /* other end of p-to-p link */ #define ifa_broadaddr ifa_dstaddr /* broadcast address interface */ struct sockaddr *ifa_netmask; /* used to determine subnet */ struct ifnet *ifa_ifp; /* back-pointer to interface */ struct carp_softc *ifa_carp; /* pointer to CARP data */ CK_STAILQ_ENTRY(ifaddr) ifa_link; /* queue macro glue */ void (*ifa_rtrequest) /* check or clean routes (+ or -)'d */ (int, struct rtentry *, struct rt_addrinfo *); u_short ifa_flags; /* mostly rt_flags for cloning */ #define IFA_ROUTE RTF_UP /* route installed */ #define IFA_RTSELF RTF_HOST /* loopback route to self installed */ u_int ifa_refcnt; /* references to this structure */ counter_u64_t ifa_ipackets; counter_u64_t ifa_opackets; counter_u64_t ifa_ibytes; counter_u64_t ifa_obytes; struct epoch_context ifa_epoch_ctx; }; struct ifaddr * ifa_alloc(size_t size, int flags); void ifa_free(struct ifaddr *ifa); void ifa_ref(struct ifaddr *ifa); /* * Multicast address structure. This is analogous to the ifaddr * structure except that it keeps track of multicast addresses. */ #define IFMA_F_ENQUEUED 0x1 struct ifmultiaddr { CK_STAILQ_ENTRY(ifmultiaddr) ifma_link; /* queue macro glue */ struct sockaddr *ifma_addr; /* address this membership is for */ struct sockaddr *ifma_lladdr; /* link-layer translation, if any */ struct ifnet *ifma_ifp; /* back-pointer to interface */ u_int ifma_refcount; /* reference count */ int ifma_flags; void *ifma_protospec; /* protocol-specific state, if any */ struct ifmultiaddr *ifma_llifma; /* pointer to ifma for ifma_lladdr */ struct epoch_context ifma_epoch_ctx; }; extern struct rwlock ifnet_rwlock; extern struct sx ifnet_sxlock; #define IFNET_WLOCK() do { \ sx_xlock(&ifnet_sxlock); \ rw_wlock(&ifnet_rwlock); \ } while (0) #define IFNET_WUNLOCK() do { \ rw_wunlock(&ifnet_rwlock); \ sx_xunlock(&ifnet_sxlock); \ } while (0) /* * To assert the ifnet lock, you must know not only whether it's for read or * write, but also whether it was acquired with sleep support or not. */ #define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED) #define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch(net_epoch_preempt)) #define IFNET_WLOCK_ASSERT() do { \ sx_assert(&ifnet_sxlock, SA_XLOCKED); \ rw_assert(&ifnet_rwlock, RA_WLOCKED); \ } while (0) #define IFNET_RLOCK() sx_slock(&ifnet_sxlock) #define IFNET_RLOCK_NOSLEEP() struct epoch_tracker ifnet_rlock_et; epoch_enter_preempt(net_epoch_preempt, &ifnet_rlock_et) #define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock) #define IFNET_RUNLOCK_NOSLEEP() epoch_exit_preempt(net_epoch_preempt, &ifnet_rlock_et) /* * Look up an ifnet given its index; the _ref variant also acquires a * reference that must be freed using if_rele(). It is almost always a bug * to call ifnet_byindex() instead of ifnet_byindex_ref(). */ struct ifnet *ifnet_byindex(u_short idx); struct ifnet *ifnet_byindex_locked(u_short idx); struct ifnet *ifnet_byindex_ref(u_short idx); /* * Given the index, ifaddr_byindex() returns the one and only * link-level ifaddr for the interface. You are not supposed to use * it to traverse the list of addresses associated to the interface. */ struct ifaddr *ifaddr_byindex(u_short idx); VNET_DECLARE(struct ifnethead, ifnet); VNET_DECLARE(struct ifgrouphead, ifg_head); VNET_DECLARE(int, if_index); VNET_DECLARE(struct ifnet *, loif); /* first loopback interface */ #define V_ifnet VNET(ifnet) #define V_ifg_head VNET(ifg_head) #define V_if_index VNET(if_index) #define V_loif VNET(loif) #ifdef MCAST_VERBOSE #define MCDPRINTF printf #else #define MCDPRINTF(...) #endif int if_addgroup(struct ifnet *, const char *); int if_delgroup(struct ifnet *, const char *); int if_addmulti(struct ifnet *, struct sockaddr *, struct ifmultiaddr **); int if_allmulti(struct ifnet *, int); struct ifnet* if_alloc(u_char); void if_attach(struct ifnet *); void if_dead(struct ifnet *); int if_delmulti(struct ifnet *, struct sockaddr *); void if_delmulti_ifma(struct ifmultiaddr *); void if_delmulti_ifma_flags(struct ifmultiaddr *, int flags); void if_detach(struct ifnet *); void if_purgeaddrs(struct ifnet *); void if_delallmulti(struct ifnet *); void if_down(struct ifnet *); struct ifmultiaddr * if_findmulti(struct ifnet *, const struct sockaddr *); void if_freemulti(struct ifmultiaddr *ifma); void if_free(struct ifnet *); void if_initname(struct ifnet *, const char *, int); void if_link_state_change(struct ifnet *, int); int if_printf(struct ifnet *, const char *, ...) __printflike(2, 3); void if_ref(struct ifnet *); void if_rele(struct ifnet *); int if_setlladdr(struct ifnet *, const u_char *, int); int if_tunnel_check_nesting(struct ifnet *, struct mbuf *, uint32_t, int); void if_up(struct ifnet *); int ifioctl(struct socket *, u_long, caddr_t, struct thread *); int ifpromisc(struct ifnet *, int); struct ifnet *ifunit(const char *); struct ifnet *ifunit_ref(const char *); int ifa_add_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_del_loopback_route(struct ifaddr *, struct sockaddr *); int ifa_switch_loopback_route(struct ifaddr *, struct sockaddr *); struct ifaddr *ifa_ifwithaddr(const struct sockaddr *); int ifa_ifwithaddr_check(const struct sockaddr *); struct ifaddr *ifa_ifwithbroadaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithdstaddr(const struct sockaddr *, int); struct ifaddr *ifa_ifwithnet(const struct sockaddr *, int, int); struct ifaddr *ifa_ifwithroute(int, const struct sockaddr *, struct sockaddr *, u_int); struct ifaddr *ifaof_ifpforaddr(const struct sockaddr *, struct ifnet *); int ifa_preferred(struct ifaddr *, struct ifaddr *); int if_simloop(struct ifnet *ifp, struct mbuf *m, int af, int hlen); typedef void *if_com_alloc_t(u_char type, struct ifnet *ifp); typedef void if_com_free_t(void *com, u_char type); void if_register_com_alloc(u_char type, if_com_alloc_t *a, if_com_free_t *f); void if_deregister_com_alloc(u_char type); void if_data_copy(struct ifnet *, struct if_data *); uint64_t if_get_counter_default(struct ifnet *, ift_counter); void if_inc_counter(struct ifnet *, ift_counter, int64_t); #define IF_LLADDR(ifp) \ LLADDR((struct sockaddr_dl *)((ifp)->if_addr->ifa_addr)) uint64_t if_setbaudrate(if_t ifp, uint64_t baudrate); uint64_t if_getbaudrate(if_t ifp); int if_setcapabilities(if_t ifp, int capabilities); int if_setcapabilitiesbit(if_t ifp, int setbit, int clearbit); int if_getcapabilities(if_t ifp); int if_togglecapenable(if_t ifp, int togglecap); int if_setcapenable(if_t ifp, int capenable); int if_setcapenablebit(if_t ifp, int setcap, int clearcap); int if_getcapenable(if_t ifp); const char *if_getdname(if_t ifp); int if_setdev(if_t ifp, void *dev); int if_setdrvflagbits(if_t ifp, int if_setflags, int clear_flags); int if_getdrvflags(if_t ifp); int if_setdrvflags(if_t ifp, int flags); int if_clearhwassist(if_t ifp); int if_sethwassistbits(if_t ifp, int toset, int toclear); int if_sethwassist(if_t ifp, int hwassist_bit); int if_gethwassist(if_t ifp); int if_setsoftc(if_t ifp, void *softc); void *if_getsoftc(if_t ifp); int if_setflags(if_t ifp, int flags); int if_gethwaddr(if_t ifp, struct ifreq *); int if_setmtu(if_t ifp, int mtu); int if_getmtu(if_t ifp); int if_getmtu_family(if_t ifp, int family); int if_setflagbits(if_t ifp, int set, int clear); int if_getflags(if_t ifp); int if_sendq_empty(if_t ifp); int if_setsendqready(if_t ifp); int if_setsendqlen(if_t ifp, int tx_desc_count); int if_sethwtsomax(if_t ifp, u_int if_hw_tsomax); int if_sethwtsomaxsegcount(if_t ifp, u_int if_hw_tsomaxsegcount); int if_sethwtsomaxsegsize(if_t ifp, u_int if_hw_tsomaxsegsize); u_int if_gethwtsomax(if_t ifp); u_int if_gethwtsomaxsegcount(if_t ifp); u_int if_gethwtsomaxsegsize(if_t ifp); int if_input(if_t ifp, struct mbuf* sendmp); int if_sendq_prepend(if_t ifp, struct mbuf *m); struct mbuf *if_dequeue(if_t ifp); int if_setifheaderlen(if_t ifp, int len); void if_setrcvif(struct mbuf *m, if_t ifp); void if_setvtag(struct mbuf *m, u_int16_t tag); u_int16_t if_getvtag(struct mbuf *m); int if_vlantrunkinuse(if_t ifp); caddr_t if_getlladdr(if_t ifp); void *if_gethandle(u_char); void if_bpfmtap(if_t ifp, struct mbuf *m); void if_etherbpfmtap(if_t ifp, struct mbuf *m); void if_vlancap(if_t ifp); int if_setupmultiaddr(if_t ifp, void *mta, int *cnt, int max); int if_multiaddr_array(if_t ifp, void *mta, int *cnt, int max); int if_multiaddr_count(if_t ifp, int max); int if_multi_apply(struct ifnet *ifp, int (*filter)(void *, struct ifmultiaddr *, int), void *arg); int if_getamcount(if_t ifp); struct ifaddr * if_getifaddr(if_t ifp); /* Functions */ void if_setinitfn(if_t ifp, void (*)(void *)); void if_setioctlfn(if_t ifp, int (*)(if_t, u_long, caddr_t)); void if_setstartfn(if_t ifp, void (*)(if_t)); void if_settransmitfn(if_t ifp, if_transmit_fn_t); void if_setqflushfn(if_t ifp, if_qflush_fn_t); void if_setgetcounterfn(if_t ifp, if_get_counter_t); /* Revisit the below. These are inline functions originally */ int drbr_inuse_drv(if_t ifp, struct buf_ring *br); struct mbuf* drbr_dequeue_drv(if_t ifp, struct buf_ring *br); int drbr_needs_enqueue_drv(if_t ifp, struct buf_ring *br); int drbr_enqueue_drv(if_t ifp, struct buf_ring *br, struct mbuf *m); /* TSO */ void if_hw_tsomax_common(if_t ifp, struct ifnet_hw_tsomax *); int if_hw_tsomax_update(if_t ifp, struct ifnet_hw_tsomax *); /* accessors for struct ifreq */ void *ifr_data_get_ptr(void *ifrp); +int ifhwioctl(u_long, struct ifnet *, caddr_t, struct thread *); + #ifdef DEVICE_POLLING enum poll_cmd { POLL_ONLY, POLL_AND_CHECK_STATUS }; typedef int poll_handler_t(if_t ifp, enum poll_cmd cmd, int count); int ether_poll_register(poll_handler_t *h, if_t ifp); int ether_poll_deregister(if_t ifp); #endif /* DEVICE_POLLING */ #endif /* _KERNEL */ #include /* XXXAO: temporary unconditional include */ #endif /* !_NET_IF_VAR_H_ */ Index: projects/clang700-import/sys/netinet/sctp_output.c =================================================================== --- projects/clang700-import/sys/netinet/sctp_output.c (revision 339014) +++ projects/clang700-import/sys/netinet/sctp_output.c (revision 339015) @@ -1,13855 +1,13855 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2001-2008, by Cisco Systems, Inc. All rights reserved. * Copyright (c) 2008-2012, by Randall Stewart. All rights reserved. * Copyright (c) 2008-2012, by Michael Tuexen. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * a) Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * b) Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * c) Neither the name of Cisco Systems, Inc. nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #if defined(INET) || defined(INET6) #include #endif #include #include #include #define SCTP_MAX_GAPS_INARRAY 4 struct sack_track { uint8_t right_edge; /* mergable on the right edge */ uint8_t left_edge; /* mergable on the left edge */ uint8_t num_entries; uint8_t spare; struct sctp_gap_ack_block gaps[SCTP_MAX_GAPS_INARRAY]; }; const struct sack_track sack_array[256] = { {0, 0, 0, 0, /* 0x00 */ {{0, 0}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 1, 0, /* 0x01 */ {{0, 0}, {0, 0}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x02 */ {{1, 1}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 1, 0, /* 0x03 */ {{0, 1}, {0, 0}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x04 */ {{2, 2}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x05 */ {{0, 0}, {2, 2}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x06 */ {{1, 2}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 1, 0, /* 0x07 */ {{0, 2}, {0, 0}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x08 */ {{3, 3}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x09 */ {{0, 0}, {3, 3}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x0a */ {{1, 1}, {3, 3}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x0b */ {{0, 1}, {3, 3}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x0c */ {{2, 3}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x0d */ {{0, 0}, {2, 3}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x0e */ {{1, 3}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 1, 0, /* 0x0f */ {{0, 3}, {0, 0}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x10 */ {{4, 4}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x11 */ {{0, 0}, {4, 4}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x12 */ {{1, 1}, {4, 4}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x13 */ {{0, 1}, {4, 4}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x14 */ {{2, 2}, {4, 4}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x15 */ {{0, 0}, {2, 2}, {4, 4}, {0, 0} } }, {0, 0, 2, 0, /* 0x16 */ {{1, 2}, {4, 4}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x17 */ {{0, 2}, {4, 4}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x18 */ {{3, 4}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x19 */ {{0, 0}, {3, 4}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x1a */ {{1, 1}, {3, 4}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x1b */ {{0, 1}, {3, 4}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x1c */ {{2, 4}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x1d */ {{0, 0}, {2, 4}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x1e */ {{1, 4}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 1, 0, /* 0x1f */ {{0, 4}, {0, 0}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x20 */ {{5, 5}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x21 */ {{0, 0}, {5, 5}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x22 */ {{1, 1}, {5, 5}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x23 */ {{0, 1}, {5, 5}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x24 */ {{2, 2}, {5, 5}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x25 */ {{0, 0}, {2, 2}, {5, 5}, {0, 0} } }, {0, 0, 2, 0, /* 0x26 */ {{1, 2}, {5, 5}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x27 */ {{0, 2}, {5, 5}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x28 */ {{3, 3}, {5, 5}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x29 */ {{0, 0}, {3, 3}, {5, 5}, {0, 0} } }, {0, 0, 3, 0, /* 0x2a */ {{1, 1}, {3, 3}, {5, 5}, {0, 0} } }, {1, 0, 3, 0, /* 0x2b */ {{0, 1}, {3, 3}, {5, 5}, {0, 0} } }, {0, 0, 2, 0, /* 0x2c */ {{2, 3}, {5, 5}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x2d */ {{0, 0}, {2, 3}, {5, 5}, {0, 0} } }, {0, 0, 2, 0, /* 0x2e */ {{1, 3}, {5, 5}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x2f */ {{0, 3}, {5, 5}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x30 */ {{4, 5}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x31 */ {{0, 0}, {4, 5}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x32 */ {{1, 1}, {4, 5}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x33 */ {{0, 1}, {4, 5}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x34 */ {{2, 2}, {4, 5}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x35 */ {{0, 0}, {2, 2}, {4, 5}, {0, 0} } }, {0, 0, 2, 0, /* 0x36 */ {{1, 2}, {4, 5}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x37 */ {{0, 2}, {4, 5}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x38 */ {{3, 5}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x39 */ {{0, 0}, {3, 5}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x3a */ {{1, 1}, {3, 5}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x3b */ {{0, 1}, {3, 5}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x3c */ {{2, 5}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x3d */ {{0, 0}, {2, 5}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x3e */ {{1, 5}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 1, 0, /* 0x3f */ {{0, 5}, {0, 0}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x40 */ {{6, 6}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x41 */ {{0, 0}, {6, 6}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x42 */ {{1, 1}, {6, 6}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x43 */ {{0, 1}, {6, 6}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x44 */ {{2, 2}, {6, 6}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x45 */ {{0, 0}, {2, 2}, {6, 6}, {0, 0} } }, {0, 0, 2, 0, /* 0x46 */ {{1, 2}, {6, 6}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x47 */ {{0, 2}, {6, 6}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x48 */ {{3, 3}, {6, 6}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x49 */ {{0, 0}, {3, 3}, {6, 6}, {0, 0} } }, {0, 0, 3, 0, /* 0x4a */ {{1, 1}, {3, 3}, {6, 6}, {0, 0} } }, {1, 0, 3, 0, /* 0x4b */ {{0, 1}, {3, 3}, {6, 6}, {0, 0} } }, {0, 0, 2, 0, /* 0x4c */ {{2, 3}, {6, 6}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x4d */ {{0, 0}, {2, 3}, {6, 6}, {0, 0} } }, {0, 0, 2, 0, /* 0x4e */ {{1, 3}, {6, 6}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x4f */ {{0, 3}, {6, 6}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x50 */ {{4, 4}, {6, 6}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x51 */ {{0, 0}, {4, 4}, {6, 6}, {0, 0} } }, {0, 0, 3, 0, /* 0x52 */ {{1, 1}, {4, 4}, {6, 6}, {0, 0} } }, {1, 0, 3, 0, /* 0x53 */ {{0, 1}, {4, 4}, {6, 6}, {0, 0} } }, {0, 0, 3, 0, /* 0x54 */ {{2, 2}, {4, 4}, {6, 6}, {0, 0} } }, {1, 0, 4, 0, /* 0x55 */ {{0, 0}, {2, 2}, {4, 4}, {6, 6} } }, {0, 0, 3, 0, /* 0x56 */ {{1, 2}, {4, 4}, {6, 6}, {0, 0} } }, {1, 0, 3, 0, /* 0x57 */ {{0, 2}, {4, 4}, {6, 6}, {0, 0} } }, {0, 0, 2, 0, /* 0x58 */ {{3, 4}, {6, 6}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x59 */ {{0, 0}, {3, 4}, {6, 6}, {0, 0} } }, {0, 0, 3, 0, /* 0x5a */ {{1, 1}, {3, 4}, {6, 6}, {0, 0} } }, {1, 0, 3, 0, /* 0x5b */ {{0, 1}, {3, 4}, {6, 6}, {0, 0} } }, {0, 0, 2, 0, /* 0x5c */ {{2, 4}, {6, 6}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x5d */ {{0, 0}, {2, 4}, {6, 6}, {0, 0} } }, {0, 0, 2, 0, /* 0x5e */ {{1, 4}, {6, 6}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x5f */ {{0, 4}, {6, 6}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x60 */ {{5, 6}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x61 */ {{0, 0}, {5, 6}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x62 */ {{1, 1}, {5, 6}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x63 */ {{0, 1}, {5, 6}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x64 */ {{2, 2}, {5, 6}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x65 */ {{0, 0}, {2, 2}, {5, 6}, {0, 0} } }, {0, 0, 2, 0, /* 0x66 */ {{1, 2}, {5, 6}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x67 */ {{0, 2}, {5, 6}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x68 */ {{3, 3}, {5, 6}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x69 */ {{0, 0}, {3, 3}, {5, 6}, {0, 0} } }, {0, 0, 3, 0, /* 0x6a */ {{1, 1}, {3, 3}, {5, 6}, {0, 0} } }, {1, 0, 3, 0, /* 0x6b */ {{0, 1}, {3, 3}, {5, 6}, {0, 0} } }, {0, 0, 2, 0, /* 0x6c */ {{2, 3}, {5, 6}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x6d */ {{0, 0}, {2, 3}, {5, 6}, {0, 0} } }, {0, 0, 2, 0, /* 0x6e */ {{1, 3}, {5, 6}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x6f */ {{0, 3}, {5, 6}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x70 */ {{4, 6}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x71 */ {{0, 0}, {4, 6}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x72 */ {{1, 1}, {4, 6}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x73 */ {{0, 1}, {4, 6}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x74 */ {{2, 2}, {4, 6}, {0, 0}, {0, 0} } }, {1, 0, 3, 0, /* 0x75 */ {{0, 0}, {2, 2}, {4, 6}, {0, 0} } }, {0, 0, 2, 0, /* 0x76 */ {{1, 2}, {4, 6}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x77 */ {{0, 2}, {4, 6}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x78 */ {{3, 6}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x79 */ {{0, 0}, {3, 6}, {0, 0}, {0, 0} } }, {0, 0, 2, 0, /* 0x7a */ {{1, 1}, {3, 6}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x7b */ {{0, 1}, {3, 6}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x7c */ {{2, 6}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 2, 0, /* 0x7d */ {{0, 0}, {2, 6}, {0, 0}, {0, 0} } }, {0, 0, 1, 0, /* 0x7e */ {{1, 6}, {0, 0}, {0, 0}, {0, 0} } }, {1, 0, 1, 0, /* 0x7f */ {{0, 6}, {0, 0}, {0, 0}, {0, 0} } }, {0, 1, 1, 0, /* 0x80 */ {{7, 7}, {0, 0}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0x81 */ {{0, 0}, {7, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0x82 */ {{1, 1}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0x83 */ {{0, 1}, {7, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0x84 */ {{2, 2}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0x85 */ {{0, 0}, {2, 2}, {7, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0x86 */ {{1, 2}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0x87 */ {{0, 2}, {7, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0x88 */ {{3, 3}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0x89 */ {{0, 0}, {3, 3}, {7, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0x8a */ {{1, 1}, {3, 3}, {7, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0x8b */ {{0, 1}, {3, 3}, {7, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0x8c */ {{2, 3}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0x8d */ {{0, 0}, {2, 3}, {7, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0x8e */ {{1, 3}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0x8f */ {{0, 3}, {7, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0x90 */ {{4, 4}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0x91 */ {{0, 0}, {4, 4}, {7, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0x92 */ {{1, 1}, {4, 4}, {7, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0x93 */ {{0, 1}, {4, 4}, {7, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0x94 */ {{2, 2}, {4, 4}, {7, 7}, {0, 0} } }, {1, 1, 4, 0, /* 0x95 */ {{0, 0}, {2, 2}, {4, 4}, {7, 7} } }, {0, 1, 3, 0, /* 0x96 */ {{1, 2}, {4, 4}, {7, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0x97 */ {{0, 2}, {4, 4}, {7, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0x98 */ {{3, 4}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0x99 */ {{0, 0}, {3, 4}, {7, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0x9a */ {{1, 1}, {3, 4}, {7, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0x9b */ {{0, 1}, {3, 4}, {7, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0x9c */ {{2, 4}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0x9d */ {{0, 0}, {2, 4}, {7, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0x9e */ {{1, 4}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0x9f */ {{0, 4}, {7, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0xa0 */ {{5, 5}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xa1 */ {{0, 0}, {5, 5}, {7, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0xa2 */ {{1, 1}, {5, 5}, {7, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0xa3 */ {{0, 1}, {5, 5}, {7, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0xa4 */ {{2, 2}, {5, 5}, {7, 7}, {0, 0} } }, {1, 1, 4, 0, /* 0xa5 */ {{0, 0}, {2, 2}, {5, 5}, {7, 7} } }, {0, 1, 3, 0, /* 0xa6 */ {{1, 2}, {5, 5}, {7, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0xa7 */ {{0, 2}, {5, 5}, {7, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0xa8 */ {{3, 3}, {5, 5}, {7, 7}, {0, 0} } }, {1, 1, 4, 0, /* 0xa9 */ {{0, 0}, {3, 3}, {5, 5}, {7, 7} } }, {0, 1, 4, 0, /* 0xaa */ {{1, 1}, {3, 3}, {5, 5}, {7, 7} } }, {1, 1, 4, 0, /* 0xab */ {{0, 1}, {3, 3}, {5, 5}, {7, 7} } }, {0, 1, 3, 0, /* 0xac */ {{2, 3}, {5, 5}, {7, 7}, {0, 0} } }, {1, 1, 4, 0, /* 0xad */ {{0, 0}, {2, 3}, {5, 5}, {7, 7} } }, {0, 1, 3, 0, /* 0xae */ {{1, 3}, {5, 5}, {7, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0xaf */ {{0, 3}, {5, 5}, {7, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xb0 */ {{4, 5}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xb1 */ {{0, 0}, {4, 5}, {7, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0xb2 */ {{1, 1}, {4, 5}, {7, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0xb3 */ {{0, 1}, {4, 5}, {7, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0xb4 */ {{2, 2}, {4, 5}, {7, 7}, {0, 0} } }, {1, 1, 4, 0, /* 0xb5 */ {{0, 0}, {2, 2}, {4, 5}, {7, 7} } }, {0, 1, 3, 0, /* 0xb6 */ {{1, 2}, {4, 5}, {7, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0xb7 */ {{0, 2}, {4, 5}, {7, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xb8 */ {{3, 5}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xb9 */ {{0, 0}, {3, 5}, {7, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0xba */ {{1, 1}, {3, 5}, {7, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0xbb */ {{0, 1}, {3, 5}, {7, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xbc */ {{2, 5}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xbd */ {{0, 0}, {2, 5}, {7, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xbe */ {{1, 5}, {7, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xbf */ {{0, 5}, {7, 7}, {0, 0}, {0, 0} } }, {0, 1, 1, 0, /* 0xc0 */ {{6, 7}, {0, 0}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xc1 */ {{0, 0}, {6, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0xc2 */ {{1, 1}, {6, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xc3 */ {{0, 1}, {6, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0xc4 */ {{2, 2}, {6, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xc5 */ {{0, 0}, {2, 2}, {6, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xc6 */ {{1, 2}, {6, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xc7 */ {{0, 2}, {6, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0xc8 */ {{3, 3}, {6, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xc9 */ {{0, 0}, {3, 3}, {6, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0xca */ {{1, 1}, {3, 3}, {6, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0xcb */ {{0, 1}, {3, 3}, {6, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xcc */ {{2, 3}, {6, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xcd */ {{0, 0}, {2, 3}, {6, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xce */ {{1, 3}, {6, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xcf */ {{0, 3}, {6, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0xd0 */ {{4, 4}, {6, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xd1 */ {{0, 0}, {4, 4}, {6, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0xd2 */ {{1, 1}, {4, 4}, {6, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0xd3 */ {{0, 1}, {4, 4}, {6, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0xd4 */ {{2, 2}, {4, 4}, {6, 7}, {0, 0} } }, {1, 1, 4, 0, /* 0xd5 */ {{0, 0}, {2, 2}, {4, 4}, {6, 7} } }, {0, 1, 3, 0, /* 0xd6 */ {{1, 2}, {4, 4}, {6, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0xd7 */ {{0, 2}, {4, 4}, {6, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xd8 */ {{3, 4}, {6, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xd9 */ {{0, 0}, {3, 4}, {6, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0xda */ {{1, 1}, {3, 4}, {6, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0xdb */ {{0, 1}, {3, 4}, {6, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xdc */ {{2, 4}, {6, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xdd */ {{0, 0}, {2, 4}, {6, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xde */ {{1, 4}, {6, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xdf */ {{0, 4}, {6, 7}, {0, 0}, {0, 0} } }, {0, 1, 1, 0, /* 0xe0 */ {{5, 7}, {0, 0}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xe1 */ {{0, 0}, {5, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0xe2 */ {{1, 1}, {5, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xe3 */ {{0, 1}, {5, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0xe4 */ {{2, 2}, {5, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xe5 */ {{0, 0}, {2, 2}, {5, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xe6 */ {{1, 2}, {5, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xe7 */ {{0, 2}, {5, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0xe8 */ {{3, 3}, {5, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xe9 */ {{0, 0}, {3, 3}, {5, 7}, {0, 0} } }, {0, 1, 3, 0, /* 0xea */ {{1, 1}, {3, 3}, {5, 7}, {0, 0} } }, {1, 1, 3, 0, /* 0xeb */ {{0, 1}, {3, 3}, {5, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xec */ {{2, 3}, {5, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xed */ {{0, 0}, {2, 3}, {5, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xee */ {{1, 3}, {5, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xef */ {{0, 3}, {5, 7}, {0, 0}, {0, 0} } }, {0, 1, 1, 0, /* 0xf0 */ {{4, 7}, {0, 0}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xf1 */ {{0, 0}, {4, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0xf2 */ {{1, 1}, {4, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xf3 */ {{0, 1}, {4, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0xf4 */ {{2, 2}, {4, 7}, {0, 0}, {0, 0} } }, {1, 1, 3, 0, /* 0xf5 */ {{0, 0}, {2, 2}, {4, 7}, {0, 0} } }, {0, 1, 2, 0, /* 0xf6 */ {{1, 2}, {4, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xf7 */ {{0, 2}, {4, 7}, {0, 0}, {0, 0} } }, {0, 1, 1, 0, /* 0xf8 */ {{3, 7}, {0, 0}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xf9 */ {{0, 0}, {3, 7}, {0, 0}, {0, 0} } }, {0, 1, 2, 0, /* 0xfa */ {{1, 1}, {3, 7}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xfb */ {{0, 1}, {3, 7}, {0, 0}, {0, 0} } }, {0, 1, 1, 0, /* 0xfc */ {{2, 7}, {0, 0}, {0, 0}, {0, 0} } }, {1, 1, 2, 0, /* 0xfd */ {{0, 0}, {2, 7}, {0, 0}, {0, 0} } }, {0, 1, 1, 0, /* 0xfe */ {{1, 7}, {0, 0}, {0, 0}, {0, 0} } }, {1, 1, 1, 0, /* 0xff */ {{0, 7}, {0, 0}, {0, 0}, {0, 0} } } }; int sctp_is_address_in_scope(struct sctp_ifa *ifa, struct sctp_scoping *scope, int do_update) { if ((scope->loopback_scope == 0) && (ifa->ifn_p) && SCTP_IFN_IS_IFT_LOOP(ifa->ifn_p)) { /* * skip loopback if not in scope * */ return (0); } switch (ifa->address.sa.sa_family) { #ifdef INET case AF_INET: if (scope->ipv4_addr_legal) { struct sockaddr_in *sin; sin = &ifa->address.sin; if (sin->sin_addr.s_addr == 0) { /* not in scope , unspecified */ return (0); } if ((scope->ipv4_local_scope == 0) && (IN4_ISPRIVATE_ADDRESS(&sin->sin_addr))) { /* private address not in scope */ return (0); } } else { return (0); } break; #endif #ifdef INET6 case AF_INET6: if (scope->ipv6_addr_legal) { struct sockaddr_in6 *sin6; /* * Must update the flags, bummer, which means any * IFA locks must now be applied HERE <-> */ if (do_update) { sctp_gather_internal_ifa_flags(ifa); } if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { return (0); } /* ok to use deprecated addresses? */ sin6 = &ifa->address.sin6; if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { /* skip unspecifed addresses */ return (0); } if ( /* (local_scope == 0) && */ (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))) { return (0); } if ((scope->site_scope == 0) && (IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr))) { return (0); } } else { return (0); } break; #endif default: return (0); } return (1); } static struct mbuf * sctp_add_addr_to_mbuf(struct mbuf *m, struct sctp_ifa *ifa, uint16_t *len) { #if defined(INET) || defined(INET6) struct sctp_paramhdr *paramh; struct mbuf *mret; uint16_t plen; #endif switch (ifa->address.sa.sa_family) { #ifdef INET case AF_INET: plen = (uint16_t)sizeof(struct sctp_ipv4addr_param); break; #endif #ifdef INET6 case AF_INET6: plen = (uint16_t)sizeof(struct sctp_ipv6addr_param); break; #endif default: return (m); } #if defined(INET) || defined(INET6) if (M_TRAILINGSPACE(m) >= plen) { /* easy side we just drop it on the end */ paramh = (struct sctp_paramhdr *)(SCTP_BUF_AT(m, SCTP_BUF_LEN(m))); mret = m; } else { /* Need more space */ mret = m; while (SCTP_BUF_NEXT(mret) != NULL) { mret = SCTP_BUF_NEXT(mret); } SCTP_BUF_NEXT(mret) = sctp_get_mbuf_for_msg(plen, 0, M_NOWAIT, 1, MT_DATA); if (SCTP_BUF_NEXT(mret) == NULL) { /* We are hosed, can't add more addresses */ return (m); } mret = SCTP_BUF_NEXT(mret); paramh = mtod(mret, struct sctp_paramhdr *); } /* now add the parameter */ switch (ifa->address.sa.sa_family) { #ifdef INET case AF_INET: { struct sctp_ipv4addr_param *ipv4p; struct sockaddr_in *sin; sin = &ifa->address.sin; ipv4p = (struct sctp_ipv4addr_param *)paramh; paramh->param_type = htons(SCTP_IPV4_ADDRESS); paramh->param_length = htons(plen); ipv4p->addr = sin->sin_addr.s_addr; SCTP_BUF_LEN(mret) += plen; break; } #endif #ifdef INET6 case AF_INET6: { struct sctp_ipv6addr_param *ipv6p; struct sockaddr_in6 *sin6; sin6 = &ifa->address.sin6; ipv6p = (struct sctp_ipv6addr_param *)paramh; paramh->param_type = htons(SCTP_IPV6_ADDRESS); paramh->param_length = htons(plen); memcpy(ipv6p->addr, &sin6->sin6_addr, sizeof(ipv6p->addr)); /* clear embedded scope in the address */ in6_clearscope((struct in6_addr *)ipv6p->addr); SCTP_BUF_LEN(mret) += plen; break; } #endif default: return (m); } if (len != NULL) { *len += plen; } return (mret); #endif } struct mbuf * sctp_add_addresses_to_i_ia(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_scoping *scope, struct mbuf *m_at, int cnt_inits_to, uint16_t *padding_len, uint16_t *chunk_len) { struct sctp_vrf *vrf = NULL; int cnt, limit_out = 0, total_count; uint32_t vrf_id; vrf_id = inp->def_vrf_id; SCTP_IPI_ADDR_RLOCK(); vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) { SCTP_IPI_ADDR_RUNLOCK(); return (m_at); } if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { struct sctp_ifa *sctp_ifap; struct sctp_ifn *sctp_ifnp; cnt = cnt_inits_to; if (vrf->total_ifa_count > SCTP_COUNT_LIMIT) { limit_out = 1; cnt = SCTP_ADDRESS_LIMIT; goto skip_count; } LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) { if ((scope->loopback_scope == 0) && SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) { /* * Skip loopback devices if loopback_scope * not set */ continue; } LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) { #ifdef INET if ((sctp_ifap->address.sa.sa_family == AF_INET) && (prison_check_ip4(inp->ip_inp.inp.inp_cred, &sctp_ifap->address.sin.sin_addr) != 0)) { continue; } #endif #ifdef INET6 if ((sctp_ifap->address.sa.sa_family == AF_INET6) && (prison_check_ip6(inp->ip_inp.inp.inp_cred, &sctp_ifap->address.sin6.sin6_addr) != 0)) { continue; } #endif if (sctp_is_addr_restricted(stcb, sctp_ifap)) { continue; } if (sctp_is_address_in_scope(sctp_ifap, scope, 1) == 0) { continue; } cnt++; if (cnt > SCTP_ADDRESS_LIMIT) { break; } } if (cnt > SCTP_ADDRESS_LIMIT) { break; } } skip_count: if (cnt > 1) { total_count = 0; LIST_FOREACH(sctp_ifnp, &vrf->ifnlist, next_ifn) { cnt = 0; if ((scope->loopback_scope == 0) && SCTP_IFN_IS_IFT_LOOP(sctp_ifnp)) { /* * Skip loopback devices if * loopback_scope not set */ continue; } LIST_FOREACH(sctp_ifap, &sctp_ifnp->ifalist, next_ifa) { #ifdef INET if ((sctp_ifap->address.sa.sa_family == AF_INET) && (prison_check_ip4(inp->ip_inp.inp.inp_cred, &sctp_ifap->address.sin.sin_addr) != 0)) { continue; } #endif #ifdef INET6 if ((sctp_ifap->address.sa.sa_family == AF_INET6) && (prison_check_ip6(inp->ip_inp.inp.inp_cred, &sctp_ifap->address.sin6.sin6_addr) != 0)) { continue; } #endif if (sctp_is_addr_restricted(stcb, sctp_ifap)) { continue; } if (sctp_is_address_in_scope(sctp_ifap, scope, 0) == 0) { continue; } if ((chunk_len != NULL) && (padding_len != NULL) && (*padding_len > 0)) { memset(mtod(m_at, caddr_t)+*chunk_len, 0, *padding_len); SCTP_BUF_LEN(m_at) += *padding_len; *chunk_len += *padding_len; *padding_len = 0; } m_at = sctp_add_addr_to_mbuf(m_at, sctp_ifap, chunk_len); if (limit_out) { cnt++; total_count++; if (cnt >= 2) { /* * two from each * address */ break; } if (total_count > SCTP_ADDRESS_LIMIT) { /* No more addresses */ break; } } } } } } else { struct sctp_laddr *laddr; cnt = cnt_inits_to; /* First, how many ? */ LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == NULL) { continue; } if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) /* * Address being deleted by the system, dont * list. */ continue; if (laddr->action == SCTP_DEL_IP_ADDRESS) { /* * Address being deleted on this ep don't * list. */ continue; } if (sctp_is_address_in_scope(laddr->ifa, scope, 1) == 0) { continue; } cnt++; } /* * To get through a NAT we only list addresses if we have * more than one. That way if you just bind a single address * we let the source of the init dictate our address. */ if (cnt > 1) { cnt = cnt_inits_to; LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == NULL) { continue; } if (laddr->ifa->localifa_flags & SCTP_BEING_DELETED) { continue; } if (sctp_is_address_in_scope(laddr->ifa, scope, 0) == 0) { continue; } if ((chunk_len != NULL) && (padding_len != NULL) && (*padding_len > 0)) { memset(mtod(m_at, caddr_t)+*chunk_len, 0, *padding_len); SCTP_BUF_LEN(m_at) += *padding_len; *chunk_len += *padding_len; *padding_len = 0; } m_at = sctp_add_addr_to_mbuf(m_at, laddr->ifa, chunk_len); cnt++; if (cnt >= SCTP_ADDRESS_LIMIT) { break; } } } } SCTP_IPI_ADDR_RUNLOCK(); return (m_at); } static struct sctp_ifa * sctp_is_ifa_addr_preferred(struct sctp_ifa *ifa, uint8_t dest_is_loop, uint8_t dest_is_priv, sa_family_t fam) { uint8_t dest_is_global = 0; /* dest_is_priv is true if destination is a private address */ /* dest_is_loop is true if destination is a loopback addresses */ /** * Here we determine if its a preferred address. A preferred address * means it is the same scope or higher scope then the destination. * L = loopback, P = private, G = global * ----------------------------------------- * src | dest | result * ---------------------------------------- * L | L | yes * ----------------------------------------- * P | L | yes-v4 no-v6 * ----------------------------------------- * G | L | yes-v4 no-v6 * ----------------------------------------- * L | P | no * ----------------------------------------- * P | P | yes * ----------------------------------------- * G | P | no * ----------------------------------------- * L | G | no * ----------------------------------------- * P | G | no * ----------------------------------------- * G | G | yes * ----------------------------------------- */ if (ifa->address.sa.sa_family != fam) { /* forget mis-matched family */ return (NULL); } if ((dest_is_priv == 0) && (dest_is_loop == 0)) { dest_is_global = 1; } SCTPDBG(SCTP_DEBUG_OUTPUT2, "Is destination preferred:"); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ifa->address.sa); /* Ok the address may be ok */ #ifdef INET6 if (fam == AF_INET6) { /* ok to use deprecated addresses? no lets not! */ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:1\n"); return (NULL); } if (ifa->src_is_priv && !ifa->src_is_loop) { if (dest_is_loop) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:2\n"); return (NULL); } } if (ifa->src_is_glob) { if (dest_is_loop) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:3\n"); return (NULL); } } } #endif /* * Now that we know what is what, implement or table this could in * theory be done slicker (it used to be), but this is * straightforward and easier to validate :-) */ SCTPDBG(SCTP_DEBUG_OUTPUT3, "src_loop:%d src_priv:%d src_glob:%d\n", ifa->src_is_loop, ifa->src_is_priv, ifa->src_is_glob); SCTPDBG(SCTP_DEBUG_OUTPUT3, "dest_loop:%d dest_priv:%d dest_glob:%d\n", dest_is_loop, dest_is_priv, dest_is_global); if ((ifa->src_is_loop) && (dest_is_priv)) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:4\n"); return (NULL); } if ((ifa->src_is_glob) && (dest_is_priv)) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:5\n"); return (NULL); } if ((ifa->src_is_loop) && (dest_is_global)) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:6\n"); return (NULL); } if ((ifa->src_is_priv) && (dest_is_global)) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "NO:7\n"); return (NULL); } SCTPDBG(SCTP_DEBUG_OUTPUT3, "YES\n"); /* its a preferred address */ return (ifa); } static struct sctp_ifa * sctp_is_ifa_addr_acceptable(struct sctp_ifa *ifa, uint8_t dest_is_loop, uint8_t dest_is_priv, sa_family_t fam) { uint8_t dest_is_global = 0; /** * Here we determine if its a acceptable address. A acceptable * address means it is the same scope or higher scope but we can * allow for NAT which means its ok to have a global dest and a * private src. * * L = loopback, P = private, G = global * ----------------------------------------- * src | dest | result * ----------------------------------------- * L | L | yes * ----------------------------------------- * P | L | yes-v4 no-v6 * ----------------------------------------- * G | L | yes * ----------------------------------------- * L | P | no * ----------------------------------------- * P | P | yes * ----------------------------------------- * G | P | yes - May not work * ----------------------------------------- * L | G | no * ----------------------------------------- * P | G | yes - May not work * ----------------------------------------- * G | G | yes * ----------------------------------------- */ if (ifa->address.sa.sa_family != fam) { /* forget non matching family */ SCTPDBG(SCTP_DEBUG_OUTPUT3, "ifa_fam:%d fam:%d\n", ifa->address.sa.sa_family, fam); return (NULL); } /* Ok the address may be ok */ SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, &ifa->address.sa); SCTPDBG(SCTP_DEBUG_OUTPUT3, "dst_is_loop:%d dest_is_priv:%d\n", dest_is_loop, dest_is_priv); if ((dest_is_loop == 0) && (dest_is_priv == 0)) { dest_is_global = 1; } #ifdef INET6 if (fam == AF_INET6) { /* ok to use deprecated addresses? */ if (ifa->localifa_flags & SCTP_ADDR_IFA_UNUSEABLE) { return (NULL); } if (ifa->src_is_priv) { /* Special case, linklocal to loop */ if (dest_is_loop) return (NULL); } } #endif /* * Now that we know what is what, implement our table. This could in * theory be done slicker (it used to be), but this is * straightforward and easier to validate :-) */ SCTPDBG(SCTP_DEBUG_OUTPUT3, "ifa->src_is_loop:%d dest_is_priv:%d\n", ifa->src_is_loop, dest_is_priv); if ((ifa->src_is_loop == 1) && (dest_is_priv)) { return (NULL); } SCTPDBG(SCTP_DEBUG_OUTPUT3, "ifa->src_is_loop:%d dest_is_glob:%d\n", ifa->src_is_loop, dest_is_global); if ((ifa->src_is_loop == 1) && (dest_is_global)) { return (NULL); } SCTPDBG(SCTP_DEBUG_OUTPUT3, "address is acceptable\n"); /* its an acceptable address */ return (ifa); } int sctp_is_addr_restricted(struct sctp_tcb *stcb, struct sctp_ifa *ifa) { struct sctp_laddr *laddr; if (stcb == NULL) { /* There are no restrictions, no TCB :-) */ return (0); } LIST_FOREACH(laddr, &stcb->asoc.sctp_restricted_addrs, sctp_nxt_addr) { if (laddr->ifa == NULL) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n", __func__); continue; } if (laddr->ifa == ifa) { /* Yes it is on the list */ return (1); } } return (0); } int sctp_is_addr_in_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa) { struct sctp_laddr *laddr; if (ifa == NULL) return (0); LIST_FOREACH(laddr, &inp->sctp_addr_list, sctp_nxt_addr) { if (laddr->ifa == NULL) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "%s: NULL ifa\n", __func__); continue; } if ((laddr->ifa == ifa) && laddr->action == 0) /* same pointer */ return (1); } return (0); } static struct sctp_ifa * sctp_choose_boundspecific_inp(struct sctp_inpcb *inp, sctp_route_t *ro, uint32_t vrf_id, int non_asoc_addr_ok, uint8_t dest_is_priv, uint8_t dest_is_loop, sa_family_t fam) { struct sctp_laddr *laddr, *starting_point; void *ifn; int resettotop = 0; struct sctp_ifn *sctp_ifn; struct sctp_ifa *sctp_ifa, *sifa; struct sctp_vrf *vrf; uint32_t ifn_index; vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) return (NULL); ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro); ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro); sctp_ifn = sctp_find_ifn(ifn, ifn_index); /* * first question, is the ifn we will emit on in our list, if so, we * want such an address. Note that we first looked for a preferred * address. */ if (sctp_ifn) { /* is a preferred one on the interface we route out? */ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { #ifdef INET if ((sctp_ifa->address.sa.sa_family == AF_INET) && (prison_check_ip4(inp->ip_inp.inp.inp_cred, &sctp_ifa->address.sin.sin_addr) != 0)) { continue; } #endif #ifdef INET6 if ((sctp_ifa->address.sa.sa_family == AF_INET6) && (prison_check_ip6(inp->ip_inp.inp.inp_cred, &sctp_ifa->address.sin6.sin6_addr) != 0)) { continue; } #endif if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) continue; sifa = sctp_is_ifa_addr_preferred(sctp_ifa, dest_is_loop, dest_is_priv, fam); if (sifa == NULL) continue; if (sctp_is_addr_in_ep(inp, sifa)) { atomic_add_int(&sifa->refcount, 1); return (sifa); } } } /* * ok, now we now need to find one on the list of the addresses. We * can't get one on the emitting interface so let's find first a * preferred one. If not that an acceptable one otherwise... we * return NULL. */ starting_point = inp->next_addr_touse; once_again: if (inp->next_addr_touse == NULL) { inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list); resettotop = 1; } for (laddr = inp->next_addr_touse; laddr; laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { if (laddr->ifa == NULL) { /* address has been removed */ continue; } if (laddr->action == SCTP_DEL_IP_ADDRESS) { /* address is being deleted */ continue; } sifa = sctp_is_ifa_addr_preferred(laddr->ifa, dest_is_loop, dest_is_priv, fam); if (sifa == NULL) continue; atomic_add_int(&sifa->refcount, 1); return (sifa); } if (resettotop == 0) { inp->next_addr_touse = NULL; goto once_again; } inp->next_addr_touse = starting_point; resettotop = 0; once_again_too: if (inp->next_addr_touse == NULL) { inp->next_addr_touse = LIST_FIRST(&inp->sctp_addr_list); resettotop = 1; } /* ok, what about an acceptable address in the inp */ for (laddr = inp->next_addr_touse; laddr; laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { if (laddr->ifa == NULL) { /* address has been removed */ continue; } if (laddr->action == SCTP_DEL_IP_ADDRESS) { /* address is being deleted */ continue; } sifa = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop, dest_is_priv, fam); if (sifa == NULL) continue; atomic_add_int(&sifa->refcount, 1); return (sifa); } if (resettotop == 0) { inp->next_addr_touse = NULL; goto once_again_too; } /* * no address bound can be a source for the destination we are in * trouble */ return (NULL); } static struct sctp_ifa * sctp_choose_boundspecific_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, sctp_route_t *ro, uint32_t vrf_id, uint8_t dest_is_priv, uint8_t dest_is_loop, int non_asoc_addr_ok, sa_family_t fam) { struct sctp_laddr *laddr, *starting_point; void *ifn; struct sctp_ifn *sctp_ifn; struct sctp_ifa *sctp_ifa, *sifa; uint8_t start_at_beginning = 0; struct sctp_vrf *vrf; uint32_t ifn_index; /* * first question, is the ifn we will emit on in our list, if so, we * want that one. */ vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) return (NULL); ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro); ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro); sctp_ifn = sctp_find_ifn(ifn, ifn_index); /* * first question, is the ifn we will emit on in our list? If so, * we want that one. First we look for a preferred. Second, we go * for an acceptable. */ if (sctp_ifn) { /* first try for a preferred address on the ep */ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { #ifdef INET if ((sctp_ifa->address.sa.sa_family == AF_INET) && (prison_check_ip4(inp->ip_inp.inp.inp_cred, &sctp_ifa->address.sin.sin_addr) != 0)) { continue; } #endif #ifdef INET6 if ((sctp_ifa->address.sa.sa_family == AF_INET6) && (prison_check_ip6(inp->ip_inp.inp.inp_cred, &sctp_ifa->address.sin6.sin6_addr) != 0)) { continue; } #endif if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) continue; if (sctp_is_addr_in_ep(inp, sctp_ifa)) { sifa = sctp_is_ifa_addr_preferred(sctp_ifa, dest_is_loop, dest_is_priv, fam); if (sifa == NULL) continue; if (((non_asoc_addr_ok == 0) && (sctp_is_addr_restricted(stcb, sifa))) || (non_asoc_addr_ok && (sctp_is_addr_restricted(stcb, sifa)) && (!sctp_is_addr_pending(stcb, sifa)))) { /* on the no-no list */ continue; } atomic_add_int(&sifa->refcount, 1); return (sifa); } } /* next try for an acceptable address on the ep */ LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { #ifdef INET if ((sctp_ifa->address.sa.sa_family == AF_INET) && (prison_check_ip4(inp->ip_inp.inp.inp_cred, &sctp_ifa->address.sin.sin_addr) != 0)) { continue; } #endif #ifdef INET6 if ((sctp_ifa->address.sa.sa_family == AF_INET6) && (prison_check_ip6(inp->ip_inp.inp.inp_cred, &sctp_ifa->address.sin6.sin6_addr) != 0)) { continue; } #endif if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) continue; if (sctp_is_addr_in_ep(inp, sctp_ifa)) { sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam); if (sifa == NULL) continue; if (((non_asoc_addr_ok == 0) && (sctp_is_addr_restricted(stcb, sifa))) || (non_asoc_addr_ok && (sctp_is_addr_restricted(stcb, sifa)) && (!sctp_is_addr_pending(stcb, sifa)))) { /* on the no-no list */ continue; } atomic_add_int(&sifa->refcount, 1); return (sifa); } } } /* * if we can't find one like that then we must look at all addresses * bound to pick one at first preferable then secondly acceptable. */ starting_point = stcb->asoc.last_used_address; sctp_from_the_top: if (stcb->asoc.last_used_address == NULL) { start_at_beginning = 1; stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list); } /* search beginning with the last used address */ for (laddr = stcb->asoc.last_used_address; laddr; laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { if (laddr->ifa == NULL) { /* address has been removed */ continue; } if (laddr->action == SCTP_DEL_IP_ADDRESS) { /* address is being deleted */ continue; } sifa = sctp_is_ifa_addr_preferred(laddr->ifa, dest_is_loop, dest_is_priv, fam); if (sifa == NULL) continue; if (((non_asoc_addr_ok == 0) && (sctp_is_addr_restricted(stcb, sifa))) || (non_asoc_addr_ok && (sctp_is_addr_restricted(stcb, sifa)) && (!sctp_is_addr_pending(stcb, sifa)))) { /* on the no-no list */ continue; } stcb->asoc.last_used_address = laddr; atomic_add_int(&sifa->refcount, 1); return (sifa); } if (start_at_beginning == 0) { stcb->asoc.last_used_address = NULL; goto sctp_from_the_top; } /* now try for any higher scope than the destination */ stcb->asoc.last_used_address = starting_point; start_at_beginning = 0; sctp_from_the_top2: if (stcb->asoc.last_used_address == NULL) { start_at_beginning = 1; stcb->asoc.last_used_address = LIST_FIRST(&inp->sctp_addr_list); } /* search beginning with the last used address */ for (laddr = stcb->asoc.last_used_address; laddr; laddr = LIST_NEXT(laddr, sctp_nxt_addr)) { if (laddr->ifa == NULL) { /* address has been removed */ continue; } if (laddr->action == SCTP_DEL_IP_ADDRESS) { /* address is being deleted */ continue; } sifa = sctp_is_ifa_addr_acceptable(laddr->ifa, dest_is_loop, dest_is_priv, fam); if (sifa == NULL) continue; if (((non_asoc_addr_ok == 0) && (sctp_is_addr_restricted(stcb, sifa))) || (non_asoc_addr_ok && (sctp_is_addr_restricted(stcb, sifa)) && (!sctp_is_addr_pending(stcb, sifa)))) { /* on the no-no list */ continue; } stcb->asoc.last_used_address = laddr; atomic_add_int(&sifa->refcount, 1); return (sifa); } if (start_at_beginning == 0) { stcb->asoc.last_used_address = NULL; goto sctp_from_the_top2; } return (NULL); } static struct sctp_ifa * sctp_select_nth_preferred_addr_from_ifn_boundall(struct sctp_ifn *ifn, struct sctp_inpcb *inp, struct sctp_tcb *stcb, int non_asoc_addr_ok, uint8_t dest_is_loop, uint8_t dest_is_priv, int addr_wanted, sa_family_t fam, sctp_route_t *ro ) { struct sctp_ifa *ifa, *sifa; int num_eligible_addr = 0; #ifdef INET6 struct sockaddr_in6 sin6, lsa6; if (fam == AF_INET6) { memcpy(&sin6, &ro->ro_dst, sizeof(struct sockaddr_in6)); (void)sa6_recoverscope(&sin6); } #endif /* INET6 */ LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) { #ifdef INET if ((ifa->address.sa.sa_family == AF_INET) && (prison_check_ip4(inp->ip_inp.inp.inp_cred, &ifa->address.sin.sin_addr) != 0)) { continue; } #endif #ifdef INET6 if ((ifa->address.sa.sa_family == AF_INET6) && (prison_check_ip6(inp->ip_inp.inp.inp_cred, &ifa->address.sin6.sin6_addr) != 0)) { continue; } #endif if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) continue; sifa = sctp_is_ifa_addr_preferred(ifa, dest_is_loop, dest_is_priv, fam); if (sifa == NULL) continue; #ifdef INET6 if (fam == AF_INET6 && dest_is_loop && sifa->src_is_loop && sifa->src_is_priv) { /* * don't allow fe80::1 to be a src on loop ::1, we * don't list it to the peer so we will get an * abort. */ continue; } if (fam == AF_INET6 && IN6_IS_ADDR_LINKLOCAL(&sifa->address.sin6.sin6_addr) && IN6_IS_ADDR_LINKLOCAL(&sin6.sin6_addr)) { /* * link-local <-> link-local must belong to the same * scope. */ memcpy(&lsa6, &sifa->address.sin6, sizeof(struct sockaddr_in6)); (void)sa6_recoverscope(&lsa6); if (sin6.sin6_scope_id != lsa6.sin6_scope_id) { continue; } } #endif /* INET6 */ /* * Check if the IPv6 address matches to next-hop. In the * mobile case, old IPv6 address may be not deleted from the * interface. Then, the interface has previous and new * addresses. We should use one corresponding to the * next-hop. (by micchie) */ #ifdef INET6 if (stcb && fam == AF_INET6 && sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE)) { if (sctp_v6src_match_nexthop(&sifa->address.sin6, ro) == 0) { continue; } } #endif #ifdef INET /* Avoid topologically incorrect IPv4 address */ if (stcb && fam == AF_INET && sctp_is_mobility_feature_on(stcb->sctp_ep, SCTP_MOBILITY_BASE)) { if (sctp_v4src_match_nexthop(sifa, ro) == 0) { continue; } } #endif if (stcb) { if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) { continue; } if (((non_asoc_addr_ok == 0) && (sctp_is_addr_restricted(stcb, sifa))) || (non_asoc_addr_ok && (sctp_is_addr_restricted(stcb, sifa)) && (!sctp_is_addr_pending(stcb, sifa)))) { /* * It is restricted for some reason.. * probably not yet added. */ continue; } } if (num_eligible_addr >= addr_wanted) { return (sifa); } num_eligible_addr++; } return (NULL); } static int sctp_count_num_preferred_boundall(struct sctp_ifn *ifn, struct sctp_inpcb *inp, struct sctp_tcb *stcb, int non_asoc_addr_ok, uint8_t dest_is_loop, uint8_t dest_is_priv, sa_family_t fam) { struct sctp_ifa *ifa, *sifa; int num_eligible_addr = 0; LIST_FOREACH(ifa, &ifn->ifalist, next_ifa) { #ifdef INET if ((ifa->address.sa.sa_family == AF_INET) && (prison_check_ip4(inp->ip_inp.inp.inp_cred, &ifa->address.sin.sin_addr) != 0)) { continue; } #endif #ifdef INET6 if ((ifa->address.sa.sa_family == AF_INET6) && (stcb != NULL) && (prison_check_ip6(inp->ip_inp.inp.inp_cred, &ifa->address.sin6.sin6_addr) != 0)) { continue; } #endif if ((ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) { continue; } sifa = sctp_is_ifa_addr_preferred(ifa, dest_is_loop, dest_is_priv, fam); if (sifa == NULL) { continue; } if (stcb) { if (sctp_is_address_in_scope(ifa, &stcb->asoc.scope, 0) == 0) { continue; } if (((non_asoc_addr_ok == 0) && (sctp_is_addr_restricted(stcb, sifa))) || (non_asoc_addr_ok && (sctp_is_addr_restricted(stcb, sifa)) && (!sctp_is_addr_pending(stcb, sifa)))) { /* * It is restricted for some reason.. * probably not yet added. */ continue; } } num_eligible_addr++; } return (num_eligible_addr); } static struct sctp_ifa * sctp_choose_boundall(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net, sctp_route_t *ro, uint32_t vrf_id, uint8_t dest_is_priv, uint8_t dest_is_loop, int non_asoc_addr_ok, sa_family_t fam) { int cur_addr_num = 0, num_preferred = 0; void *ifn; struct sctp_ifn *sctp_ifn, *looked_at = NULL, *emit_ifn; struct sctp_ifa *sctp_ifa, *sifa; uint32_t ifn_index; struct sctp_vrf *vrf; #ifdef INET int retried = 0; #endif /*- * For boundall we can use any address in the association. * If non_asoc_addr_ok is set we can use any address (at least in * theory). So we look for preferred addresses first. If we find one, * we use it. Otherwise we next try to get an address on the * interface, which we should be able to do (unless non_asoc_addr_ok * is false and we are routed out that way). In these cases where we * can't use the address of the interface we go through all the * ifn's looking for an address we can use and fill that in. Punting * means we send back address 0, which will probably cause problems * actually since then IP will fill in the address of the route ifn, * which means we probably already rejected it.. i.e. here comes an * abort :-<. */ vrf = sctp_find_vrf(vrf_id); if (vrf == NULL) return (NULL); ifn = SCTP_GET_IFN_VOID_FROM_ROUTE(ro); ifn_index = SCTP_GET_IF_INDEX_FROM_ROUTE(ro); SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifn from route:%p ifn_index:%d\n", ifn, ifn_index); emit_ifn = looked_at = sctp_ifn = sctp_find_ifn(ifn, ifn_index); if (sctp_ifn == NULL) { /* ?? We don't have this guy ?? */ SCTPDBG(SCTP_DEBUG_OUTPUT2, "No ifn emit interface?\n"); goto bound_all_plan_b; } SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifn_index:%d name:%s is emit interface\n", ifn_index, sctp_ifn->ifn_name); if (net) { cur_addr_num = net->indx_of_eligible_next_to_use; } num_preferred = sctp_count_num_preferred_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok, dest_is_loop, dest_is_priv, fam); SCTPDBG(SCTP_DEBUG_OUTPUT2, "Found %d preferred source addresses for intf:%s\n", num_preferred, sctp_ifn->ifn_name); if (num_preferred == 0) { /* * no eligible addresses, we must use some other interface * address if we can find one. */ goto bound_all_plan_b; } /* * Ok we have num_eligible_addr set with how many we can use, this * may vary from call to call due to addresses being deprecated * etc.. */ if (cur_addr_num >= num_preferred) { cur_addr_num = 0; } /* * select the nth address from the list (where cur_addr_num is the * nth) and 0 is the first one, 1 is the second one etc... */ SCTPDBG(SCTP_DEBUG_OUTPUT2, "cur_addr_num:%d\n", cur_addr_num); sctp_ifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok, dest_is_loop, dest_is_priv, cur_addr_num, fam, ro); /* if sctp_ifa is NULL something changed??, fall to plan b. */ if (sctp_ifa) { atomic_add_int(&sctp_ifa->refcount, 1); if (net) { /* save off where the next one we will want */ net->indx_of_eligible_next_to_use = cur_addr_num + 1; } return (sctp_ifa); } /* * plan_b: Look at all interfaces and find a preferred address. If * no preferred fall through to plan_c. */ bound_all_plan_b: SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan B\n"); LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { SCTPDBG(SCTP_DEBUG_OUTPUT2, "Examine interface %s\n", sctp_ifn->ifn_name); if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { /* wrong base scope */ SCTPDBG(SCTP_DEBUG_OUTPUT2, "skip\n"); continue; } if ((sctp_ifn == looked_at) && looked_at) { /* already looked at this guy */ SCTPDBG(SCTP_DEBUG_OUTPUT2, "already seen\n"); continue; } num_preferred = sctp_count_num_preferred_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok, dest_is_loop, dest_is_priv, fam); SCTPDBG(SCTP_DEBUG_OUTPUT2, "Found ifn:%p %d preferred source addresses\n", ifn, num_preferred); if (num_preferred == 0) { /* None on this interface. */ SCTPDBG(SCTP_DEBUG_OUTPUT2, "No preferred -- skipping to next\n"); continue; } SCTPDBG(SCTP_DEBUG_OUTPUT2, "num preferred:%d on interface:%p cur_addr_num:%d\n", num_preferred, (void *)sctp_ifn, cur_addr_num); /* * Ok we have num_eligible_addr set with how many we can * use, this may vary from call to call due to addresses * being deprecated etc.. */ if (cur_addr_num >= num_preferred) { cur_addr_num = 0; } sifa = sctp_select_nth_preferred_addr_from_ifn_boundall(sctp_ifn, inp, stcb, non_asoc_addr_ok, dest_is_loop, dest_is_priv, cur_addr_num, fam, ro); if (sifa == NULL) continue; if (net) { net->indx_of_eligible_next_to_use = cur_addr_num + 1; SCTPDBG(SCTP_DEBUG_OUTPUT2, "we selected %d\n", cur_addr_num); SCTPDBG(SCTP_DEBUG_OUTPUT2, "Source:"); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa); SCTPDBG(SCTP_DEBUG_OUTPUT2, "Dest:"); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &net->ro._l_addr.sa); } atomic_add_int(&sifa->refcount, 1); return (sifa); } #ifdef INET again_with_private_addresses_allowed: #endif /* plan_c: do we have an acceptable address on the emit interface */ sifa = NULL; SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan C: find acceptable on interface\n"); if (emit_ifn == NULL) { SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jump to Plan D - no emit_ifn\n"); goto plan_d; } LIST_FOREACH(sctp_ifa, &emit_ifn->ifalist, next_ifa) { SCTPDBG(SCTP_DEBUG_OUTPUT2, "ifa:%p\n", (void *)sctp_ifa); #ifdef INET if ((sctp_ifa->address.sa.sa_family == AF_INET) && (prison_check_ip4(inp->ip_inp.inp.inp_cred, &sctp_ifa->address.sin.sin_addr) != 0)) { SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jailed\n"); continue; } #endif #ifdef INET6 if ((sctp_ifa->address.sa.sa_family == AF_INET6) && (prison_check_ip6(inp->ip_inp.inp.inp_cred, &sctp_ifa->address.sin6.sin6_addr) != 0)) { SCTPDBG(SCTP_DEBUG_OUTPUT2, "Jailed\n"); continue; } #endif if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) { SCTPDBG(SCTP_DEBUG_OUTPUT2, "Defer\n"); continue; } sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam); if (sifa == NULL) { SCTPDBG(SCTP_DEBUG_OUTPUT2, "IFA not acceptable\n"); continue; } if (stcb) { if (sctp_is_address_in_scope(sifa, &stcb->asoc.scope, 0) == 0) { SCTPDBG(SCTP_DEBUG_OUTPUT2, "NOT in scope\n"); sifa = NULL; continue; } if (((non_asoc_addr_ok == 0) && (sctp_is_addr_restricted(stcb, sifa))) || (non_asoc_addr_ok && (sctp_is_addr_restricted(stcb, sifa)) && (!sctp_is_addr_pending(stcb, sifa)))) { /* * It is restricted for some reason.. * probably not yet added. */ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Its restricted\n"); sifa = NULL; continue; } } atomic_add_int(&sifa->refcount, 1); goto out; } plan_d: /* * plan_d: We are in trouble. No preferred address on the emit * interface. And not even a preferred address on all interfaces. Go * out and see if we can find an acceptable address somewhere * amongst all interfaces. */ SCTPDBG(SCTP_DEBUG_OUTPUT2, "Trying Plan D looked_at is %p\n", (void *)looked_at); LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { /* wrong base scope */ continue; } LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { #ifdef INET if ((sctp_ifa->address.sa.sa_family == AF_INET) && (prison_check_ip4(inp->ip_inp.inp.inp_cred, &sctp_ifa->address.sin.sin_addr) != 0)) { continue; } #endif #ifdef INET6 if ((sctp_ifa->address.sa.sa_family == AF_INET6) && (prison_check_ip6(inp->ip_inp.inp.inp_cred, &sctp_ifa->address.sin6.sin6_addr) != 0)) { continue; } #endif if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) continue; sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam); if (sifa == NULL) continue; if (stcb) { if (sctp_is_address_in_scope(sifa, &stcb->asoc.scope, 0) == 0) { sifa = NULL; continue; } if (((non_asoc_addr_ok == 0) && (sctp_is_addr_restricted(stcb, sifa))) || (non_asoc_addr_ok && (sctp_is_addr_restricted(stcb, sifa)) && (!sctp_is_addr_pending(stcb, sifa)))) { /* * It is restricted for some * reason.. probably not yet added. */ sifa = NULL; continue; } } goto out; } } #ifdef INET if (stcb) { if ((retried == 0) && (stcb->asoc.scope.ipv4_local_scope == 0)) { stcb->asoc.scope.ipv4_local_scope = 1; retried = 1; goto again_with_private_addresses_allowed; } else if (retried == 1) { stcb->asoc.scope.ipv4_local_scope = 0; } } #endif out: #ifdef INET if (sifa) { if (retried == 1) { LIST_FOREACH(sctp_ifn, &vrf->ifnlist, next_ifn) { if (dest_is_loop == 0 && SCTP_IFN_IS_IFT_LOOP(sctp_ifn)) { /* wrong base scope */ continue; } LIST_FOREACH(sctp_ifa, &sctp_ifn->ifalist, next_ifa) { struct sctp_ifa *tmp_sifa; #ifdef INET if ((sctp_ifa->address.sa.sa_family == AF_INET) && (prison_check_ip4(inp->ip_inp.inp.inp_cred, &sctp_ifa->address.sin.sin_addr) != 0)) { continue; } #endif #ifdef INET6 if ((sctp_ifa->address.sa.sa_family == AF_INET6) && (prison_check_ip6(inp->ip_inp.inp.inp_cred, &sctp_ifa->address.sin6.sin6_addr) != 0)) { continue; } #endif if ((sctp_ifa->localifa_flags & SCTP_ADDR_DEFER_USE) && (non_asoc_addr_ok == 0)) continue; tmp_sifa = sctp_is_ifa_addr_acceptable(sctp_ifa, dest_is_loop, dest_is_priv, fam); if (tmp_sifa == NULL) { continue; } if (tmp_sifa == sifa) { continue; } if (stcb) { if (sctp_is_address_in_scope(tmp_sifa, &stcb->asoc.scope, 0) == 0) { continue; } if (((non_asoc_addr_ok == 0) && (sctp_is_addr_restricted(stcb, tmp_sifa))) || (non_asoc_addr_ok && (sctp_is_addr_restricted(stcb, tmp_sifa)) && (!sctp_is_addr_pending(stcb, tmp_sifa)))) { /* * It is restricted * for some reason.. * probably not yet * added. */ continue; } } if ((tmp_sifa->address.sin.sin_family == AF_INET) && (IN4_ISPRIVATE_ADDRESS(&(tmp_sifa->address.sin.sin_addr)))) { sctp_add_local_addr_restricted(stcb, tmp_sifa); } } } } atomic_add_int(&sifa->refcount, 1); } #endif return (sifa); } /* tcb may be NULL */ struct sctp_ifa * sctp_source_address_selection(struct sctp_inpcb *inp, struct sctp_tcb *stcb, sctp_route_t *ro, struct sctp_nets *net, int non_asoc_addr_ok, uint32_t vrf_id) { struct sctp_ifa *answer; uint8_t dest_is_priv, dest_is_loop; sa_family_t fam; #ifdef INET struct sockaddr_in *to = (struct sockaddr_in *)&ro->ro_dst; #endif #ifdef INET6 struct sockaddr_in6 *to6 = (struct sockaddr_in6 *)&ro->ro_dst; #endif /** * Rules: * - Find the route if needed, cache if I can. * - Look at interface address in route, Is it in the bound list. If so we * have the best source. * - If not we must rotate amongst the addresses. * * Cavets and issues * * Do we need to pay attention to scope. We can have a private address * or a global address we are sourcing or sending to. So if we draw * it out * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz * For V4 * ------------------------------------------ * source * dest * result * ----------------------------------------- * Private * Global * NAT * ----------------------------------------- * Private * Private * No problem * ----------------------------------------- * Global * Private * Huh, How will this work? * ----------------------------------------- * Global * Global * No Problem *------------------------------------------ * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz * For V6 *------------------------------------------ * source * dest * result * ----------------------------------------- * Linklocal * Global * * ----------------------------------------- * Linklocal * Linklocal * No problem * ----------------------------------------- * Global * Linklocal * Huh, How will this work? * ----------------------------------------- * Global * Global * No Problem *------------------------------------------ * zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz * * And then we add to that what happens if there are multiple addresses * assigned to an interface. Remember the ifa on a ifn is a linked * list of addresses. So one interface can have more than one IP * address. What happens if we have both a private and a global * address? Do we then use context of destination to sort out which * one is best? And what about NAT's sending P->G may get you a NAT * translation, or should you select the G thats on the interface in * preference. * * Decisions: * * - count the number of addresses on the interface. * - if it is one, no problem except case . * For we will assume a NAT out there. * - if there are more than one, then we need to worry about scope P * or G. We should prefer G -> G and P -> P if possible. * Then as a secondary fall back to mixed types G->P being a last * ditch one. * - The above all works for bound all, but bound specific we need to * use the same concept but instead only consider the bound * addresses. If the bound set is NOT assigned to the interface then * we must use rotation amongst the bound addresses.. */ if (ro->ro_rt == NULL) { /* * Need a route to cache. */ SCTP_RTALLOC(ro, vrf_id, inp->fibnum); } if (ro->ro_rt == NULL) { return (NULL); } fam = ro->ro_dst.sa_family; dest_is_priv = dest_is_loop = 0; /* Setup our scopes for the destination */ switch (fam) { #ifdef INET case AF_INET: /* Scope based on outbound address */ if (IN4_ISLOOPBACK_ADDRESS(&to->sin_addr)) { dest_is_loop = 1; if (net != NULL) { /* mark it as local */ net->addr_is_local = 1; } } else if ((IN4_ISPRIVATE_ADDRESS(&to->sin_addr))) { dest_is_priv = 1; } break; #endif #ifdef INET6 case AF_INET6: /* Scope based on outbound address */ if (IN6_IS_ADDR_LOOPBACK(&to6->sin6_addr) || SCTP_ROUTE_IS_REAL_LOOP(ro)) { /* * If the address is a loopback address, which * consists of "::1" OR "fe80::1%lo0", we are * loopback scope. But we don't use dest_is_priv * (link local addresses). */ dest_is_loop = 1; if (net != NULL) { /* mark it as local */ net->addr_is_local = 1; } } else if (IN6_IS_ADDR_LINKLOCAL(&to6->sin6_addr)) { dest_is_priv = 1; } break; #endif } SCTPDBG(SCTP_DEBUG_OUTPUT2, "Select source addr for:"); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&ro->ro_dst); SCTP_IPI_ADDR_RLOCK(); if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUNDALL) { /* * Bound all case */ answer = sctp_choose_boundall(inp, stcb, net, ro, vrf_id, dest_is_priv, dest_is_loop, non_asoc_addr_ok, fam); SCTP_IPI_ADDR_RUNLOCK(); return (answer); } /* * Subset bound case */ if (stcb) { answer = sctp_choose_boundspecific_stcb(inp, stcb, ro, vrf_id, dest_is_priv, dest_is_loop, non_asoc_addr_ok, fam); } else { answer = sctp_choose_boundspecific_inp(inp, ro, vrf_id, non_asoc_addr_ok, dest_is_priv, dest_is_loop, fam); } SCTP_IPI_ADDR_RUNLOCK(); return (answer); } static int sctp_find_cmsg(int c_type, void *data, struct mbuf *control, size_t cpsize) { struct cmsghdr cmh; struct sctp_sndinfo sndinfo; struct sctp_prinfo prinfo; struct sctp_authinfo authinfo; int tot_len, rem_len, cmsg_data_len, cmsg_data_off, off; int found; /* * Independent of how many mbufs, find the c_type inside the control * structure and copy out the data. */ found = 0; tot_len = SCTP_BUF_LEN(control); for (off = 0; off < tot_len; off += CMSG_ALIGN(cmh.cmsg_len)) { rem_len = tot_len - off; if (rem_len < (int)CMSG_ALIGN(sizeof(cmh))) { /* There is not enough room for one more. */ return (found); } m_copydata(control, off, sizeof(cmh), (caddr_t)&cmh); if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) { /* We dont't have a complete CMSG header. */ return (found); } if ((cmh.cmsg_len > INT_MAX) || ((int)cmh.cmsg_len > rem_len)) { /* We don't have the complete CMSG. */ return (found); } cmsg_data_len = (int)cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh)); cmsg_data_off = off + CMSG_ALIGN(sizeof(cmh)); if ((cmh.cmsg_level == IPPROTO_SCTP) && ((c_type == cmh.cmsg_type) || ((c_type == SCTP_SNDRCV) && ((cmh.cmsg_type == SCTP_SNDINFO) || (cmh.cmsg_type == SCTP_PRINFO) || (cmh.cmsg_type == SCTP_AUTHINFO))))) { if (c_type == cmh.cmsg_type) { if (cpsize > INT_MAX) { return (found); } if (cmsg_data_len < (int)cpsize) { return (found); } /* It is exactly what we want. Copy it out. */ m_copydata(control, cmsg_data_off, (int)cpsize, (caddr_t)data); return (1); } else { struct sctp_sndrcvinfo *sndrcvinfo; sndrcvinfo = (struct sctp_sndrcvinfo *)data; if (found == 0) { if (cpsize < sizeof(struct sctp_sndrcvinfo)) { return (found); } memset(sndrcvinfo, 0, sizeof(struct sctp_sndrcvinfo)); } switch (cmh.cmsg_type) { case SCTP_SNDINFO: if (cmsg_data_len < (int)sizeof(struct sctp_sndinfo)) { return (found); } m_copydata(control, cmsg_data_off, sizeof(struct sctp_sndinfo), (caddr_t)&sndinfo); sndrcvinfo->sinfo_stream = sndinfo.snd_sid; sndrcvinfo->sinfo_flags = sndinfo.snd_flags; sndrcvinfo->sinfo_ppid = sndinfo.snd_ppid; sndrcvinfo->sinfo_context = sndinfo.snd_context; sndrcvinfo->sinfo_assoc_id = sndinfo.snd_assoc_id; break; case SCTP_PRINFO: if (cmsg_data_len < (int)sizeof(struct sctp_prinfo)) { return (found); } m_copydata(control, cmsg_data_off, sizeof(struct sctp_prinfo), (caddr_t)&prinfo); if (prinfo.pr_policy != SCTP_PR_SCTP_NONE) { sndrcvinfo->sinfo_timetolive = prinfo.pr_value; } else { sndrcvinfo->sinfo_timetolive = 0; } sndrcvinfo->sinfo_flags |= prinfo.pr_policy; break; case SCTP_AUTHINFO: if (cmsg_data_len < (int)sizeof(struct sctp_authinfo)) { return (found); } m_copydata(control, cmsg_data_off, sizeof(struct sctp_authinfo), (caddr_t)&authinfo); sndrcvinfo->sinfo_keynumber_valid = 1; sndrcvinfo->sinfo_keynumber = authinfo.auth_keynumber; break; default: return (found); } found = 1; } } } return (found); } static int sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *error) { struct cmsghdr cmh; int tlen, at; struct sctp_initmsg initmsg; #ifdef INET struct sockaddr_in sin; #endif #ifdef INET6 struct sockaddr_in6 sin6; #endif tlen = SCTP_BUF_LEN(control); at = 0; while (at < tlen) { if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) { /* There is not enough room for one more. */ *error = EINVAL; return (1); } m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh); if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) { /* We dont't have a complete CMSG header. */ *error = EINVAL; return (1); } if (((int)cmh.cmsg_len + at) > tlen) { /* We don't have the complete CMSG. */ *error = EINVAL; return (1); } if (cmh.cmsg_level == IPPROTO_SCTP) { switch (cmh.cmsg_type) { case SCTP_INIT: if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct sctp_initmsg)) { *error = EINVAL; return (1); } m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct sctp_initmsg), (caddr_t)&initmsg); if (initmsg.sinit_max_attempts) stcb->asoc.max_init_times = initmsg.sinit_max_attempts; if (initmsg.sinit_num_ostreams) stcb->asoc.pre_open_streams = initmsg.sinit_num_ostreams; if (initmsg.sinit_max_instreams) stcb->asoc.max_inbound_streams = initmsg.sinit_max_instreams; if (initmsg.sinit_max_init_timeo) stcb->asoc.initial_init_rto_max = initmsg.sinit_max_init_timeo; if (stcb->asoc.streamoutcnt < stcb->asoc.pre_open_streams) { struct sctp_stream_out *tmp_str; unsigned int i; #if defined(SCTP_DETAILED_STR_STATS) int j; #endif /* Default is NOT correct */ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, default:%d pre_open:%d\n", stcb->asoc.streamoutcnt, stcb->asoc.pre_open_streams); SCTP_TCB_UNLOCK(stcb); SCTP_MALLOC(tmp_str, struct sctp_stream_out *, (stcb->asoc.pre_open_streams * sizeof(struct sctp_stream_out)), SCTP_M_STRMO); SCTP_TCB_LOCK(stcb); if (tmp_str != NULL) { SCTP_FREE(stcb->asoc.strmout, SCTP_M_STRMO); stcb->asoc.strmout = tmp_str; stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt = stcb->asoc.pre_open_streams; } else { stcb->asoc.pre_open_streams = stcb->asoc.streamoutcnt; } for (i = 0; i < stcb->asoc.streamoutcnt; i++) { TAILQ_INIT(&stcb->asoc.strmout[i].outqueue); stcb->asoc.strmout[i].chunks_on_queues = 0; stcb->asoc.strmout[i].next_mid_ordered = 0; stcb->asoc.strmout[i].next_mid_unordered = 0; #if defined(SCTP_DETAILED_STR_STATS) for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) { stcb->asoc.strmout[i].abandoned_sent[j] = 0; stcb->asoc.strmout[i].abandoned_unsent[j] = 0; } #else stcb->asoc.strmout[i].abandoned_sent[0] = 0; stcb->asoc.strmout[i].abandoned_unsent[0] = 0; #endif stcb->asoc.strmout[i].sid = i; stcb->asoc.strmout[i].last_msg_incomplete = 0; stcb->asoc.strmout[i].state = SCTP_STREAM_OPENING; stcb->asoc.ss_functions.sctp_ss_init_stream(stcb, &stcb->asoc.strmout[i], NULL); } } break; #ifdef INET case SCTP_DSTADDRV4: if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) { *error = EINVAL; return (1); } memset(&sin, 0, sizeof(struct sockaddr_in)); sin.sin_family = AF_INET; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_port = stcb->rport; m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr); if ((sin.sin_addr.s_addr == INADDR_ANY) || (sin.sin_addr.s_addr == INADDR_BROADCAST) || IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { *error = EINVAL; return (1); } if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, NULL, stcb->asoc.port, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { *error = ENOBUFS; return (1); } break; #endif #ifdef INET6 case SCTP_DSTADDRV6: if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) { *error = EINVAL; return (1); } memset(&sin6, 0, sizeof(struct sockaddr_in6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_port = stcb->rport; m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr); if (IN6_IS_ADDR_UNSPECIFIED(&sin6.sin6_addr) || IN6_IS_ADDR_MULTICAST(&sin6.sin6_addr)) { *error = EINVAL; return (1); } #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) { in6_sin6_2_sin(&sin, &sin6); if ((sin.sin_addr.s_addr == INADDR_ANY) || (sin.sin_addr.s_addr == INADDR_BROADCAST) || IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) { *error = EINVAL; return (1); } if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin, NULL, stcb->asoc.port, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { *error = ENOBUFS; return (1); } } else #endif if (sctp_add_remote_addr(stcb, (struct sockaddr *)&sin6, NULL, stcb->asoc.port, SCTP_DONOT_SETSCOPE, SCTP_ADDR_IS_CONFIRMED)) { *error = ENOBUFS; return (1); } break; #endif default: break; } } at += CMSG_ALIGN(cmh.cmsg_len); } return (0); } static struct sctp_tcb * sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p, uint16_t port, struct mbuf *control, struct sctp_nets **net_p, int *error) { struct cmsghdr cmh; int tlen, at; struct sctp_tcb *stcb; struct sockaddr *addr; #ifdef INET struct sockaddr_in sin; #endif #ifdef INET6 struct sockaddr_in6 sin6; #endif tlen = SCTP_BUF_LEN(control); at = 0; while (at < tlen) { if ((tlen - at) < (int)CMSG_ALIGN(sizeof(cmh))) { /* There is not enough room for one more. */ *error = EINVAL; return (NULL); } m_copydata(control, at, sizeof(cmh), (caddr_t)&cmh); if (cmh.cmsg_len < CMSG_ALIGN(sizeof(cmh))) { /* We dont't have a complete CMSG header. */ *error = EINVAL; return (NULL); } if (((int)cmh.cmsg_len + at) > tlen) { /* We don't have the complete CMSG. */ *error = EINVAL; return (NULL); } if (cmh.cmsg_level == IPPROTO_SCTP) { switch (cmh.cmsg_type) { #ifdef INET case SCTP_DSTADDRV4: if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in_addr)) { *error = EINVAL; return (NULL); } memset(&sin, 0, sizeof(struct sockaddr_in)); sin.sin_family = AF_INET; sin.sin_len = sizeof(struct sockaddr_in); sin.sin_port = port; m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in_addr), (caddr_t)&sin.sin_addr); addr = (struct sockaddr *)&sin; break; #endif #ifdef INET6 case SCTP_DSTADDRV6: if ((size_t)(cmh.cmsg_len - CMSG_ALIGN(sizeof(cmh))) < sizeof(struct in6_addr)) { *error = EINVAL; return (NULL); } memset(&sin6, 0, sizeof(struct sockaddr_in6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(struct sockaddr_in6); sin6.sin6_port = port; m_copydata(control, at + CMSG_ALIGN(sizeof(cmh)), sizeof(struct in6_addr), (caddr_t)&sin6.sin6_addr); #ifdef INET if (IN6_IS_ADDR_V4MAPPED(&sin6.sin6_addr)) { in6_sin6_2_sin(&sin, &sin6); addr = (struct sockaddr *)&sin; } else #endif addr = (struct sockaddr *)&sin6; break; #endif default: addr = NULL; break; } if (addr) { stcb = sctp_findassociation_ep_addr(inp_p, addr, net_p, NULL, NULL); if (stcb != NULL) { return (stcb); } } } at += CMSG_ALIGN(cmh.cmsg_len); } return (NULL); } static struct mbuf * sctp_add_cookie(struct mbuf *init, int init_offset, struct mbuf *initack, int initack_offset, struct sctp_state_cookie *stc_in, uint8_t **signature) { struct mbuf *copy_init, *copy_initack, *m_at, *sig, *mret; struct sctp_state_cookie *stc; struct sctp_paramhdr *ph; uint8_t *foo; int sig_offset; uint16_t cookie_sz; mret = sctp_get_mbuf_for_msg((sizeof(struct sctp_state_cookie) + sizeof(struct sctp_paramhdr)), 0, M_NOWAIT, 1, MT_DATA); if (mret == NULL) { return (NULL); } copy_init = SCTP_M_COPYM(init, init_offset, M_COPYALL, M_NOWAIT); if (copy_init == NULL) { sctp_m_freem(mret); return (NULL); } #ifdef SCTP_MBUF_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(copy_init, SCTP_MBUF_ICOPY); } #endif copy_initack = SCTP_M_COPYM(initack, initack_offset, M_COPYALL, M_NOWAIT); if (copy_initack == NULL) { sctp_m_freem(mret); sctp_m_freem(copy_init); return (NULL); } #ifdef SCTP_MBUF_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(copy_initack, SCTP_MBUF_ICOPY); } #endif /* easy side we just drop it on the end */ ph = mtod(mret, struct sctp_paramhdr *); SCTP_BUF_LEN(mret) = sizeof(struct sctp_state_cookie) + sizeof(struct sctp_paramhdr); stc = (struct sctp_state_cookie *)((caddr_t)ph + sizeof(struct sctp_paramhdr)); ph->param_type = htons(SCTP_STATE_COOKIE); ph->param_length = 0; /* fill in at the end */ /* Fill in the stc cookie data */ memcpy(stc, stc_in, sizeof(struct sctp_state_cookie)); /* tack the INIT and then the INIT-ACK onto the chain */ cookie_sz = 0; for (m_at = mret; m_at; m_at = SCTP_BUF_NEXT(m_at)) { cookie_sz += SCTP_BUF_LEN(m_at); if (SCTP_BUF_NEXT(m_at) == NULL) { SCTP_BUF_NEXT(m_at) = copy_init; break; } } for (m_at = copy_init; m_at; m_at = SCTP_BUF_NEXT(m_at)) { cookie_sz += SCTP_BUF_LEN(m_at); if (SCTP_BUF_NEXT(m_at) == NULL) { SCTP_BUF_NEXT(m_at) = copy_initack; break; } } for (m_at = copy_initack; m_at; m_at = SCTP_BUF_NEXT(m_at)) { cookie_sz += SCTP_BUF_LEN(m_at); if (SCTP_BUF_NEXT(m_at) == NULL) { break; } } sig = sctp_get_mbuf_for_msg(SCTP_SECRET_SIZE, 0, M_NOWAIT, 1, MT_DATA); if (sig == NULL) { /* no space, so free the entire chain */ sctp_m_freem(mret); return (NULL); } SCTP_BUF_LEN(sig) = 0; SCTP_BUF_NEXT(m_at) = sig; sig_offset = 0; foo = (uint8_t *)(mtod(sig, caddr_t)+sig_offset); memset(foo, 0, SCTP_SIGNATURE_SIZE); *signature = foo; SCTP_BUF_LEN(sig) += SCTP_SIGNATURE_SIZE; cookie_sz += SCTP_SIGNATURE_SIZE; ph->param_length = htons(cookie_sz); return (mret); } static uint8_t sctp_get_ect(struct sctp_tcb *stcb) { if ((stcb != NULL) && (stcb->asoc.ecn_supported == 1)) { return (SCTP_ECT0_BIT); } else { return (0); } } #if defined(INET) || defined(INET6) static void sctp_handle_no_route(struct sctp_tcb *stcb, struct sctp_nets *net, int so_locked) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "dropped packet - no valid source addr\n"); if (net) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Destination was "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT1, &net->ro._l_addr.sa); if (net->dest_state & SCTP_ADDR_CONFIRMED) { if ((net->dest_state & SCTP_ADDR_REACHABLE) && stcb) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "no route takes interface %p down\n", (void *)net); sctp_ulp_notify(SCTP_NOTIFY_INTERFACE_DOWN, stcb, 0, (void *)net, so_locked); net->dest_state &= ~SCTP_ADDR_REACHABLE; net->dest_state &= ~SCTP_ADDR_PF; } } if (stcb) { if (net == stcb->asoc.primary_destination) { /* need a new primary */ struct sctp_nets *alt; alt = sctp_find_alternate_net(stcb, net, 0); if (alt != net) { if (stcb->asoc.alternate) { sctp_free_remote_addr(stcb->asoc.alternate); } stcb->asoc.alternate = alt; atomic_add_int(&stcb->asoc.alternate->ref_count, 1); if (net->ro._s_addr) { sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; } net->src_addr_selected = 0; } } } } } #endif static int sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, struct sctp_tcb *stcb, /* may be NULL */ struct sctp_nets *net, struct sockaddr *to, struct mbuf *m, uint32_t auth_offset, struct sctp_auth_chunk *auth, uint16_t auth_keyid, int nofragment_flag, int ecn_ok, int out_of_asoc_ok, uint16_t src_port, uint16_t dest_port, uint32_t v_tag, uint16_t port, union sctp_sockstore *over_addr, uint8_t mflowtype, uint32_t mflowid, #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) int so_locked SCTP_UNUSED #else int so_locked #endif ) { /* nofragment_flag to tell if IP_DF should be set (IPv4 only) */ /** * Given a mbuf chain (via SCTP_BUF_NEXT()) that holds a packet header * WITH an SCTPHDR but no IP header, endpoint inp and sa structure: * - fill in the HMAC digest of any AUTH chunk in the packet. * - calculate and fill in the SCTP checksum. * - prepend an IP address header. * - if boundall use INADDR_ANY. * - if boundspecific do source address selection. * - set fragmentation option for ipV4. * - On return from IP output, check/adjust mtu size of output * interface and smallest_mtu size as well. */ /* Will need ifdefs around this */ struct mbuf *newm; struct sctphdr *sctphdr; int packet_length; int ret; #if defined(INET) || defined(INET6) uint32_t vrf_id; #endif #if defined(INET) || defined(INET6) struct mbuf *o_pak; sctp_route_t *ro = NULL; struct udphdr *udp = NULL; #endif uint8_t tos_value; #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) struct socket *so = NULL; #endif if ((net) && (net->dest_state & SCTP_ADDR_OUT_OF_SCOPE)) { SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT); sctp_m_freem(m); return (EFAULT); } #if defined(INET) || defined(INET6) if (stcb) { vrf_id = stcb->asoc.vrf_id; } else { vrf_id = inp->def_vrf_id; } #endif /* fill in the HMAC digest for any AUTH chunk in the packet */ if ((auth != NULL) && (stcb != NULL)) { sctp_fill_hmac_digest_m(m, auth_offset, auth, stcb, auth_keyid); } if (net) { tos_value = net->dscp; } else if (stcb) { tos_value = stcb->asoc.default_dscp; } else { tos_value = inp->sctp_ep.default_dscp; } switch (to->sa_family) { #ifdef INET case AF_INET: { struct ip *ip = NULL; sctp_route_t iproute; int len; len = SCTP_MIN_V4_OVERHEAD; if (port) { len += sizeof(struct udphdr); } newm = sctp_get_mbuf_for_msg(len, 1, M_NOWAIT, 1, MT_DATA); if (newm == NULL) { sctp_m_freem(m); SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } SCTP_ALIGN_TO_END(newm, len); SCTP_BUF_LEN(newm) = len; SCTP_BUF_NEXT(newm) = m; m = newm; if (net != NULL) { m->m_pkthdr.flowid = net->flowid; M_HASHTYPE_SET(m, net->flowtype); } else { m->m_pkthdr.flowid = mflowid; M_HASHTYPE_SET(m, mflowtype); } packet_length = sctp_calculate_len(m); ip = mtod(m, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = (sizeof(struct ip) >> 2); if (tos_value == 0) { /* * This means especially, that it is not set * at the SCTP layer. So use the value from * the IP layer. */ tos_value = inp->ip_inp.inp.inp_ip_tos; } tos_value &= 0xfc; if (ecn_ok) { tos_value |= sctp_get_ect(stcb); } if ((nofragment_flag) && (port == 0)) { ip->ip_off = htons(IP_DF); } else { ip->ip_off = htons(0); } /* FreeBSD has a function for ip_id's */ ip_fillid(ip); ip->ip_ttl = inp->ip_inp.inp.inp_ip_ttl; ip->ip_len = htons(packet_length); ip->ip_tos = tos_value; if (port) { ip->ip_p = IPPROTO_UDP; } else { ip->ip_p = IPPROTO_SCTP; } ip->ip_sum = 0; if (net == NULL) { ro = &iproute; memset(&iproute, 0, sizeof(iproute)); memcpy(&ro->ro_dst, to, to->sa_len); } else { ro = (sctp_route_t *)&net->ro; } /* Now the address selection part */ ip->ip_dst.s_addr = ((struct sockaddr_in *)to)->sin_addr.s_addr; /* call the routine to select the src address */ if (net && out_of_asoc_ok == 0) { if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) { sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; net->src_addr_selected = 0; if (ro->ro_rt) { RTFREE(ro->ro_rt); ro->ro_rt = NULL; } } if (net->src_addr_selected == 0) { /* Cache the source address */ net->ro._s_addr = sctp_source_address_selection(inp, stcb, ro, net, 0, vrf_id); net->src_addr_selected = 1; } if (net->ro._s_addr == NULL) { /* No route to host */ net->src_addr_selected = 0; sctp_handle_no_route(stcb, net, so_locked); SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); sctp_m_freem(m); return (EHOSTUNREACH); } ip->ip_src = net->ro._s_addr->address.sin.sin_addr; } else { if (over_addr == NULL) { struct sctp_ifa *_lsrc; _lsrc = sctp_source_address_selection(inp, stcb, ro, net, out_of_asoc_ok, vrf_id); if (_lsrc == NULL) { sctp_handle_no_route(stcb, net, so_locked); SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); sctp_m_freem(m); return (EHOSTUNREACH); } ip->ip_src = _lsrc->address.sin.sin_addr; sctp_free_ifa(_lsrc); } else { ip->ip_src = over_addr->sin.sin_addr; SCTP_RTALLOC(ro, vrf_id, inp->fibnum); } } if (port) { if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) { sctp_handle_no_route(stcb, net, so_locked); SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); sctp_m_freem(m); return (EHOSTUNREACH); } udp = (struct udphdr *)((caddr_t)ip + sizeof(struct ip)); udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); udp->uh_dport = port; udp->uh_ulen = htons((uint16_t)(packet_length - sizeof(struct ip))); if (V_udp_cksum) { udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP)); } else { udp->uh_sum = 0; } sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr)); } else { sctphdr = (struct sctphdr *)((caddr_t)ip + sizeof(struct ip)); } sctphdr->src_port = src_port; sctphdr->dest_port = dest_port; sctphdr->v_tag = v_tag; sctphdr->checksum = 0; /* * If source address selection fails and we find no * route then the ip_output should fail as well with * a NO_ROUTE_TO_HOST type error. We probably should * catch that somewhere and abort the association * right away (assuming this is an INIT being sent). */ if (ro->ro_rt == NULL) { /* * src addr selection failed to find a route * (or valid source addr), so we can't get * there from here (yet)! */ sctp_handle_no_route(stcb, net, so_locked); SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); sctp_m_freem(m); return (EHOSTUNREACH); } if (ro != &iproute) { memcpy(&iproute, ro, sizeof(*ro)); } SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv4 output routine from low level src addr:%x\n", (uint32_t)(ntohl(ip->ip_src.s_addr))); SCTPDBG(SCTP_DEBUG_OUTPUT3, "Destination is %x\n", (uint32_t)(ntohl(ip->ip_dst.s_addr))); SCTPDBG(SCTP_DEBUG_OUTPUT3, "RTP route is %p through\n", (void *)ro->ro_rt); if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) { /* failed to prepend data, give up */ SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); sctp_m_freem(m); return (ENOMEM); } SCTP_ATTACH_CHAIN(o_pak, m, packet_length); if (port) { sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip) + sizeof(struct udphdr)); SCTP_STAT_INCR(sctps_sendswcrc); if (V_udp_cksum) { SCTP_ENABLE_UDP_CSUM(o_pak); } } else { m->m_pkthdr.csum_flags = CSUM_SCTP; m->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum); SCTP_STAT_INCR(sctps_sendhwcrc); } #ifdef SCTP_PACKET_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) sctp_packet_log(o_pak); #endif /* send it out. table id is taken from stcb */ #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) { so = SCTP_INP_SO(inp); SCTP_SOCKET_UNLOCK(so, 0); } #endif SCTP_PROBE5(send, NULL, stcb, ip, stcb, sctphdr); SCTP_IP_OUTPUT(ret, o_pak, ro, stcb, vrf_id); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) { atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 0); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); } #endif SCTP_STAT_INCR(sctps_sendpackets); SCTP_STAT_INCR_COUNTER64(sctps_outpackets); if (ret) SCTP_STAT_INCR(sctps_senderrors); SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret); if (net == NULL) { /* free tempy routes */ RO_RTFREE(ro); } else { if ((ro->ro_rt != NULL) && (net->ro._s_addr) && ((net->dest_state & SCTP_ADDR_NO_PMTUD) == 0)) { uint32_t mtu; mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt); if (mtu > 0) { if (net->port) { mtu -= sizeof(struct udphdr); } if ((stcb != NULL) && (stcb->asoc.smallest_mtu > mtu)) { sctp_mtu_size_reset(inp, &stcb->asoc, mtu); } net->mtu = mtu; } } else if (ro->ro_rt == NULL) { /* route was freed */ if (net->ro._s_addr && net->src_addr_selected) { sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; } net->src_addr_selected = 0; } } return (ret); } #endif #ifdef INET6 case AF_INET6: { uint32_t flowlabel, flowinfo; struct ip6_hdr *ip6h; struct route_in6 ip6route; struct ifnet *ifp; struct sockaddr_in6 *sin6, tmp, *lsa6, lsa6_tmp; int prev_scope = 0; struct sockaddr_in6 lsa6_storage; int error; u_short prev_port = 0; int len; if (net) { flowlabel = net->flowlabel; } else if (stcb) { flowlabel = stcb->asoc.default_flowlabel; } else { flowlabel = inp->sctp_ep.default_flowlabel; } if (flowlabel == 0) { /* * This means especially, that it is not set * at the SCTP layer. So use the value from * the IP layer. */ flowlabel = ntohl(((struct in6pcb *)inp)->in6p_flowinfo); } flowlabel &= 0x000fffff; len = SCTP_MIN_OVERHEAD; if (port) { len += sizeof(struct udphdr); } newm = sctp_get_mbuf_for_msg(len, 1, M_NOWAIT, 1, MT_DATA); if (newm == NULL) { sctp_m_freem(m); SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } SCTP_ALIGN_TO_END(newm, len); SCTP_BUF_LEN(newm) = len; SCTP_BUF_NEXT(newm) = m; m = newm; if (net != NULL) { m->m_pkthdr.flowid = net->flowid; M_HASHTYPE_SET(m, net->flowtype); } else { m->m_pkthdr.flowid = mflowid; M_HASHTYPE_SET(m, mflowtype); } packet_length = sctp_calculate_len(m); ip6h = mtod(m, struct ip6_hdr *); /* protect *sin6 from overwrite */ sin6 = (struct sockaddr_in6 *)to; tmp = *sin6; sin6 = &tmp; /* KAME hack: embed scopeid */ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); return (EINVAL); } if (net == NULL) { memset(&ip6route, 0, sizeof(ip6route)); ro = (sctp_route_t *)&ip6route; memcpy(&ro->ro_dst, sin6, sin6->sin6_len); } else { ro = (sctp_route_t *)&net->ro; } /* * We assume here that inp_flow is in host byte * order within the TCB! */ if (tos_value == 0) { /* * This means especially, that it is not set * at the SCTP layer. So use the value from * the IP layer. */ tos_value = (ntohl(((struct in6pcb *)inp)->in6p_flowinfo) >> 20) & 0xff; } tos_value &= 0xfc; if (ecn_ok) { tos_value |= sctp_get_ect(stcb); } flowinfo = 0x06; flowinfo <<= 8; flowinfo |= tos_value; flowinfo <<= 20; flowinfo |= flowlabel; ip6h->ip6_flow = htonl(flowinfo); if (port) { ip6h->ip6_nxt = IPPROTO_UDP; } else { ip6h->ip6_nxt = IPPROTO_SCTP; } ip6h->ip6_plen = htons((uint16_t)(packet_length - sizeof(struct ip6_hdr))); ip6h->ip6_dst = sin6->sin6_addr; /* * Add SRC address selection here: we can only reuse * to a limited degree the kame src-addr-sel, since * we can try their selection but it may not be * bound. */ memset(&lsa6_tmp, 0, sizeof(lsa6_tmp)); lsa6_tmp.sin6_family = AF_INET6; lsa6_tmp.sin6_len = sizeof(lsa6_tmp); lsa6 = &lsa6_tmp; if (net && out_of_asoc_ok == 0) { if (net->ro._s_addr && (net->ro._s_addr->localifa_flags & (SCTP_BEING_DELETED | SCTP_ADDR_IFA_UNUSEABLE))) { sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; net->src_addr_selected = 0; if (ro->ro_rt) { RTFREE(ro->ro_rt); ro->ro_rt = NULL; } } if (net->src_addr_selected == 0) { sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; /* KAME hack: embed scopeid */ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); return (EINVAL); } /* Cache the source address */ net->ro._s_addr = sctp_source_address_selection(inp, stcb, ro, net, 0, vrf_id); (void)sa6_recoverscope(sin6); net->src_addr_selected = 1; } if (net->ro._s_addr == NULL) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "V6:No route to host\n"); net->src_addr_selected = 0; sctp_handle_no_route(stcb, net, so_locked); SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); sctp_m_freem(m); return (EHOSTUNREACH); } lsa6->sin6_addr = net->ro._s_addr->address.sin6.sin6_addr; } else { sin6 = (struct sockaddr_in6 *)&ro->ro_dst; /* KAME hack: embed scopeid */ if (sa6_embedscope(sin6, MODULE_GLOBAL(ip6_use_defzone)) != 0) { SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); return (EINVAL); } if (over_addr == NULL) { struct sctp_ifa *_lsrc; _lsrc = sctp_source_address_selection(inp, stcb, ro, net, out_of_asoc_ok, vrf_id); if (_lsrc == NULL) { sctp_handle_no_route(stcb, net, so_locked); SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); sctp_m_freem(m); return (EHOSTUNREACH); } lsa6->sin6_addr = _lsrc->address.sin6.sin6_addr; sctp_free_ifa(_lsrc); } else { lsa6->sin6_addr = over_addr->sin6.sin6_addr; SCTP_RTALLOC(ro, vrf_id, inp->fibnum); } (void)sa6_recoverscope(sin6); } lsa6->sin6_port = inp->sctp_lport; if (ro->ro_rt == NULL) { /* * src addr selection failed to find a route * (or valid source addr), so we can't get * there from here! */ sctp_handle_no_route(stcb, net, so_locked); SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); sctp_m_freem(m); return (EHOSTUNREACH); } /* * XXX: sa6 may not have a valid sin6_scope_id in * the non-SCOPEDROUTING case. */ memset(&lsa6_storage, 0, sizeof(lsa6_storage)); lsa6_storage.sin6_family = AF_INET6; lsa6_storage.sin6_len = sizeof(lsa6_storage); lsa6_storage.sin6_addr = lsa6->sin6_addr; if ((error = sa6_recoverscope(&lsa6_storage)) != 0) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "recover scope fails error %d\n", error); sctp_m_freem(m); return (error); } /* XXX */ lsa6_storage.sin6_addr = lsa6->sin6_addr; lsa6_storage.sin6_port = inp->sctp_lport; lsa6 = &lsa6_storage; ip6h->ip6_src = lsa6->sin6_addr; if (port) { if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) { sctp_handle_no_route(stcb, net, so_locked); SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EHOSTUNREACH); sctp_m_freem(m); return (EHOSTUNREACH); } udp = (struct udphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr)); udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); udp->uh_dport = port; udp->uh_ulen = htons((uint16_t)(packet_length - sizeof(struct ip6_hdr))); udp->uh_sum = 0; sctphdr = (struct sctphdr *)((caddr_t)udp + sizeof(struct udphdr)); } else { sctphdr = (struct sctphdr *)((caddr_t)ip6h + sizeof(struct ip6_hdr)); } sctphdr->src_port = src_port; sctphdr->dest_port = dest_port; sctphdr->v_tag = v_tag; sctphdr->checksum = 0; /* * We set the hop limit now since there is a good * chance that our ro pointer is now filled */ ip6h->ip6_hlim = SCTP_GET_HLIM(inp, ro); ifp = SCTP_GET_IFN_VOID_FROM_ROUTE(ro); #ifdef SCTP_DEBUG /* Copy to be sure something bad is not happening */ sin6->sin6_addr = ip6h->ip6_dst; lsa6->sin6_addr = ip6h->ip6_src; #endif SCTPDBG(SCTP_DEBUG_OUTPUT3, "Calling ipv6 output routine from low level\n"); SCTPDBG(SCTP_DEBUG_OUTPUT3, "src: "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)lsa6); SCTPDBG(SCTP_DEBUG_OUTPUT3, "dst: "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT3, (struct sockaddr *)sin6); if (net) { sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; /* * preserve the port and scope for link * local send */ prev_scope = sin6->sin6_scope_id; prev_port = sin6->sin6_port; } if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) { /* failed to prepend data, give up */ sctp_m_freem(m); SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } SCTP_ATTACH_CHAIN(o_pak, m, packet_length); if (port) { sctphdr->checksum = sctp_calculate_cksum(m, sizeof(struct ip6_hdr) + sizeof(struct udphdr)); SCTP_STAT_INCR(sctps_sendswcrc); if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), packet_length - sizeof(struct ip6_hdr))) == 0) { udp->uh_sum = 0xffff; } } else { m->m_pkthdr.csum_flags = CSUM_SCTP_IPV6; m->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum); SCTP_STAT_INCR(sctps_sendhwcrc); } /* send it out. table id is taken from stcb */ #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) { so = SCTP_INP_SO(inp); SCTP_SOCKET_UNLOCK(so, 0); } #endif #ifdef SCTP_PACKET_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) sctp_packet_log(o_pak); #endif SCTP_PROBE5(send, NULL, stcb, ip6h, stcb, sctphdr); SCTP_IP6_OUTPUT(ret, o_pak, (struct route_in6 *)ro, &ifp, stcb, vrf_id); #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) if ((SCTP_BASE_SYSCTL(sctp_output_unlocked)) && (so_locked)) { atomic_add_int(&stcb->asoc.refcnt, 1); SCTP_TCB_UNLOCK(stcb); SCTP_SOCKET_LOCK(so, 0); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); } #endif if (net) { /* for link local this must be done */ sin6->sin6_scope_id = prev_scope; sin6->sin6_port = prev_port; } SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret); SCTP_STAT_INCR(sctps_sendpackets); SCTP_STAT_INCR_COUNTER64(sctps_outpackets); if (ret) { SCTP_STAT_INCR(sctps_senderrors); } if (net == NULL) { /* Now if we had a temp route free it */ RO_RTFREE(ro); } else { /* * PMTU check versus smallest asoc MTU goes * here */ if (ro->ro_rt == NULL) { /* Route was freed */ if (net->ro._s_addr && net->src_addr_selected) { sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; } net->src_addr_selected = 0; } if ((ro->ro_rt != NULL) && (net->ro._s_addr) && ((net->dest_state & SCTP_ADDR_NO_PMTUD) == 0)) { uint32_t mtu; mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt); if (mtu > 0) { if (net->port) { mtu -= sizeof(struct udphdr); } if ((stcb != NULL) && (stcb->asoc.smallest_mtu > mtu)) { sctp_mtu_size_reset(inp, &stcb->asoc, mtu); } net->mtu = mtu; } } else if (ifp) { if (ND_IFINFO(ifp)->linkmtu && (stcb->asoc.smallest_mtu > ND_IFINFO(ifp)->linkmtu)) { sctp_mtu_size_reset(inp, &stcb->asoc, ND_IFINFO(ifp)->linkmtu); } } } return (ret); } #endif default: SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n", ((struct sockaddr *)to)->sa_family); sctp_m_freem(m); SCTP_LTRACE_ERR_RET_PKT(m, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT); return (EFAULT); } } void sctp_send_initiate(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { struct mbuf *m, *m_last; struct sctp_nets *net; struct sctp_init_chunk *init; struct sctp_supported_addr_param *sup_addr; struct sctp_adaptation_layer_indication *ali; struct sctp_supported_chunk_types_param *pr_supported; struct sctp_paramhdr *ph; int cnt_inits_to = 0; int error; uint16_t num_ext, chunk_len, padding_len, parameter_len; /* INIT's always go to the primary (and usually ONLY address) */ net = stcb->asoc.primary_destination; if (net == NULL) { net = TAILQ_FIRST(&stcb->asoc.nets); if (net == NULL) { /* TSNH */ return; } /* we confirm any address we send an INIT to */ net->dest_state &= ~SCTP_ADDR_UNCONFIRMED; (void)sctp_set_primary_addr(stcb, NULL, net); } else { /* we confirm any address we send an INIT to */ net->dest_state &= ~SCTP_ADDR_UNCONFIRMED; } SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT\n"); #ifdef INET6 if (net->ro._l_addr.sa.sa_family == AF_INET6) { /* * special hook, if we are sending to link local it will not * show up in our private address count. */ if (IN6_IS_ADDR_LINKLOCAL(&net->ro._l_addr.sin6.sin6_addr)) cnt_inits_to = 1; } #endif if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) { /* This case should not happen */ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - failed timer?\n"); return; } /* start the INIT timer */ sctp_timer_start(SCTP_TIMER_TYPE_INIT, inp, stcb, net); m = sctp_get_mbuf_for_msg(MCLBYTES, 1, M_NOWAIT, 1, MT_DATA); if (m == NULL) { /* No memory, INIT timer will re-attempt. */ SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - mbuf?\n"); return; } chunk_len = (uint16_t)sizeof(struct sctp_init_chunk); padding_len = 0; /* Now lets put the chunk header in place */ init = mtod(m, struct sctp_init_chunk *); /* now the chunk header */ init->ch.chunk_type = SCTP_INITIATION; init->ch.chunk_flags = 0; /* fill in later from mbuf we build */ init->ch.chunk_length = 0; /* place in my tag */ init->init.initiate_tag = htonl(stcb->asoc.my_vtag); /* set up some of the credits. */ init->init.a_rwnd = htonl(max(inp->sctp_socket ? SCTP_SB_LIMIT_RCV(inp->sctp_socket) : 0, SCTP_MINIMAL_RWND)); init->init.num_outbound_streams = htons(stcb->asoc.pre_open_streams); init->init.num_inbound_streams = htons(stcb->asoc.max_inbound_streams); init->init.initial_tsn = htonl(stcb->asoc.init_seq_number); /* Adaptation layer indication parameter */ if (inp->sctp_ep.adaptation_layer_indicator_provided) { parameter_len = (uint16_t)sizeof(struct sctp_adaptation_layer_indication); ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len); ali->ph.param_type = htons(SCTP_ULP_ADAPTATION); ali->ph.param_length = htons(parameter_len); ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator); chunk_len += parameter_len; } /* ECN parameter */ if (stcb->asoc.ecn_supported == 1) { parameter_len = (uint16_t)sizeof(struct sctp_paramhdr); ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len); ph->param_type = htons(SCTP_ECN_CAPABLE); ph->param_length = htons(parameter_len); chunk_len += parameter_len; } /* PR-SCTP supported parameter */ if (stcb->asoc.prsctp_supported == 1) { parameter_len = (uint16_t)sizeof(struct sctp_paramhdr); ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len); ph->param_type = htons(SCTP_PRSCTP_SUPPORTED); ph->param_length = htons(parameter_len); chunk_len += parameter_len; } /* Add NAT friendly parameter. */ if (SCTP_BASE_SYSCTL(sctp_inits_include_nat_friendly)) { parameter_len = (uint16_t)sizeof(struct sctp_paramhdr); ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len); ph->param_type = htons(SCTP_HAS_NAT_SUPPORT); ph->param_length = htons(parameter_len); chunk_len += parameter_len; } /* And now tell the peer which extensions we support */ num_ext = 0; pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len); if (stcb->asoc.prsctp_supported == 1) { pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN; if (stcb->asoc.idata_supported) { pr_supported->chunk_types[num_ext++] = SCTP_IFORWARD_CUM_TSN; } } if (stcb->asoc.auth_supported == 1) { pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION; } if (stcb->asoc.asconf_supported == 1) { pr_supported->chunk_types[num_ext++] = SCTP_ASCONF; pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK; } if (stcb->asoc.reconfig_supported == 1) { pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET; } if (stcb->asoc.idata_supported) { pr_supported->chunk_types[num_ext++] = SCTP_IDATA; } if (stcb->asoc.nrsack_supported == 1) { pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK; } if (stcb->asoc.pktdrop_supported == 1) { pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED; } if (num_ext > 0) { parameter_len = (uint16_t)sizeof(struct sctp_supported_chunk_types_param) + num_ext; pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT); pr_supported->ph.param_length = htons(parameter_len); padding_len = SCTP_SIZE32(parameter_len) - parameter_len; chunk_len += parameter_len; } /* add authentication parameters */ if (stcb->asoc.auth_supported) { /* attach RANDOM parameter, if available */ if (stcb->asoc.authinfo.random != NULL) { struct sctp_auth_random *randp; if (padding_len > 0) { memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); chunk_len += padding_len; padding_len = 0; } randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+chunk_len); parameter_len = (uint16_t)sizeof(struct sctp_auth_random) + stcb->asoc.authinfo.random_len; /* random key already contains the header */ memcpy(randp, stcb->asoc.authinfo.random->key, parameter_len); padding_len = SCTP_SIZE32(parameter_len) - parameter_len; chunk_len += parameter_len; } /* add HMAC_ALGO parameter */ if (stcb->asoc.local_hmacs != NULL) { struct sctp_auth_hmac_algo *hmacs; if (padding_len > 0) { memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); chunk_len += padding_len; padding_len = 0; } hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+chunk_len); parameter_len = (uint16_t)(sizeof(struct sctp_auth_hmac_algo) + stcb->asoc.local_hmacs->num_algo * sizeof(uint16_t)); hmacs->ph.param_type = htons(SCTP_HMAC_LIST); hmacs->ph.param_length = htons(parameter_len); sctp_serialize_hmaclist(stcb->asoc.local_hmacs, (uint8_t *)hmacs->hmac_ids); padding_len = SCTP_SIZE32(parameter_len) - parameter_len; chunk_len += parameter_len; } /* add CHUNKS parameter */ if (stcb->asoc.local_auth_chunks != NULL) { struct sctp_auth_chunk_list *chunks; if (padding_len > 0) { memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); chunk_len += padding_len; padding_len = 0; } chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+chunk_len); parameter_len = (uint16_t)(sizeof(struct sctp_auth_chunk_list) + sctp_auth_get_chklist_size(stcb->asoc.local_auth_chunks)); chunks->ph.param_type = htons(SCTP_CHUNK_LIST); chunks->ph.param_length = htons(parameter_len); sctp_serialize_auth_chunks(stcb->asoc.local_auth_chunks, chunks->chunk_types); padding_len = SCTP_SIZE32(parameter_len) - parameter_len; chunk_len += parameter_len; } } /* now any cookie time extensions */ if (stcb->asoc.cookie_preserve_req) { struct sctp_cookie_perserve_param *cookie_preserve; if (padding_len > 0) { memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); chunk_len += padding_len; padding_len = 0; } parameter_len = (uint16_t)sizeof(struct sctp_cookie_perserve_param); cookie_preserve = (struct sctp_cookie_perserve_param *)(mtod(m, caddr_t)+chunk_len); cookie_preserve->ph.param_type = htons(SCTP_COOKIE_PRESERVE); cookie_preserve->ph.param_length = htons(parameter_len); cookie_preserve->time = htonl(stcb->asoc.cookie_preserve_req); stcb->asoc.cookie_preserve_req = 0; chunk_len += parameter_len; } if (stcb->asoc.scope.ipv4_addr_legal || stcb->asoc.scope.ipv6_addr_legal) { uint8_t i; if (padding_len > 0) { memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); chunk_len += padding_len; padding_len = 0; } parameter_len = (uint16_t)sizeof(struct sctp_paramhdr); if (stcb->asoc.scope.ipv4_addr_legal) { parameter_len += (uint16_t)sizeof(uint16_t); } if (stcb->asoc.scope.ipv6_addr_legal) { parameter_len += (uint16_t)sizeof(uint16_t); } sup_addr = (struct sctp_supported_addr_param *)(mtod(m, caddr_t)+chunk_len); sup_addr->ph.param_type = htons(SCTP_SUPPORTED_ADDRTYPE); sup_addr->ph.param_length = htons(parameter_len); i = 0; if (stcb->asoc.scope.ipv4_addr_legal) { sup_addr->addr_type[i++] = htons(SCTP_IPV4_ADDRESS); } if (stcb->asoc.scope.ipv6_addr_legal) { sup_addr->addr_type[i++] = htons(SCTP_IPV6_ADDRESS); } padding_len = 4 - 2 * i; chunk_len += parameter_len; } SCTP_BUF_LEN(m) = chunk_len; /* now the addresses */ /* * To optimize this we could put the scoping stuff into a structure * and remove the individual uint8's from the assoc structure. Then * we could just sifa in the address within the stcb. But for now * this is a quick hack to get the address stuff teased apart. */ m_last = sctp_add_addresses_to_i_ia(inp, stcb, &stcb->asoc.scope, m, cnt_inits_to, &padding_len, &chunk_len); init->ch.chunk_length = htons(chunk_len); if (padding_len > 0) { if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) { sctp_m_freem(m); return; } } SCTPDBG(SCTP_DEBUG_OUTPUT4, "Sending INIT - calls lowlevel_output\n"); if ((error = sctp_lowlevel_chunk_output(inp, stcb, net, (struct sockaddr *)&net->ro._l_addr, m, 0, NULL, 0, 0, 0, 0, inp->sctp_lport, stcb->rport, htonl(0), net->port, NULL, 0, 0, so_locked))) { SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak send error %d\n", error); if (error == ENOBUFS) { stcb->asoc.ifp_had_enobuf = 1; SCTP_STAT_INCR(sctps_lowlevelerr); } } else { stcb->asoc.ifp_had_enobuf = 0; } SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time); } struct mbuf * sctp_arethere_unrecognized_parameters(struct mbuf *in_initpkt, int param_offset, int *abort_processing, struct sctp_chunkhdr *cp, int *nat_friendly) { /* * Given a mbuf containing an INIT or INIT-ACK with the param_offset * being equal to the beginning of the params i.e. (iphlen + * sizeof(struct sctp_init_msg) parse through the parameters to the * end of the mbuf verifying that all parameters are known. * * For unknown parameters build and return a mbuf with * UNRECOGNIZED_PARAMETER errors. If the flags indicate to stop * processing this chunk stop, and set *abort_processing to 1. * * By having param_offset be pre-set to where parameters begin it is * hoped that this routine may be reused in the future by new * features. */ struct sctp_paramhdr *phdr, params; struct mbuf *mat, *op_err; char tempbuf[SCTP_PARAM_BUFFER_SIZE]; int at, limit, pad_needed; uint16_t ptype, plen, padded_size; int err_at; *abort_processing = 0; mat = in_initpkt; err_at = 0; limit = ntohs(cp->chunk_length) - sizeof(struct sctp_init_chunk); at = param_offset; op_err = NULL; SCTPDBG(SCTP_DEBUG_OUTPUT1, "Check for unrecognized param's\n"); phdr = sctp_get_next_param(mat, at, ¶ms, sizeof(params)); while ((phdr != NULL) && ((size_t)limit >= sizeof(struct sctp_paramhdr))) { ptype = ntohs(phdr->param_type); plen = ntohs(phdr->param_length); if ((plen > limit) || (plen < sizeof(struct sctp_paramhdr))) { /* wacked parameter */ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error %d\n", plen); goto invalid_size; } limit -= SCTP_SIZE32(plen); /*- * All parameters for all chunks that we know/understand are * listed here. We process them other places and make * appropriate stop actions per the upper bits. However this * is the generic routine processor's can call to get back * an operr.. to either incorporate (init-ack) or send. */ padded_size = SCTP_SIZE32(plen); switch (ptype) { /* Param's with variable size */ case SCTP_HEARTBEAT_INFO: case SCTP_STATE_COOKIE: case SCTP_UNRECOG_PARAM: case SCTP_ERROR_CAUSE_IND: /* ok skip fwd */ at += padded_size; break; /* Param's with variable size within a range */ case SCTP_CHUNK_LIST: case SCTP_SUPPORTED_CHUNK_EXT: if (padded_size > (sizeof(struct sctp_supported_chunk_types_param) + (sizeof(uint8_t) * SCTP_MAX_SUPPORTED_EXT))) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error chklist %d\n", plen); goto invalid_size; } at += padded_size; break; case SCTP_SUPPORTED_ADDRTYPE: if (padded_size > SCTP_MAX_ADDR_PARAMS_SIZE) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error supaddrtype %d\n", plen); goto invalid_size; } at += padded_size; break; case SCTP_RANDOM: if (padded_size > (sizeof(struct sctp_auth_random) + SCTP_RANDOM_MAX_SIZE)) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error random %d\n", plen); goto invalid_size; } at += padded_size; break; case SCTP_SET_PRIM_ADDR: case SCTP_DEL_IP_ADDRESS: case SCTP_ADD_IP_ADDRESS: if ((padded_size != sizeof(struct sctp_asconf_addrv4_param)) && (padded_size != sizeof(struct sctp_asconf_addr_param))) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error setprim %d\n", plen); goto invalid_size; } at += padded_size; break; /* Param's with a fixed size */ case SCTP_IPV4_ADDRESS: if (padded_size != sizeof(struct sctp_ipv4addr_param)) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ipv4 addr %d\n", plen); goto invalid_size; } at += padded_size; break; case SCTP_IPV6_ADDRESS: if (padded_size != sizeof(struct sctp_ipv6addr_param)) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ipv6 addr %d\n", plen); goto invalid_size; } at += padded_size; break; case SCTP_COOKIE_PRESERVE: if (padded_size != sizeof(struct sctp_cookie_perserve_param)) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error cookie-preserve %d\n", plen); goto invalid_size; } at += padded_size; break; case SCTP_HAS_NAT_SUPPORT: *nat_friendly = 1; /* fall through */ case SCTP_PRSCTP_SUPPORTED: if (padded_size != sizeof(struct sctp_paramhdr)) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error prsctp/nat support %d\n", plen); goto invalid_size; } at += padded_size; break; case SCTP_ECN_CAPABLE: if (padded_size != sizeof(struct sctp_paramhdr)) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error ecn %d\n", plen); goto invalid_size; } at += padded_size; break; case SCTP_ULP_ADAPTATION: if (padded_size != sizeof(struct sctp_adaptation_layer_indication)) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error adapatation %d\n", plen); goto invalid_size; } at += padded_size; break; case SCTP_SUCCESS_REPORT: if (padded_size != sizeof(struct sctp_asconf_paramhdr)) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Invalid size - error success %d\n", plen); goto invalid_size; } at += padded_size; break; case SCTP_HOSTNAME_ADDRESS: { /* We can NOT handle HOST NAME addresses!! */ int l_len; SCTPDBG(SCTP_DEBUG_OUTPUT1, "Can't handle hostname addresses.. abort processing\n"); *abort_processing = 1; if (op_err == NULL) { /* Ok need to try to get a mbuf */ #ifdef INET6 l_len = SCTP_MIN_OVERHEAD; #else l_len = SCTP_MIN_V4_OVERHEAD; #endif l_len += sizeof(struct sctp_chunkhdr); l_len += plen; l_len += sizeof(struct sctp_paramhdr); op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA); if (op_err) { SCTP_BUF_LEN(op_err) = 0; /* * pre-reserve space for ip * and sctp header and * chunk hdr */ #ifdef INET6 SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr)); #else SCTP_BUF_RESV_UF(op_err, sizeof(struct ip)); #endif SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr)); SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr)); } } if (op_err) { /* If we have space */ struct sctp_paramhdr s; if (err_at % 4) { uint32_t cpthis = 0; pad_needed = 4 - (err_at % 4); m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis); err_at += pad_needed; } s.param_type = htons(SCTP_CAUSE_UNRESOLVABLE_ADDR); s.param_length = htons(sizeof(s) + plen); m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s); err_at += sizeof(s); if (plen > sizeof(tempbuf)) { plen = sizeof(tempbuf); } phdr = sctp_get_next_param(mat, at, (struct sctp_paramhdr *)tempbuf, plen); if (phdr == NULL) { sctp_m_freem(op_err); /* * we are out of memory but * we still need to have a * look at what to do (the * system is in trouble * though). */ return (NULL); } m_copyback(op_err, err_at, plen, (caddr_t)phdr); } return (op_err); break; } default: /* * we do not recognize the parameter figure out what * we do. */ SCTPDBG(SCTP_DEBUG_OUTPUT1, "Hit default param %x\n", ptype); if ((ptype & 0x4000) == 0x4000) { /* Report bit is set?? */ SCTPDBG(SCTP_DEBUG_OUTPUT1, "report op err\n"); if (op_err == NULL) { int l_len; /* Ok need to try to get an mbuf */ #ifdef INET6 l_len = SCTP_MIN_OVERHEAD; #else l_len = SCTP_MIN_V4_OVERHEAD; #endif l_len += sizeof(struct sctp_chunkhdr); l_len += plen; l_len += sizeof(struct sctp_paramhdr); op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA); if (op_err) { SCTP_BUF_LEN(op_err) = 0; #ifdef INET6 SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr)); #else SCTP_BUF_RESV_UF(op_err, sizeof(struct ip)); #endif SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr)); SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr)); } } if (op_err) { /* If we have space */ struct sctp_paramhdr s; if (err_at % 4) { uint32_t cpthis = 0; pad_needed = 4 - (err_at % 4); m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis); err_at += pad_needed; } s.param_type = htons(SCTP_UNRECOG_PARAM); s.param_length = htons(sizeof(s) + plen); m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s); err_at += sizeof(s); if (plen > sizeof(tempbuf)) { plen = sizeof(tempbuf); } phdr = sctp_get_next_param(mat, at, (struct sctp_paramhdr *)tempbuf, plen); if (phdr == NULL) { sctp_m_freem(op_err); /* * we are out of memory but * we still need to have a * look at what to do (the * system is in trouble * though). */ op_err = NULL; goto more_processing; } m_copyback(op_err, err_at, plen, (caddr_t)phdr); err_at += plen; } } more_processing: if ((ptype & 0x8000) == 0x0000) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "stop proc\n"); return (op_err); } else { /* skip this chunk and continue processing */ SCTPDBG(SCTP_DEBUG_OUTPUT1, "move on\n"); at += SCTP_SIZE32(plen); } break; } phdr = sctp_get_next_param(mat, at, ¶ms, sizeof(params)); } return (op_err); invalid_size: SCTPDBG(SCTP_DEBUG_OUTPUT1, "abort flag set\n"); *abort_processing = 1; if ((op_err == NULL) && phdr) { int l_len; #ifdef INET6 l_len = SCTP_MIN_OVERHEAD; #else l_len = SCTP_MIN_V4_OVERHEAD; #endif l_len += sizeof(struct sctp_chunkhdr); l_len += (2 * sizeof(struct sctp_paramhdr)); op_err = sctp_get_mbuf_for_msg(l_len, 0, M_NOWAIT, 1, MT_DATA); if (op_err) { SCTP_BUF_LEN(op_err) = 0; #ifdef INET6 SCTP_BUF_RESV_UF(op_err, sizeof(struct ip6_hdr)); #else SCTP_BUF_RESV_UF(op_err, sizeof(struct ip)); #endif SCTP_BUF_RESV_UF(op_err, sizeof(struct sctphdr)); SCTP_BUF_RESV_UF(op_err, sizeof(struct sctp_chunkhdr)); } } if ((op_err) && phdr) { struct sctp_paramhdr s; if (err_at % 4) { uint32_t cpthis = 0; pad_needed = 4 - (err_at % 4); m_copyback(op_err, err_at, pad_needed, (caddr_t)&cpthis); err_at += pad_needed; } s.param_type = htons(SCTP_CAUSE_PROTOCOL_VIOLATION); s.param_length = htons(sizeof(s) + sizeof(struct sctp_paramhdr)); m_copyback(op_err, err_at, sizeof(s), (caddr_t)&s); err_at += sizeof(s); /* Only copy back the p-hdr that caused the issue */ m_copyback(op_err, err_at, sizeof(struct sctp_paramhdr), (caddr_t)phdr); } return (op_err); } static int sctp_are_there_new_addresses(struct sctp_association *asoc, struct mbuf *in_initpkt, int offset, struct sockaddr *src) { /* * Given a INIT packet, look through the packet to verify that there * are NO new addresses. As we go through the parameters add reports * of any un-understood parameters that require an error. Also we * must return (1) to drop the packet if we see a un-understood * parameter that tells us to drop the chunk. */ struct sockaddr *sa_touse; struct sockaddr *sa; struct sctp_paramhdr *phdr, params; uint16_t ptype, plen; uint8_t fnd; struct sctp_nets *net; int check_src; #ifdef INET struct sockaddr_in sin4, *sa4; #endif #ifdef INET6 struct sockaddr_in6 sin6, *sa6; #endif #ifdef INET memset(&sin4, 0, sizeof(sin4)); sin4.sin_family = AF_INET; sin4.sin_len = sizeof(sin4); #endif #ifdef INET6 memset(&sin6, 0, sizeof(sin6)); sin6.sin6_family = AF_INET6; sin6.sin6_len = sizeof(sin6); #endif /* First what about the src address of the pkt ? */ check_src = 0; switch (src->sa_family) { #ifdef INET case AF_INET: if (asoc->scope.ipv4_addr_legal) { check_src = 1; } break; #endif #ifdef INET6 case AF_INET6: if (asoc->scope.ipv6_addr_legal) { check_src = 1; } break; #endif default: /* TSNH */ break; } if (check_src) { fnd = 0; TAILQ_FOREACH(net, &asoc->nets, sctp_next) { sa = (struct sockaddr *)&net->ro._l_addr; if (sa->sa_family == src->sa_family) { #ifdef INET if (sa->sa_family == AF_INET) { struct sockaddr_in *src4; sa4 = (struct sockaddr_in *)sa; src4 = (struct sockaddr_in *)src; if (sa4->sin_addr.s_addr == src4->sin_addr.s_addr) { fnd = 1; break; } } #endif #ifdef INET6 if (sa->sa_family == AF_INET6) { struct sockaddr_in6 *src6; sa6 = (struct sockaddr_in6 *)sa; src6 = (struct sockaddr_in6 *)src; if (SCTP6_ARE_ADDR_EQUAL(sa6, src6)) { fnd = 1; break; } } #endif } } if (fnd == 0) { /* New address added! no need to look further. */ return (1); } } /* Ok so far lets munge through the rest of the packet */ offset += sizeof(struct sctp_init_chunk); phdr = sctp_get_next_param(in_initpkt, offset, ¶ms, sizeof(params)); while (phdr) { sa_touse = NULL; ptype = ntohs(phdr->param_type); plen = ntohs(phdr->param_length); switch (ptype) { #ifdef INET case SCTP_IPV4_ADDRESS: { struct sctp_ipv4addr_param *p4, p4_buf; if (plen != sizeof(struct sctp_ipv4addr_param)) { return (1); } phdr = sctp_get_next_param(in_initpkt, offset, (struct sctp_paramhdr *)&p4_buf, sizeof(p4_buf)); if (phdr == NULL) { return (1); } if (asoc->scope.ipv4_addr_legal) { p4 = (struct sctp_ipv4addr_param *)phdr; sin4.sin_addr.s_addr = p4->addr; sa_touse = (struct sockaddr *)&sin4; } break; } #endif #ifdef INET6 case SCTP_IPV6_ADDRESS: { struct sctp_ipv6addr_param *p6, p6_buf; if (plen != sizeof(struct sctp_ipv6addr_param)) { return (1); } phdr = sctp_get_next_param(in_initpkt, offset, (struct sctp_paramhdr *)&p6_buf, sizeof(p6_buf)); if (phdr == NULL) { return (1); } if (asoc->scope.ipv6_addr_legal) { p6 = (struct sctp_ipv6addr_param *)phdr; memcpy((caddr_t)&sin6.sin6_addr, p6->addr, sizeof(p6->addr)); sa_touse = (struct sockaddr *)&sin6; } break; } #endif default: sa_touse = NULL; break; } if (sa_touse) { /* ok, sa_touse points to one to check */ fnd = 0; TAILQ_FOREACH(net, &asoc->nets, sctp_next) { sa = (struct sockaddr *)&net->ro._l_addr; if (sa->sa_family != sa_touse->sa_family) { continue; } #ifdef INET if (sa->sa_family == AF_INET) { sa4 = (struct sockaddr_in *)sa; if (sa4->sin_addr.s_addr == sin4.sin_addr.s_addr) { fnd = 1; break; } } #endif #ifdef INET6 if (sa->sa_family == AF_INET6) { sa6 = (struct sockaddr_in6 *)sa; if (SCTP6_ARE_ADDR_EQUAL( sa6, &sin6)) { fnd = 1; break; } } #endif } if (!fnd) { /* New addr added! no need to look further */ return (1); } } offset += SCTP_SIZE32(plen); phdr = sctp_get_next_param(in_initpkt, offset, ¶ms, sizeof(params)); } return (0); } /* * Given a MBUF chain that was sent into us containing an INIT. Build a * INIT-ACK with COOKIE and send back. We assume that the in_initpkt has done * a pullup to include IPv6/4header, SCTP header and initial part of INIT * message (i.e. the struct sctp_init_msg). */ void sctp_send_initiate_ack(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *src_net, struct mbuf *init_pkt, int iphlen, int offset, struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, struct sctp_init_chunk *init_chk, uint8_t mflowtype, uint32_t mflowid, uint32_t vrf_id, uint16_t port) { struct sctp_association *asoc; struct mbuf *m, *m_tmp, *m_last, *m_cookie, *op_err; struct sctp_init_ack_chunk *initack; struct sctp_adaptation_layer_indication *ali; struct sctp_supported_chunk_types_param *pr_supported; struct sctp_paramhdr *ph; union sctp_sockstore *over_addr; struct sctp_scoping scp; struct timeval now; #ifdef INET struct sockaddr_in *dst4 = (struct sockaddr_in *)dst; struct sockaddr_in *src4 = (struct sockaddr_in *)src; struct sockaddr_in *sin; #endif #ifdef INET6 struct sockaddr_in6 *dst6 = (struct sockaddr_in6 *)dst; struct sockaddr_in6 *src6 = (struct sockaddr_in6 *)src; struct sockaddr_in6 *sin6; #endif struct sockaddr *to; struct sctp_state_cookie stc; struct sctp_nets *net = NULL; uint8_t *signature = NULL; int cnt_inits_to = 0; uint16_t his_limit, i_want; int abort_flag; int nat_friendly = 0; int error; struct socket *so; uint16_t num_ext, chunk_len, padding_len, parameter_len; if (stcb) { asoc = &stcb->asoc; } else { asoc = NULL; } if ((asoc != NULL) && (SCTP_GET_STATE(stcb) != SCTP_STATE_COOKIE_WAIT)) { if (sctp_are_there_new_addresses(asoc, init_pkt, offset, src)) { /* * new addresses, out of here in non-cookie-wait * states * * Send an ABORT, without the new address error * cause. This looks no different than if no * listener was present. */ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), "Address added"); sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err, mflowtype, mflowid, inp->fibnum, vrf_id, port); return; } if (src_net != NULL && (src_net->port != port)) { /* * change of remote encapsulation port, out of here * in non-cookie-wait states * * Send an ABORT, without an specific error cause. * This looks no different than if no listener was * present. */ op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), "Remote encapsulation port changed"); sctp_send_abort(init_pkt, iphlen, src, dst, sh, 0, op_err, mflowtype, mflowid, inp->fibnum, vrf_id, port); return; } } abort_flag = 0; op_err = sctp_arethere_unrecognized_parameters(init_pkt, (offset + sizeof(struct sctp_init_chunk)), &abort_flag, (struct sctp_chunkhdr *)init_chk, &nat_friendly); if (abort_flag) { do_a_abort: if (op_err == NULL) { char msg[SCTP_DIAG_INFO_LEN]; snprintf(msg, sizeof(msg), "%s:%d at %s", __FILE__, __LINE__, __func__); op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), msg); } sctp_send_abort(init_pkt, iphlen, src, dst, sh, init_chk->init.initiate_tag, op_err, mflowtype, mflowid, inp->fibnum, vrf_id, port); return; } m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA); if (m == NULL) { /* No memory, INIT timer will re-attempt. */ if (op_err) sctp_m_freem(op_err); return; } chunk_len = (uint16_t)sizeof(struct sctp_init_ack_chunk); padding_len = 0; /* * We might not overwrite the identification[] completely and on * some platforms time_entered will contain some padding. Therefore * zero out the cookie to avoid putting uninitialized memory on the * wire. */ memset(&stc, 0, sizeof(struct sctp_state_cookie)); /* the time I built cookie */ (void)SCTP_GETTIME_TIMEVAL(&now); stc.time_entered.tv_sec = now.tv_sec; stc.time_entered.tv_usec = now.tv_usec; /* populate any tie tags */ if (asoc != NULL) { /* unlock before tag selections */ stc.tie_tag_my_vtag = asoc->my_vtag_nonce; stc.tie_tag_peer_vtag = asoc->peer_vtag_nonce; stc.cookie_life = asoc->cookie_life; net = asoc->primary_destination; } else { stc.tie_tag_my_vtag = 0; stc.tie_tag_peer_vtag = 0; /* life I will award this cookie */ stc.cookie_life = inp->sctp_ep.def_cookie_life; } /* copy in the ports for later check */ stc.myport = sh->dest_port; stc.peerport = sh->src_port; /* * If we wanted to honor cookie life extensions, we would add to * stc.cookie_life. For now we should NOT honor any extension */ stc.site_scope = stc.local_scope = stc.loopback_scope = 0; if (inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { stc.ipv6_addr_legal = 1; if (SCTP_IPV6_V6ONLY(inp)) { stc.ipv4_addr_legal = 0; } else { stc.ipv4_addr_legal = 1; } } else { stc.ipv6_addr_legal = 0; stc.ipv4_addr_legal = 1; } stc.ipv4_scope = 0; if (net == NULL) { to = src; switch (dst->sa_family) { #ifdef INET case AF_INET: { /* lookup address */ stc.address[0] = src4->sin_addr.s_addr; stc.address[1] = 0; stc.address[2] = 0; stc.address[3] = 0; stc.addr_type = SCTP_IPV4_ADDRESS; /* local from address */ stc.laddress[0] = dst4->sin_addr.s_addr; stc.laddress[1] = 0; stc.laddress[2] = 0; stc.laddress[3] = 0; stc.laddr_type = SCTP_IPV4_ADDRESS; /* scope_id is only for v6 */ stc.scope_id = 0; if ((IN4_ISPRIVATE_ADDRESS(&src4->sin_addr)) || (IN4_ISPRIVATE_ADDRESS(&dst4->sin_addr))) { stc.ipv4_scope = 1; } /* Must use the address in this case */ if (sctp_is_address_on_local_host(src, vrf_id)) { stc.loopback_scope = 1; stc.ipv4_scope = 1; stc.site_scope = 1; stc.local_scope = 0; } break; } #endif #ifdef INET6 case AF_INET6: { stc.addr_type = SCTP_IPV6_ADDRESS; memcpy(&stc.address, &src6->sin6_addr, sizeof(struct in6_addr)); stc.scope_id = ntohs(in6_getscope(&src6->sin6_addr)); if (sctp_is_address_on_local_host(src, vrf_id)) { stc.loopback_scope = 1; stc.local_scope = 0; stc.site_scope = 1; stc.ipv4_scope = 1; } else if (IN6_IS_ADDR_LINKLOCAL(&src6->sin6_addr) || IN6_IS_ADDR_LINKLOCAL(&dst6->sin6_addr)) { /* * If the new destination or source * is a LINK_LOCAL we must have * common both site and local scope. * Don't set local scope though * since we must depend on the * source to be added implicitly. We * cannot assure just because we * share one link that all links are * common. */ stc.local_scope = 0; stc.site_scope = 1; stc.ipv4_scope = 1; /* * we start counting for the private * address stuff at 1. since the * link local we source from won't * show up in our scoped count. */ cnt_inits_to = 1; /* * pull out the scope_id from * incoming pkt */ } else if (IN6_IS_ADDR_SITELOCAL(&src6->sin6_addr) || IN6_IS_ADDR_SITELOCAL(&dst6->sin6_addr)) { /* * If the new destination or source * is SITE_LOCAL then we must have * site scope in common. */ stc.site_scope = 1; } memcpy(&stc.laddress, &dst6->sin6_addr, sizeof(struct in6_addr)); stc.laddr_type = SCTP_IPV6_ADDRESS; break; } #endif default: /* TSNH */ goto do_a_abort; break; } } else { /* set the scope per the existing tcb */ #ifdef INET6 struct sctp_nets *lnet; #endif stc.loopback_scope = asoc->scope.loopback_scope; stc.ipv4_scope = asoc->scope.ipv4_local_scope; stc.site_scope = asoc->scope.site_scope; stc.local_scope = asoc->scope.local_scope; #ifdef INET6 /* Why do we not consider IPv4 LL addresses? */ TAILQ_FOREACH(lnet, &asoc->nets, sctp_next) { if (lnet->ro._l_addr.sin6.sin6_family == AF_INET6) { if (IN6_IS_ADDR_LINKLOCAL(&lnet->ro._l_addr.sin6.sin6_addr)) { /* * if we have a LL address, start * counting at 1. */ cnt_inits_to = 1; } } } #endif /* use the net pointer */ to = (struct sockaddr *)&net->ro._l_addr; switch (to->sa_family) { #ifdef INET case AF_INET: sin = (struct sockaddr_in *)to; stc.address[0] = sin->sin_addr.s_addr; stc.address[1] = 0; stc.address[2] = 0; stc.address[3] = 0; stc.addr_type = SCTP_IPV4_ADDRESS; if (net->src_addr_selected == 0) { /* * strange case here, the INIT should have * did the selection. */ net->ro._s_addr = sctp_source_address_selection(inp, stcb, (sctp_route_t *)&net->ro, net, 0, vrf_id); if (net->ro._s_addr == NULL) return; net->src_addr_selected = 1; } stc.laddress[0] = net->ro._s_addr->address.sin.sin_addr.s_addr; stc.laddress[1] = 0; stc.laddress[2] = 0; stc.laddress[3] = 0; stc.laddr_type = SCTP_IPV4_ADDRESS; /* scope_id is only for v6 */ stc.scope_id = 0; break; #endif #ifdef INET6 case AF_INET6: sin6 = (struct sockaddr_in6 *)to; memcpy(&stc.address, &sin6->sin6_addr, sizeof(struct in6_addr)); stc.addr_type = SCTP_IPV6_ADDRESS; stc.scope_id = sin6->sin6_scope_id; if (net->src_addr_selected == 0) { /* * strange case here, the INIT should have * done the selection. */ net->ro._s_addr = sctp_source_address_selection(inp, stcb, (sctp_route_t *)&net->ro, net, 0, vrf_id); if (net->ro._s_addr == NULL) return; net->src_addr_selected = 1; } memcpy(&stc.laddress, &net->ro._s_addr->address.sin6.sin6_addr, sizeof(struct in6_addr)); stc.laddr_type = SCTP_IPV6_ADDRESS; break; #endif } } /* Now lets put the SCTP header in place */ initack = mtod(m, struct sctp_init_ack_chunk *); /* Save it off for quick ref */ stc.peers_vtag = ntohl(init_chk->init.initiate_tag); /* who are we */ memcpy(stc.identification, SCTP_VERSION_STRING, min(strlen(SCTP_VERSION_STRING), sizeof(stc.identification))); memset(stc.reserved, 0, SCTP_RESERVE_SPACE); /* now the chunk header */ initack->ch.chunk_type = SCTP_INITIATION_ACK; initack->ch.chunk_flags = 0; /* fill in later from mbuf we build */ initack->ch.chunk_length = 0; /* place in my tag */ if ((asoc != NULL) && ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) || (SCTP_GET_STATE(stcb) == SCTP_STATE_INUSE) || (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED))) { /* re-use the v-tags and init-seq here */ initack->init.initiate_tag = htonl(asoc->my_vtag); initack->init.initial_tsn = htonl(asoc->init_seq_number); } else { uint32_t vtag, itsn; if (asoc) { atomic_add_int(&asoc->refcnt, 1); SCTP_TCB_UNLOCK(stcb); new_tag: vtag = sctp_select_a_tag(inp, inp->sctp_lport, sh->src_port, 1); if ((asoc->peer_supports_nat) && (vtag == asoc->my_vtag)) { /* * Got a duplicate vtag on some guy behind a * nat make sure we don't use it. */ goto new_tag; } initack->init.initiate_tag = htonl(vtag); /* get a TSN to use too */ itsn = sctp_select_initial_TSN(&inp->sctp_ep); initack->init.initial_tsn = htonl(itsn); SCTP_TCB_LOCK(stcb); atomic_add_int(&asoc->refcnt, -1); } else { SCTP_INP_INCR_REF(inp); SCTP_INP_RUNLOCK(inp); vtag = sctp_select_a_tag(inp, inp->sctp_lport, sh->src_port, 1); initack->init.initiate_tag = htonl(vtag); /* get a TSN to use too */ initack->init.initial_tsn = htonl(sctp_select_initial_TSN(&inp->sctp_ep)); SCTP_INP_RLOCK(inp); SCTP_INP_DECR_REF(inp); } } /* save away my tag to */ stc.my_vtag = initack->init.initiate_tag; /* set up some of the credits. */ so = inp->sctp_socket; if (so == NULL) { /* memory problem */ sctp_m_freem(m); return; } else { initack->init.a_rwnd = htonl(max(SCTP_SB_LIMIT_RCV(so), SCTP_MINIMAL_RWND)); } /* set what I want */ his_limit = ntohs(init_chk->init.num_inbound_streams); /* choose what I want */ if (asoc != NULL) { if (asoc->streamoutcnt > asoc->pre_open_streams) { i_want = asoc->streamoutcnt; } else { i_want = asoc->pre_open_streams; } } else { i_want = inp->sctp_ep.pre_open_stream_count; } if (his_limit < i_want) { /* I Want more :< */ initack->init.num_outbound_streams = init_chk->init.num_inbound_streams; } else { /* I can have what I want :> */ initack->init.num_outbound_streams = htons(i_want); } /* tell him his limit. */ initack->init.num_inbound_streams = htons(inp->sctp_ep.max_open_streams_intome); /* adaptation layer indication parameter */ if (inp->sctp_ep.adaptation_layer_indicator_provided) { parameter_len = (uint16_t)sizeof(struct sctp_adaptation_layer_indication); ali = (struct sctp_adaptation_layer_indication *)(mtod(m, caddr_t)+chunk_len); ali->ph.param_type = htons(SCTP_ULP_ADAPTATION); ali->ph.param_length = htons(parameter_len); ali->indication = htonl(inp->sctp_ep.adaptation_layer_indicator); chunk_len += parameter_len; } /* ECN parameter */ if (((asoc != NULL) && (asoc->ecn_supported == 1)) || ((asoc == NULL) && (inp->ecn_supported == 1))) { parameter_len = (uint16_t)sizeof(struct sctp_paramhdr); ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len); ph->param_type = htons(SCTP_ECN_CAPABLE); ph->param_length = htons(parameter_len); chunk_len += parameter_len; } /* PR-SCTP supported parameter */ if (((asoc != NULL) && (asoc->prsctp_supported == 1)) || ((asoc == NULL) && (inp->prsctp_supported == 1))) { parameter_len = (uint16_t)sizeof(struct sctp_paramhdr); ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len); ph->param_type = htons(SCTP_PRSCTP_SUPPORTED); ph->param_length = htons(parameter_len); chunk_len += parameter_len; } /* Add NAT friendly parameter */ if (nat_friendly) { parameter_len = (uint16_t)sizeof(struct sctp_paramhdr); ph = (struct sctp_paramhdr *)(mtod(m, caddr_t)+chunk_len); ph->param_type = htons(SCTP_HAS_NAT_SUPPORT); ph->param_length = htons(parameter_len); chunk_len += parameter_len; } /* And now tell the peer which extensions we support */ num_ext = 0; pr_supported = (struct sctp_supported_chunk_types_param *)(mtod(m, caddr_t)+chunk_len); if (((asoc != NULL) && (asoc->prsctp_supported == 1)) || ((asoc == NULL) && (inp->prsctp_supported == 1))) { pr_supported->chunk_types[num_ext++] = SCTP_FORWARD_CUM_TSN; if (((asoc != NULL) && (asoc->idata_supported == 1)) || ((asoc == NULL) && (inp->idata_supported == 1))) { pr_supported->chunk_types[num_ext++] = SCTP_IFORWARD_CUM_TSN; } } if (((asoc != NULL) && (asoc->auth_supported == 1)) || ((asoc == NULL) && (inp->auth_supported == 1))) { pr_supported->chunk_types[num_ext++] = SCTP_AUTHENTICATION; } if (((asoc != NULL) && (asoc->asconf_supported == 1)) || ((asoc == NULL) && (inp->asconf_supported == 1))) { pr_supported->chunk_types[num_ext++] = SCTP_ASCONF; pr_supported->chunk_types[num_ext++] = SCTP_ASCONF_ACK; } if (((asoc != NULL) && (asoc->reconfig_supported == 1)) || ((asoc == NULL) && (inp->reconfig_supported == 1))) { pr_supported->chunk_types[num_ext++] = SCTP_STREAM_RESET; } if (((asoc != NULL) && (asoc->idata_supported == 1)) || ((asoc == NULL) && (inp->idata_supported == 1))) { pr_supported->chunk_types[num_ext++] = SCTP_IDATA; } if (((asoc != NULL) && (asoc->nrsack_supported == 1)) || ((asoc == NULL) && (inp->nrsack_supported == 1))) { pr_supported->chunk_types[num_ext++] = SCTP_NR_SELECTIVE_ACK; } if (((asoc != NULL) && (asoc->pktdrop_supported == 1)) || ((asoc == NULL) && (inp->pktdrop_supported == 1))) { pr_supported->chunk_types[num_ext++] = SCTP_PACKET_DROPPED; } if (num_ext > 0) { parameter_len = (uint16_t)sizeof(struct sctp_supported_chunk_types_param) + num_ext; pr_supported->ph.param_type = htons(SCTP_SUPPORTED_CHUNK_EXT); pr_supported->ph.param_length = htons(parameter_len); padding_len = SCTP_SIZE32(parameter_len) - parameter_len; chunk_len += parameter_len; } /* add authentication parameters */ if (((asoc != NULL) && (asoc->auth_supported == 1)) || ((asoc == NULL) && (inp->auth_supported == 1))) { struct sctp_auth_random *randp; struct sctp_auth_hmac_algo *hmacs; struct sctp_auth_chunk_list *chunks; if (padding_len > 0) { memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); chunk_len += padding_len; padding_len = 0; } /* generate and add RANDOM parameter */ randp = (struct sctp_auth_random *)(mtod(m, caddr_t)+chunk_len); parameter_len = (uint16_t)sizeof(struct sctp_auth_random) + SCTP_AUTH_RANDOM_SIZE_DEFAULT; randp->ph.param_type = htons(SCTP_RANDOM); randp->ph.param_length = htons(parameter_len); SCTP_READ_RANDOM(randp->random_data, SCTP_AUTH_RANDOM_SIZE_DEFAULT); padding_len = SCTP_SIZE32(parameter_len) - parameter_len; chunk_len += parameter_len; if (padding_len > 0) { memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); chunk_len += padding_len; padding_len = 0; } /* add HMAC_ALGO parameter */ hmacs = (struct sctp_auth_hmac_algo *)(mtod(m, caddr_t)+chunk_len); parameter_len = (uint16_t)sizeof(struct sctp_auth_hmac_algo) + sctp_serialize_hmaclist(inp->sctp_ep.local_hmacs, (uint8_t *)hmacs->hmac_ids); hmacs->ph.param_type = htons(SCTP_HMAC_LIST); hmacs->ph.param_length = htons(parameter_len); padding_len = SCTP_SIZE32(parameter_len) - parameter_len; chunk_len += parameter_len; if (padding_len > 0) { memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); chunk_len += padding_len; padding_len = 0; } /* add CHUNKS parameter */ chunks = (struct sctp_auth_chunk_list *)(mtod(m, caddr_t)+chunk_len); parameter_len = (uint16_t)sizeof(struct sctp_auth_chunk_list) + sctp_serialize_auth_chunks(inp->sctp_ep.local_auth_chunks, chunks->chunk_types); chunks->ph.param_type = htons(SCTP_CHUNK_LIST); chunks->ph.param_length = htons(parameter_len); padding_len = SCTP_SIZE32(parameter_len) - parameter_len; chunk_len += parameter_len; } SCTP_BUF_LEN(m) = chunk_len; m_last = m; /* now the addresses */ /* * To optimize this we could put the scoping stuff into a structure * and remove the individual uint8's from the stc structure. Then we * could just sifa in the address within the stc.. but for now this * is a quick hack to get the address stuff teased apart. */ scp.ipv4_addr_legal = stc.ipv4_addr_legal; scp.ipv6_addr_legal = stc.ipv6_addr_legal; scp.loopback_scope = stc.loopback_scope; scp.ipv4_local_scope = stc.ipv4_scope; scp.local_scope = stc.local_scope; scp.site_scope = stc.site_scope; m_last = sctp_add_addresses_to_i_ia(inp, stcb, &scp, m_last, cnt_inits_to, &padding_len, &chunk_len); /* padding_len can only be positive, if no addresses have been added */ if (padding_len > 0) { memset(mtod(m, caddr_t)+chunk_len, 0, padding_len); chunk_len += padding_len; SCTP_BUF_LEN(m) += padding_len; padding_len = 0; } /* tack on the operational error if present */ if (op_err) { parameter_len = 0; for (m_tmp = op_err; m_tmp != NULL; m_tmp = SCTP_BUF_NEXT(m_tmp)) { parameter_len += SCTP_BUF_LEN(m_tmp); } padding_len = SCTP_SIZE32(parameter_len) - parameter_len; SCTP_BUF_NEXT(m_last) = op_err; while (SCTP_BUF_NEXT(m_last) != NULL) { m_last = SCTP_BUF_NEXT(m_last); } chunk_len += parameter_len; } if (padding_len > 0) { m_last = sctp_add_pad_tombuf(m_last, padding_len); if (m_last == NULL) { /* Houston we have a problem, no space */ sctp_m_freem(m); return; } chunk_len += padding_len; padding_len = 0; } /* Now we must build a cookie */ m_cookie = sctp_add_cookie(init_pkt, offset, m, 0, &stc, &signature); if (m_cookie == NULL) { /* memory problem */ sctp_m_freem(m); return; } /* Now append the cookie to the end and update the space/size */ SCTP_BUF_NEXT(m_last) = m_cookie; parameter_len = 0; for (m_tmp = m_cookie; m_tmp != NULL; m_tmp = SCTP_BUF_NEXT(m_tmp)) { parameter_len += SCTP_BUF_LEN(m_tmp); if (SCTP_BUF_NEXT(m_tmp) == NULL) { m_last = m_tmp; } } padding_len = SCTP_SIZE32(parameter_len) - parameter_len; chunk_len += parameter_len; /* * Place in the size, but we don't include the last pad (if any) in * the INIT-ACK. */ initack->ch.chunk_length = htons(chunk_len); /* * Time to sign the cookie, we don't sign over the cookie signature * though thus we set trailer. */ (void)sctp_hmac_m(SCTP_HMAC, (uint8_t *)inp->sctp_ep.secret_key[(int)(inp->sctp_ep.current_secret_number)], SCTP_SECRET_SIZE, m_cookie, sizeof(struct sctp_paramhdr), (uint8_t *)signature, SCTP_SIGNATURE_SIZE); /* * We sifa 0 here to NOT set IP_DF if its IPv4, we ignore the return * here since the timer will drive a retranmission. */ if (padding_len > 0) { if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) { sctp_m_freem(m); return; } } if (stc.loopback_scope) { over_addr = (union sctp_sockstore *)dst; } else { over_addr = NULL; } if ((error = sctp_lowlevel_chunk_output(inp, NULL, NULL, to, m, 0, NULL, 0, 0, 0, 0, inp->sctp_lport, sh->src_port, init_chk->init.initiate_tag, port, over_addr, mflowtype, mflowid, SCTP_SO_NOT_LOCKED))) { SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak send error %d\n", error); if (error == ENOBUFS) { if (asoc != NULL) { asoc->ifp_had_enobuf = 1; } SCTP_STAT_INCR(sctps_lowlevelerr); } } else { if (asoc != NULL) { asoc->ifp_had_enobuf = 0; } } SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); } static void sctp_prune_prsctp(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_sndrcvinfo *srcv, int dataout) { int freed_spc = 0; struct sctp_tmit_chunk *chk, *nchk; SCTP_TCB_LOCK_ASSERT(stcb); if ((asoc->prsctp_supported) && (asoc->sent_queue_cnt_removeable > 0)) { TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) { /* * Look for chunks marked with the PR_SCTP flag AND * the buffer space flag. If the one being sent is * equal or greater priority then purge the old one * and free some space. */ if (PR_SCTP_BUF_ENABLED(chk->flags)) { /* * This one is PR-SCTP AND buffer space * limited type */ if (chk->rec.data.timetodrop.tv_sec >= (long)srcv->sinfo_timetolive) { /* * Lower numbers equates to higher * priority so if the one we are * looking at has a larger or equal * priority we want to drop the data * and NOT retransmit it. */ if (chk->data) { /* * We release the book_size * if the mbuf is here */ int ret_spc; uint8_t sent; if (chk->sent > SCTP_DATAGRAM_UNSENT) sent = 1; else sent = 0; ret_spc = sctp_release_pr_sctp_chunk(stcb, chk, sent, SCTP_SO_LOCKED); freed_spc += ret_spc; if (freed_spc >= dataout) { return; } } /* if chunk was present */ } /* if of sufficient priority */ } /* if chunk has enabled */ } /* tailqforeach */ TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) { /* Here we must move to the sent queue and mark */ if (PR_SCTP_BUF_ENABLED(chk->flags)) { if (chk->rec.data.timetodrop.tv_sec >= (long)srcv->sinfo_timetolive) { if (chk->data) { /* * We release the book_size * if the mbuf is here */ int ret_spc; ret_spc = sctp_release_pr_sctp_chunk(stcb, chk, 0, SCTP_SO_LOCKED); freed_spc += ret_spc; if (freed_spc >= dataout) { return; } } /* end if chk->data */ } /* end if right class */ } /* end if chk pr-sctp */ } /* tailqforeachsafe (chk) */ } /* if enabled in asoc */ } int sctp_get_frag_point(struct sctp_tcb *stcb, struct sctp_association *asoc) { int siz, ovh; /* * For endpoints that have both v6 and v4 addresses we must reserve * room for the ipv6 header, for those that are only dealing with V4 * we use a larger frag point. */ if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { ovh = SCTP_MIN_OVERHEAD; } else { ovh = SCTP_MIN_V4_OVERHEAD; } ovh += SCTP_DATA_CHUNK_OVERHEAD(stcb); if (stcb->asoc.sctp_frag_point > asoc->smallest_mtu) siz = asoc->smallest_mtu - ovh; else siz = (stcb->asoc.sctp_frag_point - ovh); /* * if (siz > (MCLBYTES-sizeof(struct sctp_data_chunk))) { */ /* A data chunk MUST fit in a cluster */ /* siz = (MCLBYTES - sizeof(struct sctp_data_chunk)); */ /* } */ /* adjust for an AUTH chunk if DATA requires auth */ if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) siz -= sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id); if (siz % 4) { /* make it an even word boundary please */ siz -= (siz % 4); } return (siz); } static void sctp_set_prsctp_policy(struct sctp_stream_queue_pending *sp) { /* * We assume that the user wants PR_SCTP_TTL if the user provides a * positive lifetime but does not specify any PR_SCTP policy. */ if (PR_SCTP_ENABLED(sp->sinfo_flags)) { sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags); } else if (sp->timetolive > 0) { sp->sinfo_flags |= SCTP_PR_SCTP_TTL; sp->act_flags |= PR_SCTP_POLICY(sp->sinfo_flags); } else { return; } switch (PR_SCTP_POLICY(sp->sinfo_flags)) { case CHUNK_FLAGS_PR_SCTP_BUF: /* * Time to live is a priority stored in tv_sec when doing * the buffer drop thing. */ sp->ts.tv_sec = sp->timetolive; sp->ts.tv_usec = 0; break; case CHUNK_FLAGS_PR_SCTP_TTL: { struct timeval tv; (void)SCTP_GETTIME_TIMEVAL(&sp->ts); tv.tv_sec = sp->timetolive / 1000; tv.tv_usec = (sp->timetolive * 1000) % 1000000; /* * TODO sctp_constants.h needs alternative time * macros when _KERNEL is undefined. */ timevaladd(&sp->ts, &tv); } break; case CHUNK_FLAGS_PR_SCTP_RTX: /* * Time to live is a the number or retransmissions stored in * tv_sec. */ sp->ts.tv_sec = sp->timetolive; sp->ts.tv_usec = 0; break; default: SCTPDBG(SCTP_DEBUG_USRREQ1, "Unknown PR_SCTP policy %u.\n", PR_SCTP_POLICY(sp->sinfo_flags)); break; } } static int sctp_msg_append(struct sctp_tcb *stcb, struct sctp_nets *net, struct mbuf *m, struct sctp_sndrcvinfo *srcv, int hold_stcb_lock) { int error = 0; struct mbuf *at; struct sctp_stream_queue_pending *sp = NULL; struct sctp_stream_out *strm; /* * Given an mbuf chain, put it into the association send queue and * place it on the wheel */ if (srcv->sinfo_stream >= stcb->asoc.streamoutcnt) { /* Invalid stream number */ SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out_now; } if ((stcb->asoc.stream_locked) && (stcb->asoc.stream_locked_on != srcv->sinfo_stream)) { SCTP_LTRACE_ERR_RET_PKT(m, NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out_now; } strm = &stcb->asoc.strmout[srcv->sinfo_stream]; /* Now can we send this? */ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) || (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) || (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) || (stcb->asoc.state & SCTP_STATE_SHUTDOWN_PENDING)) { /* got data while shutting down */ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET); error = ECONNRESET; goto out_now; } sctp_alloc_a_strmoq(stcb, sp); if (sp == NULL) { SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); error = ENOMEM; goto out_now; } sp->sinfo_flags = srcv->sinfo_flags; sp->timetolive = srcv->sinfo_timetolive; sp->ppid = srcv->sinfo_ppid; sp->context = srcv->sinfo_context; sp->fsn = 0; if (sp->sinfo_flags & SCTP_ADDR_OVER) { sp->net = net; atomic_add_int(&sp->net->ref_count, 1); } else { sp->net = NULL; } (void)SCTP_GETTIME_TIMEVAL(&sp->ts); sp->sid = srcv->sinfo_stream; sp->msg_is_complete = 1; sp->sender_all_done = 1; sp->some_taken = 0; sp->data = m; sp->tail_mbuf = NULL; sctp_set_prsctp_policy(sp); /* * We could in theory (for sendall) sifa the length in, but we would * still have to hunt through the chain since we need to setup the * tail_mbuf */ sp->length = 0; for (at = m; at; at = SCTP_BUF_NEXT(at)) { if (SCTP_BUF_NEXT(at) == NULL) sp->tail_mbuf = at; sp->length += SCTP_BUF_LEN(at); } if (srcv->sinfo_keynumber_valid) { sp->auth_keyid = srcv->sinfo_keynumber; } else { sp->auth_keyid = stcb->asoc.authinfo.active_keyid; } if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) { sctp_auth_key_acquire(stcb, sp->auth_keyid); sp->holds_key_ref = 1; } if (hold_stcb_lock == 0) { SCTP_TCB_SEND_LOCK(stcb); } sctp_snd_sb_alloc(stcb, sp->length); atomic_add_int(&stcb->asoc.stream_queue_cnt, 1); TAILQ_INSERT_TAIL(&strm->outqueue, sp, next); stcb->asoc.ss_functions.sctp_ss_add_to_stream(stcb, &stcb->asoc, strm, sp, 1); m = NULL; if (hold_stcb_lock == 0) { SCTP_TCB_SEND_UNLOCK(stcb); } out_now: if (m) { sctp_m_freem(m); } return (error); } static struct mbuf * sctp_copy_mbufchain(struct mbuf *clonechain, struct mbuf *outchain, struct mbuf **endofchain, int can_take_mbuf, int sizeofcpy, uint8_t copy_by_ref) { struct mbuf *m; struct mbuf *appendchain; caddr_t cp; int len; if (endofchain == NULL) { /* error */ error_out: if (outchain) sctp_m_freem(outchain); return (NULL); } if (can_take_mbuf) { appendchain = clonechain; } else { if (!copy_by_ref && (sizeofcpy <= (int)((((SCTP_BASE_SYSCTL(sctp_mbuf_threshold_count) - 1) * MLEN) + MHLEN))) ) { /* Its not in a cluster */ if (*endofchain == NULL) { /* lets get a mbuf cluster */ if (outchain == NULL) { /* This is the general case */ new_mbuf: outchain = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_HEADER); if (outchain == NULL) { goto error_out; } SCTP_BUF_LEN(outchain) = 0; *endofchain = outchain; /* get the prepend space */ SCTP_BUF_RESV_UF(outchain, (SCTP_FIRST_MBUF_RESV + 4)); } else { /* * We really should not get a NULL * in endofchain */ /* find end */ m = outchain; while (m) { if (SCTP_BUF_NEXT(m) == NULL) { *endofchain = m; break; } m = SCTP_BUF_NEXT(m); } /* sanity */ if (*endofchain == NULL) { /* * huh, TSNH XXX maybe we * should panic */ sctp_m_freem(outchain); goto new_mbuf; } } /* get the new end of length */ len = (int)M_TRAILINGSPACE(*endofchain); } else { /* how much is left at the end? */ len = (int)M_TRAILINGSPACE(*endofchain); } /* Find the end of the data, for appending */ cp = (mtod((*endofchain), caddr_t)+SCTP_BUF_LEN((*endofchain))); /* Now lets copy it out */ if (len >= sizeofcpy) { /* It all fits, copy it in */ m_copydata(clonechain, 0, sizeofcpy, cp); SCTP_BUF_LEN((*endofchain)) += sizeofcpy; } else { /* fill up the end of the chain */ if (len > 0) { m_copydata(clonechain, 0, len, cp); SCTP_BUF_LEN((*endofchain)) += len; /* now we need another one */ sizeofcpy -= len; } m = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_HEADER); if (m == NULL) { /* We failed */ goto error_out; } SCTP_BUF_NEXT((*endofchain)) = m; *endofchain = m; cp = mtod((*endofchain), caddr_t); m_copydata(clonechain, len, sizeofcpy, cp); SCTP_BUF_LEN((*endofchain)) += sizeofcpy; } return (outchain); } else { /* copy the old fashion way */ appendchain = SCTP_M_COPYM(clonechain, 0, M_COPYALL, M_NOWAIT); #ifdef SCTP_MBUF_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(appendchain, SCTP_MBUF_ICOPY); } #endif } } if (appendchain == NULL) { /* error */ if (outchain) sctp_m_freem(outchain); return (NULL); } if (outchain) { /* tack on to the end */ if (*endofchain != NULL) { SCTP_BUF_NEXT(((*endofchain))) = appendchain; } else { m = outchain; while (m) { if (SCTP_BUF_NEXT(m) == NULL) { SCTP_BUF_NEXT(m) = appendchain; break; } m = SCTP_BUF_NEXT(m); } } /* * save off the end and update the end-chain position */ m = appendchain; while (m) { if (SCTP_BUF_NEXT(m) == NULL) { *endofchain = m; break; } m = SCTP_BUF_NEXT(m); } return (outchain); } else { /* save off the end and update the end-chain position */ m = appendchain; while (m) { if (SCTP_BUF_NEXT(m) == NULL) { *endofchain = m; break; } m = SCTP_BUF_NEXT(m); } return (appendchain); } } static int sctp_med_chunk_output(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_association *asoc, int *num_out, int *reason_code, int control_only, int from_where, struct timeval *now, int *now_filled, int frag_point, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ); static void sctp_sendall_iterator(struct sctp_inpcb *inp, struct sctp_tcb *stcb, void *ptr, uint32_t val SCTP_UNUSED) { struct sctp_copy_all *ca; struct mbuf *m; int ret = 0; int added_control = 0; int un_sent, do_chunk_output = 1; struct sctp_association *asoc; struct sctp_nets *net; ca = (struct sctp_copy_all *)ptr; if (ca->m == NULL) { return; } if (ca->inp != inp) { /* TSNH */ return; } if (ca->sndlen > 0) { m = SCTP_M_COPYM(ca->m, 0, M_COPYALL, M_NOWAIT); if (m == NULL) { /* can't copy so we are done */ ca->cnt_failed++; return; } #ifdef SCTP_MBUF_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(m, SCTP_MBUF_ICOPY); } #endif } else { m = NULL; } SCTP_TCB_LOCK_ASSERT(stcb); if (stcb->asoc.alternate) { net = stcb->asoc.alternate; } else { net = stcb->asoc.primary_destination; } if (ca->sndrcv.sinfo_flags & SCTP_ABORT) { /* Abort this assoc with m as the user defined reason */ if (m != NULL) { SCTP_BUF_PREPEND(m, sizeof(struct sctp_paramhdr), M_NOWAIT); } else { m = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_NOWAIT, 1, MT_DATA); SCTP_BUF_LEN(m) = sizeof(struct sctp_paramhdr); } if (m != NULL) { struct sctp_paramhdr *ph; ph = mtod(m, struct sctp_paramhdr *); ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons((uint16_t)(sizeof(struct sctp_paramhdr) + ca->sndlen)); } /* * We add one here to keep the assoc from dis-appearing on * us. */ atomic_add_int(&stcb->asoc.refcnt, 1); sctp_abort_an_association(inp, stcb, m, SCTP_SO_NOT_LOCKED); /* * sctp_abort_an_association calls sctp_free_asoc() free * association will NOT free it since we incremented the * refcnt .. we do this to prevent it being freed and things * getting tricky since we could end up (from free_asoc) * calling inpcb_free which would get a recursive lock call * to the iterator lock.. But as a consequence of that the * stcb will return to us un-locked.. since free_asoc * returns with either no TCB or the TCB unlocked, we must * relock.. to unlock in the iterator timer :-0 */ SCTP_TCB_LOCK(stcb); atomic_add_int(&stcb->asoc.refcnt, -1); goto no_chunk_output; } else { if (m) { ret = sctp_msg_append(stcb, net, m, &ca->sndrcv, 1); } asoc = &stcb->asoc; if (ca->sndrcv.sinfo_flags & SCTP_EOF) { /* shutdown this assoc */ if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue) && sctp_is_there_unsent_data(stcb, SCTP_SO_NOT_LOCKED) == 0) { if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) { goto abort_anyway; } /* * there is nothing queued to send, so I'm * done... */ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) && (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) && (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) { /* * only send SHUTDOWN the first time * through */ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT); sctp_stop_timers_for_shutdown(stcb); sctp_send_shutdown(stcb, net); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, net); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, asoc->primary_destination); added_control = 1; do_chunk_output = 0; } } else { /* * we still got (or just got) data to send, * so set SHUTDOWN_PENDING */ /* * XXX sockets draft says that SCTP_EOF * should be sent with no data. currently, * we will allow user data to be sent first * and move to SHUTDOWN-PENDING */ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) && (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) && (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) { if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) { SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT); } SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING); if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue) && (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) { struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; abort_anyway: snprintf(msg, sizeof(msg), "%s:%d at %s", __FILE__, __LINE__, __func__); op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), msg); atomic_add_int(&stcb->asoc.refcnt, 1); sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_NOT_LOCKED); atomic_add_int(&stcb->asoc.refcnt, -1); goto no_chunk_output; } sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, asoc->primary_destination); } } } } un_sent = ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) + (stcb->asoc.stream_queue_cnt * SCTP_DATA_CHUNK_OVERHEAD(stcb))); if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) && (stcb->asoc.total_flight > 0) && (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))) { do_chunk_output = 0; } if (do_chunk_output) sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_NOT_LOCKED); else if (added_control) { int num_out, reason, now_filled = 0; struct timeval now; int frag_point; frag_point = sctp_get_frag_point(stcb, &stcb->asoc); (void)sctp_med_chunk_output(inp, stcb, &stcb->asoc, &num_out, &reason, 1, 1, &now, &now_filled, frag_point, SCTP_SO_NOT_LOCKED); } no_chunk_output: if (ret) { ca->cnt_failed++; } else { ca->cnt_sent++; } } static void sctp_sendall_completes(void *ptr, uint32_t val SCTP_UNUSED) { struct sctp_copy_all *ca; ca = (struct sctp_copy_all *)ptr; /* * Do a notify here? Kacheong suggests that the notify be done at * the send time.. so you would push up a notification if any send * failed. Don't know if this is feasible since the only failures we * have is "memory" related and if you cannot get an mbuf to send * the data you surely can't get an mbuf to send up to notify the * user you can't send the data :-> */ /* now free everything */ sctp_m_freem(ca->m); SCTP_FREE(ca, SCTP_M_COPYAL); } static struct mbuf * sctp_copy_out_all(struct uio *uio, int len) { struct mbuf *ret, *at; int left, willcpy, cancpy, error; ret = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_WAITOK, 1, MT_DATA); if (ret == NULL) { /* TSNH */ return (NULL); } left = len; SCTP_BUF_LEN(ret) = 0; /* save space for the data chunk header */ cancpy = (int)M_TRAILINGSPACE(ret); willcpy = min(cancpy, left); at = ret; while (left > 0) { /* Align data to the end */ error = uiomove(mtod(at, caddr_t), willcpy, uio); if (error) { err_out_now: sctp_m_freem(at); return (NULL); } SCTP_BUF_LEN(at) = willcpy; SCTP_BUF_NEXT_PKT(at) = SCTP_BUF_NEXT(at) = 0; left -= willcpy; if (left > 0) { SCTP_BUF_NEXT(at) = sctp_get_mbuf_for_msg(left, 0, M_WAITOK, 1, MT_DATA); if (SCTP_BUF_NEXT(at) == NULL) { goto err_out_now; } at = SCTP_BUF_NEXT(at); SCTP_BUF_LEN(at) = 0; cancpy = (int)M_TRAILINGSPACE(at); willcpy = min(cancpy, left); } } return (ret); } static int sctp_sendall(struct sctp_inpcb *inp, struct uio *uio, struct mbuf *m, struct sctp_sndrcvinfo *srcv) { int ret; struct sctp_copy_all *ca; SCTP_MALLOC(ca, struct sctp_copy_all *, sizeof(struct sctp_copy_all), SCTP_M_COPYAL); if (ca == NULL) { sctp_m_freem(m); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } memset(ca, 0, sizeof(struct sctp_copy_all)); ca->inp = inp; if (srcv) { memcpy(&ca->sndrcv, srcv, sizeof(struct sctp_nonpad_sndrcvinfo)); } /* * take off the sendall flag, it would be bad if we failed to do * this :-0 */ ca->sndrcv.sinfo_flags &= ~SCTP_SENDALL; /* get length and mbuf chain */ if (uio) { ca->sndlen = (int)uio->uio_resid; ca->m = sctp_copy_out_all(uio, ca->sndlen); if (ca->m == NULL) { SCTP_FREE(ca, SCTP_M_COPYAL); SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } } else { /* Gather the length of the send */ struct mbuf *mat; ca->sndlen = 0; for (mat = m; mat; mat = SCTP_BUF_NEXT(mat)) { ca->sndlen += SCTP_BUF_LEN(mat); } } ret = sctp_initiate_iterator(NULL, sctp_sendall_iterator, NULL, SCTP_PCB_ANY_FLAGS, SCTP_PCB_ANY_FEATURES, SCTP_ASOC_ANY_STATE, (void *)ca, 0, sctp_sendall_completes, inp, 1); if (ret) { SCTP_PRINTF("Failed to initiate iterator for sendall\n"); SCTP_FREE(ca, SCTP_M_COPYAL); SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT); return (EFAULT); } return (0); } void sctp_toss_old_cookies(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_tmit_chunk *chk, *nchk; TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) { if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) { TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt--; if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); } } } void sctp_toss_old_asconf(struct sctp_tcb *stcb) { struct sctp_association *asoc; struct sctp_tmit_chunk *chk, *nchk; struct sctp_asconf_chunk *acp; asoc = &stcb->asoc; TAILQ_FOREACH_SAFE(chk, &asoc->asconf_send_queue, sctp_next, nchk) { /* find SCTP_ASCONF chunk in queue */ if (chk->rec.chunk_id.id == SCTP_ASCONF) { if (chk->data) { acp = mtod(chk->data, struct sctp_asconf_chunk *); if (SCTP_TSN_GT(ntohl(acp->serial_number), asoc->asconf_seq_out_acked)) { /* Not Acked yet */ break; } } TAILQ_REMOVE(&asoc->asconf_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt--; if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); } } } static void sctp_clean_up_datalist(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_tmit_chunk **data_list, int bundle_at, struct sctp_nets *net) { int i; struct sctp_tmit_chunk *tp1; for (i = 0; i < bundle_at; i++) { /* off of the send queue */ TAILQ_REMOVE(&asoc->send_queue, data_list[i], sctp_next); asoc->send_queue_cnt--; if (i > 0) { /* * Any chunk NOT 0 you zap the time chunk 0 gets * zapped or set based on if a RTO measurment is * needed. */ data_list[i]->do_rtt = 0; } /* record time */ data_list[i]->sent_rcv_time = net->last_sent_time; data_list[i]->rec.data.cwnd_at_send = net->cwnd; data_list[i]->rec.data.fast_retran_tsn = data_list[i]->rec.data.tsn; if (data_list[i]->whoTo == NULL) { data_list[i]->whoTo = net; atomic_add_int(&net->ref_count, 1); } /* on to the sent queue */ tp1 = TAILQ_LAST(&asoc->sent_queue, sctpchunk_listhead); if ((tp1) && SCTP_TSN_GT(tp1->rec.data.tsn, data_list[i]->rec.data.tsn)) { struct sctp_tmit_chunk *tpp; /* need to move back */ back_up_more: tpp = TAILQ_PREV(tp1, sctpchunk_listhead, sctp_next); if (tpp == NULL) { TAILQ_INSERT_BEFORE(tp1, data_list[i], sctp_next); goto all_done; } tp1 = tpp; if (SCTP_TSN_GT(tp1->rec.data.tsn, data_list[i]->rec.data.tsn)) { goto back_up_more; } TAILQ_INSERT_AFTER(&asoc->sent_queue, tp1, data_list[i], sctp_next); } else { TAILQ_INSERT_TAIL(&asoc->sent_queue, data_list[i], sctp_next); } all_done: /* This does not lower until the cum-ack passes it */ asoc->sent_queue_cnt++; if ((asoc->peers_rwnd <= 0) && (asoc->total_flight == 0) && (bundle_at == 1)) { /* Mark the chunk as being a window probe */ SCTP_STAT_INCR(sctps_windowprobed); } #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xC2, 3); #endif data_list[i]->sent = SCTP_DATAGRAM_SENT; data_list[i]->snd_count = 1; data_list[i]->rec.data.chunk_was_revoked = 0; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { sctp_misc_ints(SCTP_FLIGHT_LOG_UP, data_list[i]->whoTo->flight_size, data_list[i]->book_size, (uint32_t)(uintptr_t)data_list[i]->whoTo, data_list[i]->rec.data.tsn); } sctp_flight_size_increase(data_list[i]); sctp_total_flight_increase(stcb, data_list[i]); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) { sctp_log_rwnd(SCTP_DECREASE_PEER_RWND, asoc->peers_rwnd, data_list[i]->send_size, SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)); } asoc->peers_rwnd = sctp_sbspace_sub(asoc->peers_rwnd, (uint32_t)(data_list[i]->send_size + SCTP_BASE_SYSCTL(sctp_peer_chunk_oh))); if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) { /* SWS sender side engages */ asoc->peers_rwnd = 0; } } if (asoc->cc_functions.sctp_cwnd_update_packet_transmitted) { (*asoc->cc_functions.sctp_cwnd_update_packet_transmitted) (stcb, net); } } static void sctp_clean_up_ctl(struct sctp_tcb *stcb, struct sctp_association *asoc, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { struct sctp_tmit_chunk *chk, *nchk; TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) { if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) || (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK) || /* EY */ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) || (chk->rec.chunk_id.id == SCTP_HEARTBEAT_ACK) || (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) || (chk->rec.chunk_id.id == SCTP_SHUTDOWN) || (chk->rec.chunk_id.id == SCTP_SHUTDOWN_ACK) || (chk->rec.chunk_id.id == SCTP_OPERATION_ERROR) || (chk->rec.chunk_id.id == SCTP_PACKET_DROPPED) || (chk->rec.chunk_id.id == SCTP_COOKIE_ACK) || (chk->rec.chunk_id.id == SCTP_ECN_CWR) || (chk->rec.chunk_id.id == SCTP_ASCONF_ACK)) { /* Stray chunks must be cleaned up */ clean_up_anyway: TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt--; if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) { asoc->fwd_tsn_cnt--; } sctp_free_a_chunk(stcb, chk, so_locked); } else if (chk->rec.chunk_id.id == SCTP_STREAM_RESET) { /* special handling, we must look into the param */ if (chk != asoc->str_reset) { goto clean_up_anyway; } } } } static uint32_t sctp_can_we_split_this(struct sctp_tcb *stcb, uint32_t length, uint32_t space_left, uint32_t frag_point, int eeor_on) { /* * Make a decision on if I should split a msg into multiple parts. * This is only asked of incomplete messages. */ if (eeor_on) { /* * If we are doing EEOR we need to always send it if its the * entire thing, since it might be all the guy is putting in * the hopper. */ if (space_left >= length) { /*- * If we have data outstanding, * we get another chance when the sack * arrives to transmit - wait for more data */ if (stcb->asoc.total_flight == 0) { /* * If nothing is in flight, we zero the * packet counter. */ return (length); } return (0); } else { /* You can fill the rest */ return (space_left); } } /*- * For those strange folk that make the send buffer * smaller than our fragmentation point, we can't * get a full msg in so we have to allow splitting. */ if (SCTP_SB_LIMIT_SND(stcb->sctp_socket) < frag_point) { return (length); } if ((length <= space_left) || ((length - space_left) < SCTP_BASE_SYSCTL(sctp_min_residual))) { /* Sub-optimial residual don't split in non-eeor mode. */ return (0); } /* * If we reach here length is larger than the space_left. Do we wish * to split it for the sake of packet putting together? */ if (space_left >= min(SCTP_BASE_SYSCTL(sctp_min_split_point), frag_point)) { /* Its ok to split it */ return (min(space_left, frag_point)); } /* Nope, can't split */ return (0); } static uint32_t sctp_move_to_outqueue(struct sctp_tcb *stcb, struct sctp_stream_out *strq, uint32_t space_left, uint32_t frag_point, int *giveup, int eeor_mode, int *bail, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { /* Move from the stream to the send_queue keeping track of the total */ struct sctp_association *asoc; struct sctp_stream_queue_pending *sp; struct sctp_tmit_chunk *chk; struct sctp_data_chunk *dchkh = NULL; struct sctp_idata_chunk *ndchkh = NULL; uint32_t to_move, length; int leading; uint8_t rcv_flags = 0; uint8_t some_taken; uint8_t send_lock_up = 0; SCTP_TCB_LOCK_ASSERT(stcb); asoc = &stcb->asoc; one_more_time: /* sa_ignore FREED_MEMORY */ sp = TAILQ_FIRST(&strq->outqueue); if (sp == NULL) { if (send_lock_up == 0) { SCTP_TCB_SEND_LOCK(stcb); send_lock_up = 1; } sp = TAILQ_FIRST(&strq->outqueue); if (sp) { goto one_more_time; } if ((sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_EXPLICIT_EOR) == 0) && (stcb->asoc.idata_supported == 0) && (strq->last_msg_incomplete)) { SCTP_PRINTF("Huh? Stream:%d lm_in_c=%d but queue is NULL\n", strq->sid, strq->last_msg_incomplete); strq->last_msg_incomplete = 0; } to_move = 0; if (send_lock_up) { SCTP_TCB_SEND_UNLOCK(stcb); send_lock_up = 0; } goto out_of; } if ((sp->msg_is_complete) && (sp->length == 0)) { if (sp->sender_all_done) { /* - * We are doing defered cleanup. Last time through + * We are doing deferred cleanup. Last time through * when we took all the data the sender_all_done was * not set. */ if ((sp->put_last_out == 0) && (sp->discard_rest == 0)) { SCTP_PRINTF("Gak, put out entire msg with NO end!-1\n"); SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d send_lock:%d\n", sp->sender_all_done, sp->length, sp->msg_is_complete, sp->put_last_out, send_lock_up); } if ((TAILQ_NEXT(sp, next) == NULL) && (send_lock_up == 0)) { SCTP_TCB_SEND_LOCK(stcb); send_lock_up = 1; } atomic_subtract_int(&asoc->stream_queue_cnt, 1); TAILQ_REMOVE(&strq->outqueue, sp, next); stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp, send_lock_up); if ((strq->state == SCTP_STREAM_RESET_PENDING) && (strq->chunks_on_queues == 0) && TAILQ_EMPTY(&strq->outqueue)) { stcb->asoc.trigger_reset = 1; } if (sp->net) { sctp_free_remote_addr(sp->net); sp->net = NULL; } if (sp->data) { sctp_m_freem(sp->data); sp->data = NULL; } sctp_free_a_strmoq(stcb, sp, so_locked); /* we can't be locked to it */ if (send_lock_up) { SCTP_TCB_SEND_UNLOCK(stcb); send_lock_up = 0; } /* back to get the next msg */ goto one_more_time; } else { /* * sender just finished this but still holds a * reference */ *giveup = 1; to_move = 0; goto out_of; } } else { /* is there some to get */ if (sp->length == 0) { /* no */ *giveup = 1; to_move = 0; goto out_of; } else if (sp->discard_rest) { if (send_lock_up == 0) { SCTP_TCB_SEND_LOCK(stcb); send_lock_up = 1; } /* Whack down the size */ atomic_subtract_int(&stcb->asoc.total_output_queue_size, sp->length); if ((stcb->sctp_socket != NULL) && ((stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) || (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL))) { atomic_subtract_int(&stcb->sctp_socket->so_snd.sb_cc, sp->length); } if (sp->data) { sctp_m_freem(sp->data); sp->data = NULL; sp->tail_mbuf = NULL; } sp->length = 0; sp->some_taken = 1; *giveup = 1; to_move = 0; goto out_of; } } some_taken = sp->some_taken; re_look: length = sp->length; if (sp->msg_is_complete) { /* The message is complete */ to_move = min(length, frag_point); if (to_move == length) { /* All of it fits in the MTU */ if (sp->some_taken) { rcv_flags |= SCTP_DATA_LAST_FRAG; } else { rcv_flags |= SCTP_DATA_NOT_FRAG; } sp->put_last_out = 1; if (sp->sinfo_flags & SCTP_SACK_IMMEDIATELY) { rcv_flags |= SCTP_DATA_SACK_IMMEDIATELY; } } else { /* Not all of it fits, we fragment */ if (sp->some_taken == 0) { rcv_flags |= SCTP_DATA_FIRST_FRAG; } sp->some_taken = 1; } } else { to_move = sctp_can_we_split_this(stcb, length, space_left, frag_point, eeor_mode); if (to_move) { /*- * We use a snapshot of length in case it * is expanding during the compare. */ uint32_t llen; llen = length; if (to_move >= llen) { to_move = llen; if (send_lock_up == 0) { /*- * We are taking all of an incomplete msg * thus we need a send lock. */ SCTP_TCB_SEND_LOCK(stcb); send_lock_up = 1; if (sp->msg_is_complete) { /* * the sender finished the * msg */ goto re_look; } } } if (sp->some_taken == 0) { rcv_flags |= SCTP_DATA_FIRST_FRAG; sp->some_taken = 1; } } else { /* Nothing to take. */ *giveup = 1; to_move = 0; goto out_of; } } /* If we reach here, we can copy out a chunk */ sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { /* No chunk memory */ *giveup = 1; to_move = 0; goto out_of; } /* * Setup for unordered if needed by looking at the user sent info * flags. */ if (sp->sinfo_flags & SCTP_UNORDERED) { rcv_flags |= SCTP_DATA_UNORDERED; } if (SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) && (sp->sinfo_flags & SCTP_EOF) == SCTP_EOF) { rcv_flags |= SCTP_DATA_SACK_IMMEDIATELY; } /* clear out the chunk before setting up */ memset(chk, 0, sizeof(*chk)); chk->rec.data.rcv_flags = rcv_flags; if (to_move >= length) { /* we think we can steal the whole thing */ if ((sp->sender_all_done == 0) && (send_lock_up == 0)) { SCTP_TCB_SEND_LOCK(stcb); send_lock_up = 1; } if (to_move < sp->length) { /* bail, it changed */ goto dont_do_it; } chk->data = sp->data; chk->last_mbuf = sp->tail_mbuf; /* register the stealing */ sp->data = sp->tail_mbuf = NULL; } else { struct mbuf *m; dont_do_it: chk->data = SCTP_M_COPYM(sp->data, 0, to_move, M_NOWAIT); chk->last_mbuf = NULL; if (chk->data == NULL) { sp->some_taken = some_taken; sctp_free_a_chunk(stcb, chk, so_locked); *bail = 1; to_move = 0; goto out_of; } #ifdef SCTP_MBUF_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(chk->data, SCTP_MBUF_ICOPY); } #endif /* Pull off the data */ m_adj(sp->data, to_move); /* Now lets work our way down and compact it */ m = sp->data; while (m && (SCTP_BUF_LEN(m) == 0)) { sp->data = SCTP_BUF_NEXT(m); SCTP_BUF_NEXT(m) = NULL; if (sp->tail_mbuf == m) { /*- * Freeing tail? TSNH since * we supposedly were taking less * than the sp->length. */ #ifdef INVARIANTS panic("Huh, freing tail? - TSNH"); #else SCTP_PRINTF("Huh, freeing tail? - TSNH\n"); sp->tail_mbuf = sp->data = NULL; sp->length = 0; #endif } sctp_m_free(m); m = sp->data; } } if (SCTP_BUF_IS_EXTENDED(chk->data)) { chk->copy_by_ref = 1; } else { chk->copy_by_ref = 0; } /* * get last_mbuf and counts of mb usage This is ugly but hopefully * its only one mbuf. */ if (chk->last_mbuf == NULL) { chk->last_mbuf = chk->data; while (SCTP_BUF_NEXT(chk->last_mbuf) != NULL) { chk->last_mbuf = SCTP_BUF_NEXT(chk->last_mbuf); } } if (to_move > length) { /*- This should not happen either * since we always lower to_move to the size * of sp->length if its larger. */ #ifdef INVARIANTS panic("Huh, how can to_move be larger?"); #else SCTP_PRINTF("Huh, how can to_move be larger?\n"); sp->length = 0; #endif } else { atomic_subtract_int(&sp->length, to_move); } leading = SCTP_DATA_CHUNK_OVERHEAD(stcb); if (M_LEADINGSPACE(chk->data) < leading) { /* Not enough room for a chunk header, get some */ struct mbuf *m; m = sctp_get_mbuf_for_msg(1, 0, M_NOWAIT, 1, MT_DATA); if (m == NULL) { /* * we're in trouble here. _PREPEND below will free * all the data if there is no leading space, so we * must put the data back and restore. */ if (send_lock_up == 0) { SCTP_TCB_SEND_LOCK(stcb); send_lock_up = 1; } if (sp->data == NULL) { /* unsteal the data */ sp->data = chk->data; sp->tail_mbuf = chk->last_mbuf; } else { struct mbuf *m_tmp; /* reassemble the data */ m_tmp = sp->data; sp->data = chk->data; SCTP_BUF_NEXT(chk->last_mbuf) = m_tmp; } sp->some_taken = some_taken; atomic_add_int(&sp->length, to_move); chk->data = NULL; *bail = 1; sctp_free_a_chunk(stcb, chk, so_locked); to_move = 0; goto out_of; } else { SCTP_BUF_LEN(m) = 0; SCTP_BUF_NEXT(m) = chk->data; chk->data = m; M_ALIGN(chk->data, 4); } } SCTP_BUF_PREPEND(chk->data, SCTP_DATA_CHUNK_OVERHEAD(stcb), M_NOWAIT); if (chk->data == NULL) { /* HELP, TSNH since we assured it would not above? */ #ifdef INVARIANTS panic("prepend failes HELP?"); #else SCTP_PRINTF("prepend fails HELP?\n"); sctp_free_a_chunk(stcb, chk, so_locked); #endif *bail = 1; to_move = 0; goto out_of; } sctp_snd_sb_alloc(stcb, SCTP_DATA_CHUNK_OVERHEAD(stcb)); chk->book_size = chk->send_size = (uint16_t)(to_move + SCTP_DATA_CHUNK_OVERHEAD(stcb)); chk->book_size_scale = 0; chk->sent = SCTP_DATAGRAM_UNSENT; chk->flags = 0; chk->asoc = &stcb->asoc; chk->pad_inplace = 0; chk->no_fr_allowed = 0; if (stcb->asoc.idata_supported == 0) { if (rcv_flags & SCTP_DATA_UNORDERED) { /* Just use 0. The receiver ignores the values. */ chk->rec.data.mid = 0; } else { chk->rec.data.mid = strq->next_mid_ordered; if (rcv_flags & SCTP_DATA_LAST_FRAG) { strq->next_mid_ordered++; } } } else { if (rcv_flags & SCTP_DATA_UNORDERED) { chk->rec.data.mid = strq->next_mid_unordered; if (rcv_flags & SCTP_DATA_LAST_FRAG) { strq->next_mid_unordered++; } } else { chk->rec.data.mid = strq->next_mid_ordered; if (rcv_flags & SCTP_DATA_LAST_FRAG) { strq->next_mid_ordered++; } } } chk->rec.data.sid = sp->sid; chk->rec.data.ppid = sp->ppid; chk->rec.data.context = sp->context; chk->rec.data.doing_fast_retransmit = 0; chk->rec.data.timetodrop = sp->ts; chk->flags = sp->act_flags; if (sp->net) { chk->whoTo = sp->net; atomic_add_int(&chk->whoTo->ref_count, 1); } else chk->whoTo = NULL; if (sp->holds_key_ref) { chk->auth_keyid = sp->auth_keyid; sctp_auth_key_acquire(stcb, chk->auth_keyid); chk->holds_key_ref = 1; } chk->rec.data.tsn = atomic_fetchadd_int(&asoc->sending_seq, 1); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_AT_SEND_2_OUTQ) { sctp_misc_ints(SCTP_STRMOUT_LOG_SEND, (uint32_t)(uintptr_t)stcb, sp->length, (uint32_t)((chk->rec.data.sid << 16) | (0x0000ffff & chk->rec.data.mid)), chk->rec.data.tsn); } if (stcb->asoc.idata_supported == 0) { dchkh = mtod(chk->data, struct sctp_data_chunk *); } else { ndchkh = mtod(chk->data, struct sctp_idata_chunk *); } /* * Put the rest of the things in place now. Size was done earlier in * previous loop prior to padding. */ #ifdef SCTP_ASOCLOG_OF_TSNS SCTP_TCB_LOCK_ASSERT(stcb); if (asoc->tsn_out_at >= SCTP_TSN_LOG_SIZE) { asoc->tsn_out_at = 0; asoc->tsn_out_wrapped = 1; } asoc->out_tsnlog[asoc->tsn_out_at].tsn = chk->rec.data.tsn; asoc->out_tsnlog[asoc->tsn_out_at].strm = chk->rec.data.sid; asoc->out_tsnlog[asoc->tsn_out_at].seq = chk->rec.data.mid; asoc->out_tsnlog[asoc->tsn_out_at].sz = chk->send_size; asoc->out_tsnlog[asoc->tsn_out_at].flgs = chk->rec.data.rcv_flags; asoc->out_tsnlog[asoc->tsn_out_at].stcb = (void *)stcb; asoc->out_tsnlog[asoc->tsn_out_at].in_pos = asoc->tsn_out_at; asoc->out_tsnlog[asoc->tsn_out_at].in_out = 2; asoc->tsn_out_at++; #endif if (stcb->asoc.idata_supported == 0) { dchkh->ch.chunk_type = SCTP_DATA; dchkh->ch.chunk_flags = chk->rec.data.rcv_flags; dchkh->dp.tsn = htonl(chk->rec.data.tsn); dchkh->dp.sid = htons(strq->sid); dchkh->dp.ssn = htons((uint16_t)chk->rec.data.mid); dchkh->dp.ppid = chk->rec.data.ppid; dchkh->ch.chunk_length = htons(chk->send_size); } else { ndchkh->ch.chunk_type = SCTP_IDATA; ndchkh->ch.chunk_flags = chk->rec.data.rcv_flags; ndchkh->dp.tsn = htonl(chk->rec.data.tsn); ndchkh->dp.sid = htons(strq->sid); ndchkh->dp.reserved = htons(0); ndchkh->dp.mid = htonl(chk->rec.data.mid); if (sp->fsn == 0) ndchkh->dp.ppid_fsn.ppid = chk->rec.data.ppid; else ndchkh->dp.ppid_fsn.fsn = htonl(sp->fsn); sp->fsn++; ndchkh->ch.chunk_length = htons(chk->send_size); } /* Now advance the chk->send_size by the actual pad needed. */ if (chk->send_size < SCTP_SIZE32(chk->book_size)) { /* need a pad */ struct mbuf *lm; int pads; pads = SCTP_SIZE32(chk->book_size) - chk->send_size; lm = sctp_pad_lastmbuf(chk->data, pads, chk->last_mbuf); if (lm != NULL) { chk->last_mbuf = lm; chk->pad_inplace = 1; } chk->send_size += pads; } if (PR_SCTP_ENABLED(chk->flags)) { asoc->pr_sctp_cnt++; } if (sp->msg_is_complete && (sp->length == 0) && (sp->sender_all_done)) { /* All done pull and kill the message */ if (sp->put_last_out == 0) { SCTP_PRINTF("Gak, put out entire msg with NO end!-2\n"); SCTP_PRINTF("sender_done:%d len:%d msg_comp:%d put_last_out:%d send_lock:%d\n", sp->sender_all_done, sp->length, sp->msg_is_complete, sp->put_last_out, send_lock_up); } if ((send_lock_up == 0) && (TAILQ_NEXT(sp, next) == NULL)) { SCTP_TCB_SEND_LOCK(stcb); send_lock_up = 1; } atomic_subtract_int(&asoc->stream_queue_cnt, 1); TAILQ_REMOVE(&strq->outqueue, sp, next); stcb->asoc.ss_functions.sctp_ss_remove_from_stream(stcb, asoc, strq, sp, send_lock_up); if ((strq->state == SCTP_STREAM_RESET_PENDING) && (strq->chunks_on_queues == 0) && TAILQ_EMPTY(&strq->outqueue)) { stcb->asoc.trigger_reset = 1; } if (sp->net) { sctp_free_remote_addr(sp->net); sp->net = NULL; } if (sp->data) { sctp_m_freem(sp->data); sp->data = NULL; } sctp_free_a_strmoq(stcb, sp, so_locked); } asoc->chunks_on_out_queue++; strq->chunks_on_queues++; TAILQ_INSERT_TAIL(&asoc->send_queue, chk, sctp_next); asoc->send_queue_cnt++; out_of: if (send_lock_up) { SCTP_TCB_SEND_UNLOCK(stcb); } return (to_move); } static void sctp_fill_outqueue(struct sctp_tcb *stcb, struct sctp_nets *net, int frag_point, int eeor_mode, int *quit_now, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { struct sctp_association *asoc; struct sctp_stream_out *strq; uint32_t space_left, moved, total_moved; int bail, giveup; SCTP_TCB_LOCK_ASSERT(stcb); asoc = &stcb->asoc; total_moved = 0; switch (net->ro._l_addr.sa.sa_family) { #ifdef INET case AF_INET: space_left = net->mtu - SCTP_MIN_V4_OVERHEAD; break; #endif #ifdef INET6 case AF_INET6: space_left = net->mtu - SCTP_MIN_OVERHEAD; break; #endif default: /* TSNH */ space_left = net->mtu; break; } /* Need an allowance for the data chunk header too */ space_left -= SCTP_DATA_CHUNK_OVERHEAD(stcb); /* must make even word boundary */ space_left &= 0xfffffffc; strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc); giveup = 0; bail = 0; while ((space_left > 0) && (strq != NULL)) { moved = sctp_move_to_outqueue(stcb, strq, space_left, frag_point, &giveup, eeor_mode, &bail, so_locked); stcb->asoc.ss_functions.sctp_ss_scheduled(stcb, net, asoc, strq, moved); if ((giveup != 0) || (bail != 0)) { break; } strq = stcb->asoc.ss_functions.sctp_ss_select_stream(stcb, net, asoc); total_moved += moved; space_left -= moved; if (space_left >= SCTP_DATA_CHUNK_OVERHEAD(stcb)) { space_left -= SCTP_DATA_CHUNK_OVERHEAD(stcb); } else { space_left = 0; } space_left &= 0xfffffffc; } if (bail != 0) *quit_now = 1; stcb->asoc.ss_functions.sctp_ss_packet_done(stcb, net, asoc); if (total_moved == 0) { if ((stcb->asoc.sctp_cmt_on_off == 0) && (net == stcb->asoc.primary_destination)) { /* ran dry for primary network net */ SCTP_STAT_INCR(sctps_primary_randry); } else if (stcb->asoc.sctp_cmt_on_off > 0) { /* ran dry with CMT on */ SCTP_STAT_INCR(sctps_cmt_randry); } } } void sctp_fix_ecn_echo(struct sctp_association *asoc) { struct sctp_tmit_chunk *chk; TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) { if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) { chk->sent = SCTP_DATAGRAM_UNSENT; } } } void sctp_move_chunks_from_net(struct sctp_tcb *stcb, struct sctp_nets *net) { struct sctp_association *asoc; struct sctp_tmit_chunk *chk; struct sctp_stream_queue_pending *sp; unsigned int i; if (net == NULL) { return; } asoc = &stcb->asoc; for (i = 0; i < stcb->asoc.streamoutcnt; i++) { TAILQ_FOREACH(sp, &stcb->asoc.strmout[i].outqueue, next) { if (sp->net == net) { sctp_free_remote_addr(sp->net); sp->net = NULL; } } } TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) { if (chk->whoTo == net) { sctp_free_remote_addr(chk->whoTo); chk->whoTo = NULL; } } } int sctp_med_chunk_output(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_association *asoc, int *num_out, int *reason_code, int control_only, int from_where, struct timeval *now, int *now_filled, int frag_point, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { /** * Ok this is the generic chunk service queue. we must do the * following: * - Service the stream queue that is next, moving any * message (note I must get a complete message i.e. FIRST/MIDDLE and * LAST to the out queue in one pass) and assigning TSN's. This * only applys though if the peer does not support NDATA. For NDATA * chunks its ok to not send the entire message ;-) * - Check to see if the cwnd/rwnd allows any output, if so we go ahead and * fomulate and send the low level chunks. Making sure to combine * any control in the control chunk queue also. */ struct sctp_nets *net, *start_at, *sack_goes_to = NULL, *old_start_at = NULL; struct mbuf *outchain, *endoutchain; struct sctp_tmit_chunk *chk, *nchk; /* temp arrays for unlinking */ struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING]; int no_fragmentflg, error; unsigned int max_rwnd_per_dest, max_send_per_dest; int one_chunk, hbflag, skip_data_for_this_net; int asconf, cookie, no_out_cnt; int bundle_at, ctl_cnt, no_data_chunks, eeor_mode; unsigned int mtu, r_mtu, omtu, mx_mtu, to_out; int tsns_sent = 0; uint32_t auth_offset = 0; struct sctp_auth_chunk *auth = NULL; uint16_t auth_keyid; int override_ok = 1; int skip_fill_up = 0; int data_auth_reqd = 0; /* * JRS 5/14/07 - Add flag for whether a heartbeat is sent to the * destination. */ int quit_now = 0; *num_out = 0; *reason_code = 0; auth_keyid = stcb->asoc.authinfo.active_keyid; if ((asoc->state & SCTP_STATE_SHUTDOWN_PENDING) || (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) || (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) { eeor_mode = 1; } else { eeor_mode = 0; } ctl_cnt = no_out_cnt = asconf = cookie = 0; /* * First lets prime the pump. For each destination, if there is room * in the flight size, attempt to pull an MTU's worth out of the * stream queues into the general send_queue */ #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xC2, 2); #endif SCTP_TCB_LOCK_ASSERT(stcb); hbflag = 0; if (control_only) no_data_chunks = 1; else no_data_chunks = 0; /* Nothing to possible to send? */ if ((TAILQ_EMPTY(&asoc->control_send_queue) || (asoc->ctrl_queue_cnt == stcb->asoc.ecn_echo_cnt_onq)) && TAILQ_EMPTY(&asoc->asconf_send_queue) && TAILQ_EMPTY(&asoc->send_queue) && sctp_is_there_unsent_data(stcb, so_locked) == 0) { nothing_to_send: *reason_code = 9; return (0); } if (asoc->peers_rwnd == 0) { /* No room in peers rwnd */ *reason_code = 1; if (asoc->total_flight > 0) { /* we are allowed one chunk in flight */ no_data_chunks = 1; } } if (stcb->asoc.ecn_echo_cnt_onq) { /* Record where a sack goes, if any */ if (no_data_chunks && (asoc->ctrl_queue_cnt == stcb->asoc.ecn_echo_cnt_onq)) { /* Nothing but ECNe to send - we don't do that */ goto nothing_to_send; } TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) { if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) || (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK)) { sack_goes_to = chk->whoTo; break; } } } max_rwnd_per_dest = ((asoc->peers_rwnd + asoc->total_flight) / asoc->numnets); if (stcb->sctp_socket) max_send_per_dest = SCTP_SB_LIMIT_SND(stcb->sctp_socket) / asoc->numnets; else max_send_per_dest = 0; if (no_data_chunks == 0) { /* How many non-directed chunks are there? */ TAILQ_FOREACH(chk, &asoc->send_queue, sctp_next) { if (chk->whoTo == NULL) { /* * We already have non-directed chunks on * the queue, no need to do a fill-up. */ skip_fill_up = 1; break; } } } if ((no_data_chunks == 0) && (skip_fill_up == 0) && (!stcb->asoc.ss_functions.sctp_ss_is_empty(stcb, asoc))) { TAILQ_FOREACH(net, &asoc->nets, sctp_next) { /* * This for loop we are in takes in each net, if * its's got space in cwnd and has data sent to it * (when CMT is off) then it calls * sctp_fill_outqueue for the net. This gets data on * the send queue for that network. * * In sctp_fill_outqueue TSN's are assigned and data * is copied out of the stream buffers. Note mostly * copy by reference (we hope). */ net->window_probe = 0; if ((net != stcb->asoc.alternate) && ((net->dest_state & SCTP_ADDR_PF) || (!(net->dest_state & SCTP_ADDR_REACHABLE)) || (net->dest_state & SCTP_ADDR_UNCONFIRMED))) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, 1, SCTP_CWND_LOG_FILL_OUTQ_CALLED); } continue; } if ((stcb->asoc.cc_functions.sctp_cwnd_new_transmission_begins) && (net->flight_size == 0)) { (*stcb->asoc.cc_functions.sctp_cwnd_new_transmission_begins) (stcb, net); } if (net->flight_size >= net->cwnd) { /* skip this network, no room - can't fill */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, 3, SCTP_CWND_LOG_FILL_OUTQ_CALLED); } continue; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, 4, SCTP_CWND_LOG_FILL_OUTQ_CALLED); } sctp_fill_outqueue(stcb, net, frag_point, eeor_mode, &quit_now, so_locked); if (quit_now) { /* memory alloc failure */ no_data_chunks = 1; break; } } } /* now service each destination and send out what we can for it */ /* Nothing to send? */ if (TAILQ_EMPTY(&asoc->control_send_queue) && TAILQ_EMPTY(&asoc->asconf_send_queue) && TAILQ_EMPTY(&asoc->send_queue)) { *reason_code = 8; return (0); } if (asoc->sctp_cmt_on_off > 0) { /* get the last start point */ start_at = asoc->last_net_cmt_send_started; if (start_at == NULL) { /* null so to beginning */ start_at = TAILQ_FIRST(&asoc->nets); } else { start_at = TAILQ_NEXT(asoc->last_net_cmt_send_started, sctp_next); if (start_at == NULL) { start_at = TAILQ_FIRST(&asoc->nets); } } asoc->last_net_cmt_send_started = start_at; } else { start_at = TAILQ_FIRST(&asoc->nets); } TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) { if (chk->whoTo == NULL) { if (asoc->alternate) { chk->whoTo = asoc->alternate; } else { chk->whoTo = asoc->primary_destination; } atomic_add_int(&chk->whoTo->ref_count, 1); } } old_start_at = NULL; again_one_more_time: for (net = start_at; net != NULL; net = TAILQ_NEXT(net, sctp_next)) { /* how much can we send? */ /* SCTPDBG("Examine for sending net:%x\n", (uint32_t)net); */ if (old_start_at && (old_start_at == net)) { /* through list ocmpletely. */ break; } tsns_sent = 0xa; if (TAILQ_EMPTY(&asoc->control_send_queue) && TAILQ_EMPTY(&asoc->asconf_send_queue) && (net->flight_size >= net->cwnd)) { /* * Nothing on control or asconf and flight is full, * we can skip even in the CMT case. */ continue; } bundle_at = 0; endoutchain = outchain = NULL; no_fragmentflg = 1; one_chunk = 0; if (net->dest_state & SCTP_ADDR_UNCONFIRMED) { skip_data_for_this_net = 1; } else { skip_data_for_this_net = 0; } switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) { #ifdef INET case AF_INET: mtu = net->mtu - SCTP_MIN_V4_OVERHEAD; break; #endif #ifdef INET6 case AF_INET6: mtu = net->mtu - SCTP_MIN_OVERHEAD; break; #endif default: /* TSNH */ mtu = net->mtu; break; } mx_mtu = mtu; to_out = 0; if (mtu > asoc->peers_rwnd) { if (asoc->total_flight > 0) { /* We have a packet in flight somewhere */ r_mtu = asoc->peers_rwnd; } else { /* We are always allowed to send one MTU out */ one_chunk = 1; r_mtu = mtu; } } else { r_mtu = mtu; } error = 0; /************************/ /* ASCONF transmission */ /************************/ /* Now first lets go through the asconf queue */ TAILQ_FOREACH_SAFE(chk, &asoc->asconf_send_queue, sctp_next, nchk) { if (chk->rec.chunk_id.id != SCTP_ASCONF) { continue; } if (chk->whoTo == NULL) { if (asoc->alternate == NULL) { if (asoc->primary_destination != net) { break; } } else { if (asoc->alternate != net) { break; } } } else { if (chk->whoTo != net) { break; } } if (chk->data == NULL) { break; } if (chk->sent != SCTP_DATAGRAM_UNSENT && chk->sent != SCTP_DATAGRAM_RESEND) { break; } /* * if no AUTH is yet included and this chunk * requires it, make sure to account for it. We * don't apply the size until the AUTH chunk is * actually added below in case there is no room for * this chunk. NOTE: we overload the use of "omtu" * here */ if ((auth == NULL) && sctp_auth_is_required_chunk(chk->rec.chunk_id.id, stcb->asoc.peer_auth_chunks)) { omtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id); } else omtu = 0; /* Here we do NOT factor the r_mtu */ if ((chk->send_size < (int)(mtu - omtu)) || (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) { /* * We probably should glom the mbuf chain * from the chk->data for control but the * problem is it becomes yet one more level * of tracking to do if for some reason * output fails. Then I have got to * reconstruct the merged control chain.. el * yucko.. for now we take the easy way and * do the copy */ /* * Add an AUTH chunk, if chunk requires it * save the offset into the chain for AUTH */ if ((auth == NULL) && (sctp_auth_is_required_chunk(chk->rec.chunk_id.id, stcb->asoc.peer_auth_chunks))) { outchain = sctp_add_auth_chunk(outchain, &endoutchain, &auth, &auth_offset, stcb, chk->rec.chunk_id.id); SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); } outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain, (int)chk->rec.chunk_id.can_take_data, chk->send_size, chk->copy_by_ref); if (outchain == NULL) { *reason_code = 8; SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); /* update our MTU size */ if (mtu > (chk->send_size + omtu)) mtu -= (chk->send_size + omtu); else mtu = 0; to_out += (chk->send_size + omtu); /* Do clear IP_DF ? */ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) { no_fragmentflg = 0; } if (chk->rec.chunk_id.can_take_data) chk->data = NULL; /* * set hb flag since we can use these for * RTO */ hbflag = 1; asconf = 1; /* * should sysctl this: don't bundle data * with ASCONF since it requires AUTH */ no_data_chunks = 1; chk->sent = SCTP_DATAGRAM_SENT; if (chk->whoTo == NULL) { chk->whoTo = net; atomic_add_int(&net->ref_count, 1); } chk->snd_count++; if (mtu == 0) { /* * Ok we are out of room but we can * output without effecting the * flight size since this little guy * is a control only packet. */ sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, net); /* * do NOT clear the asconf flag as * it is used to do appropriate * source address selection. */ if (*now_filled == 0) { (void)SCTP_GETTIME_TIMEVAL(now); *now_filled = 1; } net->last_sent_time = *now; hbflag = 0; if ((error = sctp_lowlevel_chunk_output(inp, stcb, net, (struct sockaddr *)&net->ro._l_addr, outchain, auth_offset, auth, stcb->asoc.authinfo.active_keyid, no_fragmentflg, 0, asconf, inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag), net->port, NULL, 0, 0, so_locked))) { /* * error, we could not * output */ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error); if (from_where == 0) { SCTP_STAT_INCR(sctps_lowlevelerrusr); } if (error == ENOBUFS) { asoc->ifp_had_enobuf = 1; SCTP_STAT_INCR(sctps_lowlevelerr); } /* error, could not output */ if (error == EHOSTUNREACH) { /* * Destination went * unreachable * during this send */ sctp_move_chunks_from_net(stcb, net); } *reason_code = 7; break; } else { asoc->ifp_had_enobuf = 0; } /* * increase the number we sent, if a * cookie is sent we don't tell them * any was sent out. */ outchain = endoutchain = NULL; auth = NULL; auth_offset = 0; if (!no_out_cnt) *num_out += ctl_cnt; /* recalc a clean slate and setup */ switch (net->ro._l_addr.sa.sa_family) { #ifdef INET case AF_INET: mtu = net->mtu - SCTP_MIN_V4_OVERHEAD; break; #endif #ifdef INET6 case AF_INET6: mtu = net->mtu - SCTP_MIN_OVERHEAD; break; #endif default: /* TSNH */ mtu = net->mtu; break; } to_out = 0; no_fragmentflg = 1; } } } if (error != 0) { /* try next net */ continue; } /************************/ /* Control transmission */ /************************/ /* Now first lets go through the control queue */ TAILQ_FOREACH_SAFE(chk, &asoc->control_send_queue, sctp_next, nchk) { if ((sack_goes_to) && (chk->rec.chunk_id.id == SCTP_ECN_ECHO) && (chk->whoTo != sack_goes_to)) { /* * if we have a sack in queue, and we are * looking at an ecn echo that is NOT queued * to where the sack is going.. */ if (chk->whoTo == net) { /* * Don't transmit it to where its * going (current net) */ continue; } else if (sack_goes_to == net) { /* * But do transmit it to this * address */ goto skip_net_check; } } if (chk->whoTo == NULL) { if (asoc->alternate == NULL) { if (asoc->primary_destination != net) { continue; } } else { if (asoc->alternate != net) { continue; } } } else { if (chk->whoTo != net) { continue; } } skip_net_check: if (chk->data == NULL) { continue; } if (chk->sent != SCTP_DATAGRAM_UNSENT) { /* * It must be unsent. Cookies and ASCONF's * hang around but there timers will force * when marked for resend. */ continue; } /* * if no AUTH is yet included and this chunk * requires it, make sure to account for it. We * don't apply the size until the AUTH chunk is * actually added below in case there is no room for * this chunk. NOTE: we overload the use of "omtu" * here */ if ((auth == NULL) && sctp_auth_is_required_chunk(chk->rec.chunk_id.id, stcb->asoc.peer_auth_chunks)) { omtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id); } else omtu = 0; /* Here we do NOT factor the r_mtu */ if ((chk->send_size <= (int)(mtu - omtu)) || (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) { /* * We probably should glom the mbuf chain * from the chk->data for control but the * problem is it becomes yet one more level * of tracking to do if for some reason * output fails. Then I have got to * reconstruct the merged control chain.. el * yucko.. for now we take the easy way and * do the copy */ /* * Add an AUTH chunk, if chunk requires it * save the offset into the chain for AUTH */ if ((auth == NULL) && (sctp_auth_is_required_chunk(chk->rec.chunk_id.id, stcb->asoc.peer_auth_chunks))) { outchain = sctp_add_auth_chunk(outchain, &endoutchain, &auth, &auth_offset, stcb, chk->rec.chunk_id.id); SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); } outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain, (int)chk->rec.chunk_id.can_take_data, chk->send_size, chk->copy_by_ref); if (outchain == NULL) { *reason_code = 8; SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); /* update our MTU size */ if (mtu > (chk->send_size + omtu)) mtu -= (chk->send_size + omtu); else mtu = 0; to_out += (chk->send_size + omtu); /* Do clear IP_DF ? */ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) { no_fragmentflg = 0; } if (chk->rec.chunk_id.can_take_data) chk->data = NULL; /* Mark things to be removed, if needed */ if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) || (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK) || /* EY */ (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) || (chk->rec.chunk_id.id == SCTP_HEARTBEAT_ACK) || (chk->rec.chunk_id.id == SCTP_SHUTDOWN) || (chk->rec.chunk_id.id == SCTP_SHUTDOWN_ACK) || (chk->rec.chunk_id.id == SCTP_OPERATION_ERROR) || (chk->rec.chunk_id.id == SCTP_COOKIE_ACK) || (chk->rec.chunk_id.id == SCTP_ECN_CWR) || (chk->rec.chunk_id.id == SCTP_PACKET_DROPPED) || (chk->rec.chunk_id.id == SCTP_ASCONF_ACK)) { if (chk->rec.chunk_id.id == SCTP_HEARTBEAT_REQUEST) { hbflag = 1; } /* remove these chunks at the end */ if ((chk->rec.chunk_id.id == SCTP_SELECTIVE_ACK) || (chk->rec.chunk_id.id == SCTP_NR_SELECTIVE_ACK)) { /* turn off the timer */ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) { sctp_timer_stop(SCTP_TIMER_TYPE_RECV, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_1); } } ctl_cnt++; } else { /* * Other chunks, since they have * timers running (i.e. COOKIE) we * just "trust" that it gets sent or * retransmitted. */ ctl_cnt++; if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) { cookie = 1; no_out_cnt = 1; } else if (chk->rec.chunk_id.id == SCTP_ECN_ECHO) { /* * Increment ecne send count * here this means we may be * over-zealous in our * counting if the send * fails, but its the best * place to do it (we used * to do it in the queue of * the chunk, but that did * not tell how many times * it was sent. */ SCTP_STAT_INCR(sctps_sendecne); } chk->sent = SCTP_DATAGRAM_SENT; if (chk->whoTo == NULL) { chk->whoTo = net; atomic_add_int(&net->ref_count, 1); } chk->snd_count++; } if (mtu == 0) { /* * Ok we are out of room but we can * output without effecting the * flight size since this little guy * is a control only packet. */ if (asconf) { sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, net); /* * do NOT clear the asconf * flag as it is used to do * appropriate source * address selection. */ } if (cookie) { sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net); cookie = 0; } /* Only HB or ASCONF advances time */ if (hbflag) { if (*now_filled == 0) { (void)SCTP_GETTIME_TIMEVAL(now); *now_filled = 1; } net->last_sent_time = *now; hbflag = 0; } if ((error = sctp_lowlevel_chunk_output(inp, stcb, net, (struct sockaddr *)&net->ro._l_addr, outchain, auth_offset, auth, stcb->asoc.authinfo.active_keyid, no_fragmentflg, 0, asconf, inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag), net->port, NULL, 0, 0, so_locked))) { /* * error, we could not * output */ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error); if (from_where == 0) { SCTP_STAT_INCR(sctps_lowlevelerrusr); } if (error == ENOBUFS) { asoc->ifp_had_enobuf = 1; SCTP_STAT_INCR(sctps_lowlevelerr); } if (error == EHOSTUNREACH) { /* * Destination went * unreachable * during this send */ sctp_move_chunks_from_net(stcb, net); } *reason_code = 7; break; } else { asoc->ifp_had_enobuf = 0; } /* * increase the number we sent, if a * cookie is sent we don't tell them * any was sent out. */ outchain = endoutchain = NULL; auth = NULL; auth_offset = 0; if (!no_out_cnt) *num_out += ctl_cnt; /* recalc a clean slate and setup */ switch (net->ro._l_addr.sa.sa_family) { #ifdef INET case AF_INET: mtu = net->mtu - SCTP_MIN_V4_OVERHEAD; break; #endif #ifdef INET6 case AF_INET6: mtu = net->mtu - SCTP_MIN_OVERHEAD; break; #endif default: /* TSNH */ mtu = net->mtu; break; } to_out = 0; no_fragmentflg = 1; } } } if (error != 0) { /* try next net */ continue; } /* JRI: if dest is in PF state, do not send data to it */ if ((asoc->sctp_cmt_on_off > 0) && (net != stcb->asoc.alternate) && (net->dest_state & SCTP_ADDR_PF)) { goto no_data_fill; } if (net->flight_size >= net->cwnd) { goto no_data_fill; } if ((asoc->sctp_cmt_on_off > 0) && (SCTP_BASE_SYSCTL(sctp_buffer_splitting) & SCTP_RECV_BUFFER_SPLITTING) && (net->flight_size > max_rwnd_per_dest)) { goto no_data_fill; } /* * We need a specific accounting for the usage of the send * buffer. We also need to check the number of messages per * net. For now, this is better than nothing and it disabled * by default... */ if ((asoc->sctp_cmt_on_off > 0) && (SCTP_BASE_SYSCTL(sctp_buffer_splitting) & SCTP_SEND_BUFFER_SPLITTING) && (max_send_per_dest > 0) && (net->flight_size > max_send_per_dest)) { goto no_data_fill; } /*********************/ /* Data transmission */ /*********************/ /* * if AUTH for DATA is required and no AUTH has been added * yet, account for this in the mtu now... if no data can be * bundled, this adjustment won't matter anyways since the * packet will be going out... */ data_auth_reqd = sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks); if (data_auth_reqd && (auth == NULL)) { mtu -= sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id); } /* now lets add any data within the MTU constraints */ switch (((struct sockaddr *)&net->ro._l_addr)->sa_family) { #ifdef INET case AF_INET: if (net->mtu > SCTP_MIN_V4_OVERHEAD) omtu = net->mtu - SCTP_MIN_V4_OVERHEAD; else omtu = 0; break; #endif #ifdef INET6 case AF_INET6: if (net->mtu > SCTP_MIN_OVERHEAD) omtu = net->mtu - SCTP_MIN_OVERHEAD; else omtu = 0; break; #endif default: /* TSNH */ omtu = 0; break; } if ((((SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) || (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED)) && (skip_data_for_this_net == 0)) || (cookie)) { TAILQ_FOREACH_SAFE(chk, &asoc->send_queue, sctp_next, nchk) { if (no_data_chunks) { /* let only control go out */ *reason_code = 1; break; } if (net->flight_size >= net->cwnd) { /* skip this net, no room for data */ *reason_code = 2; break; } if ((chk->whoTo != NULL) && (chk->whoTo != net)) { /* Don't send the chunk on this net */ continue; } if (asoc->sctp_cmt_on_off == 0) { if ((asoc->alternate) && (asoc->alternate != net) && (chk->whoTo == NULL)) { continue; } else if ((net != asoc->primary_destination) && (asoc->alternate == NULL) && (chk->whoTo == NULL)) { continue; } } if ((chk->send_size > omtu) && ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) == 0)) { /*- * strange, we have a chunk that is * to big for its destination and * yet no fragment ok flag. * Something went wrong when the * PMTU changed...we did not mark * this chunk for some reason?? I * will fix it here by letting IP * fragment it for now and printing * a warning. This really should not * happen ... */ SCTP_PRINTF("Warning chunk of %d bytes > mtu:%d and yet PMTU disc missed\n", chk->send_size, mtu); chk->flags |= CHUNK_FLAGS_FRAGMENT_OK; } if (SCTP_BASE_SYSCTL(sctp_enable_sack_immediately) && (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) { struct sctp_data_chunk *dchkh; dchkh = mtod(chk->data, struct sctp_data_chunk *); dchkh->ch.chunk_flags |= SCTP_DATA_SACK_IMMEDIATELY; } if (((chk->send_size <= mtu) && (chk->send_size <= r_mtu)) || ((chk->flags & CHUNK_FLAGS_FRAGMENT_OK) && (chk->send_size <= asoc->peers_rwnd))) { /* ok we will add this one */ /* * Add an AUTH chunk, if chunk * requires it, save the offset into * the chain for AUTH */ if (data_auth_reqd) { if (auth == NULL) { outchain = sctp_add_auth_chunk(outchain, &endoutchain, &auth, &auth_offset, stcb, SCTP_DATA); auth_keyid = chk->auth_keyid; override_ok = 0; SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); } else if (override_ok) { /* * use this data's * keyid */ auth_keyid = chk->auth_keyid; override_ok = 0; } else if (auth_keyid != chk->auth_keyid) { /* * different keyid, * so done bundling */ break; } } outchain = sctp_copy_mbufchain(chk->data, outchain, &endoutchain, 0, chk->send_size, chk->copy_by_ref); if (outchain == NULL) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "No memory?\n"); if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) { sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net); } *reason_code = 3; SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } /* upate our MTU size */ /* Do clear IP_DF ? */ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) { no_fragmentflg = 0; } /* unsigned subtraction of mtu */ if (mtu > chk->send_size) mtu -= chk->send_size; else mtu = 0; /* unsigned subtraction of r_mtu */ if (r_mtu > chk->send_size) r_mtu -= chk->send_size; else r_mtu = 0; to_out += chk->send_size; if ((to_out > mx_mtu) && no_fragmentflg) { #ifdef INVARIANTS panic("Exceeding mtu of %d out size is %d", mx_mtu, to_out); #else SCTP_PRINTF("Exceeding mtu of %d out size is %d\n", mx_mtu, to_out); #endif } chk->window_probe = 0; data_list[bundle_at++] = chk; if (bundle_at >= SCTP_MAX_DATA_BUNDLING) { break; } if (chk->sent == SCTP_DATAGRAM_UNSENT) { if ((chk->rec.data.rcv_flags & SCTP_DATA_UNORDERED) == 0) { SCTP_STAT_INCR_COUNTER64(sctps_outorderchunks); } else { SCTP_STAT_INCR_COUNTER64(sctps_outunorderchunks); } if (((chk->rec.data.rcv_flags & SCTP_DATA_LAST_FRAG) == SCTP_DATA_LAST_FRAG) && ((chk->rec.data.rcv_flags & SCTP_DATA_FIRST_FRAG) == 0)) /* * Count number of * user msg's that * were fragmented * we do this by * counting when we * see a LAST * fragment only. */ SCTP_STAT_INCR_COUNTER64(sctps_fragusrmsgs); } if ((mtu == 0) || (r_mtu == 0) || (one_chunk)) { if ((one_chunk) && (stcb->asoc.total_flight == 0)) { data_list[0]->window_probe = 1; net->window_probe = 1; } break; } } else { /* * Must be sent in order of the * TSN's (on a network) */ break; } } /* for (chunk gather loop for this net) */ } /* if asoc.state OPEN */ no_data_fill: /* Is there something to send for this destination? */ if (outchain) { /* We may need to start a control timer or two */ if (asconf) { sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, net); /* * do NOT clear the asconf flag as it is * used to do appropriate source address * selection. */ } if (cookie) { sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, net); cookie = 0; } /* must start a send timer if data is being sent */ if (bundle_at && (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer))) { /* * no timer running on this destination * restart it. */ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net); } if (bundle_at || hbflag) { /* For data/asconf and hb set time */ if (*now_filled == 0) { (void)SCTP_GETTIME_TIMEVAL(now); *now_filled = 1; } net->last_sent_time = *now; } /* Now send it, if there is anything to send :> */ if ((error = sctp_lowlevel_chunk_output(inp, stcb, net, (struct sockaddr *)&net->ro._l_addr, outchain, auth_offset, auth, auth_keyid, no_fragmentflg, bundle_at, asconf, inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag), net->port, NULL, 0, 0, so_locked))) { /* error, we could not output */ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error); if (from_where == 0) { SCTP_STAT_INCR(sctps_lowlevelerrusr); } if (error == ENOBUFS) { asoc->ifp_had_enobuf = 1; SCTP_STAT_INCR(sctps_lowlevelerr); } if (error == EHOSTUNREACH) { /* * Destination went unreachable * during this send */ sctp_move_chunks_from_net(stcb, net); } *reason_code = 6; /*- * I add this line to be paranoid. As far as * I can tell the continue, takes us back to * the top of the for, but just to make sure * I will reset these again here. */ ctl_cnt = bundle_at = 0; continue; /* This takes us back to the * for() for the nets. */ } else { asoc->ifp_had_enobuf = 0; } endoutchain = NULL; auth = NULL; auth_offset = 0; if (!no_out_cnt) { *num_out += (ctl_cnt + bundle_at); } if (bundle_at) { /* setup for a RTO measurement */ tsns_sent = data_list[0]->rec.data.tsn; /* fill time if not already filled */ if (*now_filled == 0) { (void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent); *now_filled = 1; *now = asoc->time_last_sent; } else { asoc->time_last_sent = *now; } if (net->rto_needed) { data_list[0]->do_rtt = 1; net->rto_needed = 0; } SCTP_STAT_INCR_BY(sctps_senddata, bundle_at); sctp_clean_up_datalist(stcb, asoc, data_list, bundle_at, net); } if (one_chunk) { break; } } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, tsns_sent, SCTP_CWND_LOG_FROM_SEND); } } if (old_start_at == NULL) { old_start_at = start_at; start_at = TAILQ_FIRST(&asoc->nets); if (old_start_at) goto again_one_more_time; } /* * At the end there should be no NON timed chunks hanging on this * queue. */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, *num_out, SCTP_CWND_LOG_FROM_SEND); } if ((*num_out == 0) && (*reason_code == 0)) { *reason_code = 4; } else { *reason_code = 5; } sctp_clean_up_ctl(stcb, asoc, so_locked); return (0); } void sctp_queue_op_err(struct sctp_tcb *stcb, struct mbuf *op_err) { /*- * Prepend a OPERATIONAL_ERROR chunk header and put on the end of * the control chunk queue. */ struct sctp_chunkhdr *hdr; struct sctp_tmit_chunk *chk; struct mbuf *mat, *last_mbuf; uint32_t chunk_length; uint16_t padding_length; SCTP_TCB_LOCK_ASSERT(stcb); SCTP_BUF_PREPEND(op_err, sizeof(struct sctp_chunkhdr), M_NOWAIT); if (op_err == NULL) { return; } last_mbuf = NULL; chunk_length = 0; for (mat = op_err; mat != NULL; mat = SCTP_BUF_NEXT(mat)) { chunk_length += SCTP_BUF_LEN(mat); if (SCTP_BUF_NEXT(mat) == NULL) { last_mbuf = mat; } } if (chunk_length > SCTP_MAX_CHUNK_LENGTH) { sctp_m_freem(op_err); return; } padding_length = chunk_length % 4; if (padding_length != 0) { padding_length = 4 - padding_length; } if (padding_length != 0) { if (sctp_add_pad_tombuf(last_mbuf, padding_length) == NULL) { sctp_m_freem(op_err); return; } } sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { /* no memory */ sctp_m_freem(op_err); return; } chk->copy_by_ref = 0; chk->send_size = (uint16_t)chunk_length; chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->asoc = &stcb->asoc; chk->data = op_err; chk->whoTo = NULL; chk->rec.chunk_id.id = SCTP_OPERATION_ERROR; chk->rec.chunk_id.can_take_data = 0; hdr = mtod(op_err, struct sctp_chunkhdr *); hdr->chunk_type = SCTP_OPERATION_ERROR; hdr->chunk_flags = 0; hdr->chunk_length = htons(chk->send_size); TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next); chk->asoc->ctrl_queue_cnt++; } int sctp_send_cookie_echo(struct mbuf *m, int offset, struct sctp_tcb *stcb, struct sctp_nets *net) { /*- * pull out the cookie and put it at the front of the control chunk * queue. */ int at; struct mbuf *cookie; struct sctp_paramhdr param, *phdr; struct sctp_chunkhdr *hdr; struct sctp_tmit_chunk *chk; uint16_t ptype, plen; SCTP_TCB_LOCK_ASSERT(stcb); /* First find the cookie in the param area */ cookie = NULL; at = offset + sizeof(struct sctp_init_chunk); for (;;) { phdr = sctp_get_next_param(m, at, ¶m, sizeof(param)); if (phdr == NULL) { return (-3); } ptype = ntohs(phdr->param_type); plen = ntohs(phdr->param_length); if (ptype == SCTP_STATE_COOKIE) { int pad; /* found the cookie */ if ((pad = (plen % 4))) { plen += 4 - pad; } cookie = SCTP_M_COPYM(m, at, plen, M_NOWAIT); if (cookie == NULL) { /* No memory */ return (-2); } #ifdef SCTP_MBUF_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(cookie, SCTP_MBUF_ICOPY); } #endif break; } at += SCTP_SIZE32(plen); } /* ok, we got the cookie lets change it into a cookie echo chunk */ /* first the change from param to cookie */ hdr = mtod(cookie, struct sctp_chunkhdr *); hdr->chunk_type = SCTP_COOKIE_ECHO; hdr->chunk_flags = 0; /* get the chunk stuff now and place it in the FRONT of the queue */ sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { /* no memory */ sctp_m_freem(cookie); return (-5); } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_COOKIE_ECHO; chk->rec.chunk_id.can_take_data = 0; chk->flags = CHUNK_FLAGS_FRAGMENT_OK; chk->send_size = plen; chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->asoc = &stcb->asoc; chk->data = cookie; chk->whoTo = net; atomic_add_int(&chk->whoTo->ref_count, 1); TAILQ_INSERT_HEAD(&chk->asoc->control_send_queue, chk, sctp_next); chk->asoc->ctrl_queue_cnt++; return (0); } void sctp_send_heartbeat_ack(struct sctp_tcb *stcb, struct mbuf *m, int offset, int chk_length, struct sctp_nets *net) { /* * take a HB request and make it into a HB ack and send it. */ struct mbuf *outchain; struct sctp_chunkhdr *chdr; struct sctp_tmit_chunk *chk; if (net == NULL) /* must have a net pointer */ return; outchain = SCTP_M_COPYM(m, offset, chk_length, M_NOWAIT); if (outchain == NULL) { /* gak out of memory */ return; } #ifdef SCTP_MBUF_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(outchain, SCTP_MBUF_ICOPY); } #endif chdr = mtod(outchain, struct sctp_chunkhdr *); chdr->chunk_type = SCTP_HEARTBEAT_ACK; chdr->chunk_flags = 0; if (chk_length % 4) { /* need pad */ uint32_t cpthis = 0; int padlen; padlen = 4 - (chk_length % 4); m_copyback(outchain, chk_length, padlen, (caddr_t)&cpthis); } sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { /* no memory */ sctp_m_freem(outchain); return; } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_HEARTBEAT_ACK; chk->rec.chunk_id.can_take_data = 1; chk->flags = 0; chk->send_size = chk_length; chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->asoc = &stcb->asoc; chk->data = outchain; chk->whoTo = net; atomic_add_int(&chk->whoTo->ref_count, 1); TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next); chk->asoc->ctrl_queue_cnt++; } void sctp_send_cookie_ack(struct sctp_tcb *stcb) { /* formulate and queue a cookie-ack back to sender */ struct mbuf *cookie_ack; struct sctp_chunkhdr *hdr; struct sctp_tmit_chunk *chk; SCTP_TCB_LOCK_ASSERT(stcb); cookie_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_NOWAIT, 1, MT_HEADER); if (cookie_ack == NULL) { /* no mbuf's */ return; } SCTP_BUF_RESV_UF(cookie_ack, SCTP_MIN_OVERHEAD); sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { /* no memory */ sctp_m_freem(cookie_ack); return; } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_COOKIE_ACK; chk->rec.chunk_id.can_take_data = 1; chk->flags = 0; chk->send_size = sizeof(struct sctp_chunkhdr); chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->asoc = &stcb->asoc; chk->data = cookie_ack; if (chk->asoc->last_control_chunk_from != NULL) { chk->whoTo = chk->asoc->last_control_chunk_from; atomic_add_int(&chk->whoTo->ref_count, 1); } else { chk->whoTo = NULL; } hdr = mtod(cookie_ack, struct sctp_chunkhdr *); hdr->chunk_type = SCTP_COOKIE_ACK; hdr->chunk_flags = 0; hdr->chunk_length = htons(chk->send_size); SCTP_BUF_LEN(cookie_ack) = chk->send_size; TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next); chk->asoc->ctrl_queue_cnt++; return; } void sctp_send_shutdown_ack(struct sctp_tcb *stcb, struct sctp_nets *net) { /* formulate and queue a SHUTDOWN-ACK back to the sender */ struct mbuf *m_shutdown_ack; struct sctp_shutdown_ack_chunk *ack_cp; struct sctp_tmit_chunk *chk; m_shutdown_ack = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_ack_chunk), 0, M_NOWAIT, 1, MT_HEADER); if (m_shutdown_ack == NULL) { /* no mbuf's */ return; } SCTP_BUF_RESV_UF(m_shutdown_ack, SCTP_MIN_OVERHEAD); sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { /* no memory */ sctp_m_freem(m_shutdown_ack); return; } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_SHUTDOWN_ACK; chk->rec.chunk_id.can_take_data = 1; chk->flags = 0; chk->send_size = sizeof(struct sctp_chunkhdr); chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->flags = 0; chk->asoc = &stcb->asoc; chk->data = m_shutdown_ack; chk->whoTo = net; if (chk->whoTo) { atomic_add_int(&chk->whoTo->ref_count, 1); } ack_cp = mtod(m_shutdown_ack, struct sctp_shutdown_ack_chunk *); ack_cp->ch.chunk_type = SCTP_SHUTDOWN_ACK; ack_cp->ch.chunk_flags = 0; ack_cp->ch.chunk_length = htons(chk->send_size); SCTP_BUF_LEN(m_shutdown_ack) = chk->send_size; TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next); chk->asoc->ctrl_queue_cnt++; return; } void sctp_send_shutdown(struct sctp_tcb *stcb, struct sctp_nets *net) { /* formulate and queue a SHUTDOWN to the sender */ struct mbuf *m_shutdown; struct sctp_shutdown_chunk *shutdown_cp; struct sctp_tmit_chunk *chk; TAILQ_FOREACH(chk, &stcb->asoc.control_send_queue, sctp_next) { if (chk->rec.chunk_id.id == SCTP_SHUTDOWN) { /* We already have a SHUTDOWN queued. Reuse it. */ if (chk->whoTo) { sctp_free_remote_addr(chk->whoTo); chk->whoTo = NULL; } break; } } if (chk == NULL) { m_shutdown = sctp_get_mbuf_for_msg(sizeof(struct sctp_shutdown_chunk), 0, M_NOWAIT, 1, MT_HEADER); if (m_shutdown == NULL) { /* no mbuf's */ return; } SCTP_BUF_RESV_UF(m_shutdown, SCTP_MIN_OVERHEAD); sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { /* no memory */ sctp_m_freem(m_shutdown); return; } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_SHUTDOWN; chk->rec.chunk_id.can_take_data = 1; chk->flags = 0; chk->send_size = sizeof(struct sctp_shutdown_chunk); chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->flags = 0; chk->asoc = &stcb->asoc; chk->data = m_shutdown; chk->whoTo = net; if (chk->whoTo) { atomic_add_int(&chk->whoTo->ref_count, 1); } shutdown_cp = mtod(m_shutdown, struct sctp_shutdown_chunk *); shutdown_cp->ch.chunk_type = SCTP_SHUTDOWN; shutdown_cp->ch.chunk_flags = 0; shutdown_cp->ch.chunk_length = htons(chk->send_size); shutdown_cp->cumulative_tsn_ack = htonl(stcb->asoc.cumulative_tsn); SCTP_BUF_LEN(m_shutdown) = chk->send_size; TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next); chk->asoc->ctrl_queue_cnt++; } else { TAILQ_REMOVE(&stcb->asoc.control_send_queue, chk, sctp_next); chk->whoTo = net; if (chk->whoTo) { atomic_add_int(&chk->whoTo->ref_count, 1); } shutdown_cp = mtod(chk->data, struct sctp_shutdown_chunk *); shutdown_cp->cumulative_tsn_ack = htonl(stcb->asoc.cumulative_tsn); TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next); } return; } void sctp_send_asconf(struct sctp_tcb *stcb, struct sctp_nets *net, int addr_locked) { /* * formulate and queue an ASCONF to the peer. ASCONF parameters * should be queued on the assoc queue. */ struct sctp_tmit_chunk *chk; struct mbuf *m_asconf; int len; SCTP_TCB_LOCK_ASSERT(stcb); if ((!TAILQ_EMPTY(&stcb->asoc.asconf_send_queue)) && (!sctp_is_feature_on(stcb->sctp_ep, SCTP_PCB_FLAGS_MULTIPLE_ASCONFS))) { /* can't send a new one if there is one in flight already */ return; } /* compose an ASCONF chunk, maximum length is PMTU */ m_asconf = sctp_compose_asconf(stcb, &len, addr_locked); if (m_asconf == NULL) { return; } sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { /* no memory */ sctp_m_freem(m_asconf); return; } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_ASCONF; chk->rec.chunk_id.can_take_data = 0; chk->flags = CHUNK_FLAGS_FRAGMENT_OK; chk->data = m_asconf; chk->send_size = len; chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->asoc = &stcb->asoc; chk->whoTo = net; if (chk->whoTo) { atomic_add_int(&chk->whoTo->ref_count, 1); } TAILQ_INSERT_TAIL(&chk->asoc->asconf_send_queue, chk, sctp_next); chk->asoc->ctrl_queue_cnt++; return; } void sctp_send_asconf_ack(struct sctp_tcb *stcb) { /* * formulate and queue a asconf-ack back to sender. the asconf-ack * must be stored in the tcb. */ struct sctp_tmit_chunk *chk; struct sctp_asconf_ack *ack, *latest_ack; struct mbuf *m_ack; struct sctp_nets *net = NULL; SCTP_TCB_LOCK_ASSERT(stcb); /* Get the latest ASCONF-ACK */ latest_ack = TAILQ_LAST(&stcb->asoc.asconf_ack_sent, sctp_asconf_ackhead); if (latest_ack == NULL) { return; } if (latest_ack->last_sent_to != NULL && latest_ack->last_sent_to == stcb->asoc.last_control_chunk_from) { /* we're doing a retransmission */ net = sctp_find_alternate_net(stcb, stcb->asoc.last_control_chunk_from, 0); if (net == NULL) { /* no alternate */ if (stcb->asoc.last_control_chunk_from == NULL) { if (stcb->asoc.alternate) { net = stcb->asoc.alternate; } else { net = stcb->asoc.primary_destination; } } else { net = stcb->asoc.last_control_chunk_from; } } } else { /* normal case */ if (stcb->asoc.last_control_chunk_from == NULL) { if (stcb->asoc.alternate) { net = stcb->asoc.alternate; } else { net = stcb->asoc.primary_destination; } } else { net = stcb->asoc.last_control_chunk_from; } } latest_ack->last_sent_to = net; TAILQ_FOREACH(ack, &stcb->asoc.asconf_ack_sent, next) { if (ack->data == NULL) { continue; } /* copy the asconf_ack */ m_ack = SCTP_M_COPYM(ack->data, 0, M_COPYALL, M_NOWAIT); if (m_ack == NULL) { /* couldn't copy it */ return; } #ifdef SCTP_MBUF_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_MBUF_LOGGING_ENABLE) { sctp_log_mbc(m_ack, SCTP_MBUF_ICOPY); } #endif sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { /* no memory */ if (m_ack) sctp_m_freem(m_ack); return; } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_ASCONF_ACK; chk->rec.chunk_id.can_take_data = 1; chk->flags = CHUNK_FLAGS_FRAGMENT_OK; chk->whoTo = net; if (chk->whoTo) { atomic_add_int(&chk->whoTo->ref_count, 1); } chk->data = m_ack; chk->send_size = ack->len; chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->asoc = &stcb->asoc; TAILQ_INSERT_TAIL(&chk->asoc->control_send_queue, chk, sctp_next); chk->asoc->ctrl_queue_cnt++; } return; } static int sctp_chunk_retransmission(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_association *asoc, int *cnt_out, struct timeval *now, int *now_filled, int *fr_done, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { /*- * send out one MTU of retransmission. If fast_retransmit is * happening we ignore the cwnd. Otherwise we obey the cwnd and * rwnd. For a Cookie or Asconf in the control chunk queue we * retransmit them by themselves. * * For data chunks we will pick out the lowest TSN's in the sent_queue * marked for resend and bundle them all together (up to a MTU of * destination). The address to send to should have been * selected/changed where the retransmission was marked (i.e. in FR * or t3-timeout routines). */ struct sctp_tmit_chunk *data_list[SCTP_MAX_DATA_BUNDLING]; struct sctp_tmit_chunk *chk, *fwd; struct mbuf *m, *endofchain; struct sctp_nets *net = NULL; uint32_t tsns_sent = 0; int no_fragmentflg, bundle_at, cnt_thru; unsigned int mtu; int error, i, one_chunk, fwd_tsn, ctl_cnt, tmr_started; struct sctp_auth_chunk *auth = NULL; uint32_t auth_offset = 0; uint16_t auth_keyid; int override_ok = 1; int data_auth_reqd = 0; uint32_t dmtu = 0; SCTP_TCB_LOCK_ASSERT(stcb); tmr_started = ctl_cnt = bundle_at = error = 0; no_fragmentflg = 1; fwd_tsn = 0; *cnt_out = 0; fwd = NULL; endofchain = m = NULL; auth_keyid = stcb->asoc.authinfo.active_keyid; #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xC3, 1); #endif if ((TAILQ_EMPTY(&asoc->sent_queue)) && (TAILQ_EMPTY(&asoc->control_send_queue))) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "SCTP hits empty queue with cnt set to %d?\n", asoc->sent_queue_retran_cnt); asoc->sent_queue_cnt = 0; asoc->sent_queue_cnt_removeable = 0; /* send back 0/0 so we enter normal transmission */ *cnt_out = 0; return (0); } TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) { if ((chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) || (chk->rec.chunk_id.id == SCTP_STREAM_RESET) || (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN)) { if (chk->sent != SCTP_DATAGRAM_RESEND) { continue; } if (chk->rec.chunk_id.id == SCTP_STREAM_RESET) { if (chk != asoc->str_reset) { /* * not eligible for retran if its * not ours */ continue; } } ctl_cnt++; if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) { fwd_tsn = 1; } /* * Add an AUTH chunk, if chunk requires it save the * offset into the chain for AUTH */ if ((auth == NULL) && (sctp_auth_is_required_chunk(chk->rec.chunk_id.id, stcb->asoc.peer_auth_chunks))) { m = sctp_add_auth_chunk(m, &endofchain, &auth, &auth_offset, stcb, chk->rec.chunk_id.id); SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); } m = sctp_copy_mbufchain(chk->data, m, &endofchain, 0, chk->send_size, chk->copy_by_ref); break; } } one_chunk = 0; cnt_thru = 0; /* do we have control chunks to retransmit? */ if (m != NULL) { /* Start a timer no matter if we succeed or fail */ if (chk->rec.chunk_id.id == SCTP_COOKIE_ECHO) { sctp_timer_start(SCTP_TIMER_TYPE_COOKIE, inp, stcb, chk->whoTo); } else if (chk->rec.chunk_id.id == SCTP_ASCONF) sctp_timer_start(SCTP_TIMER_TYPE_ASCONF, inp, stcb, chk->whoTo); chk->snd_count++; /* update our count */ if ((error = sctp_lowlevel_chunk_output(inp, stcb, chk->whoTo, (struct sockaddr *)&chk->whoTo->ro._l_addr, m, auth_offset, auth, stcb->asoc.authinfo.active_keyid, no_fragmentflg, 0, 0, inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag), chk->whoTo->port, NULL, 0, 0, so_locked))) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error); if (error == ENOBUFS) { asoc->ifp_had_enobuf = 1; SCTP_STAT_INCR(sctps_lowlevelerr); } return (error); } else { asoc->ifp_had_enobuf = 0; } endofchain = NULL; auth = NULL; auth_offset = 0; /* * We don't want to mark the net->sent time here since this * we use this for HB and retrans cannot measure RTT */ /* (void)SCTP_GETTIME_TIMEVAL(&chk->whoTo->last_sent_time); */ *cnt_out += 1; chk->sent = SCTP_DATAGRAM_SENT; sctp_ucount_decr(stcb->asoc.sent_queue_retran_cnt); if (fwd_tsn == 0) { return (0); } else { /* Clean up the fwd-tsn list */ sctp_clean_up_ctl(stcb, asoc, so_locked); return (0); } } /* * Ok, it is just data retransmission we need to do or that and a * fwd-tsn with it all. */ if (TAILQ_EMPTY(&asoc->sent_queue)) { return (SCTP_RETRAN_DONE); } if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED) || (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT)) { /* not yet open, resend the cookie and that is it */ return (1); } #ifdef SCTP_AUDITING_ENABLED sctp_auditing(20, inp, stcb, NULL); #endif data_auth_reqd = sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks); TAILQ_FOREACH(chk, &asoc->sent_queue, sctp_next) { if (chk->sent != SCTP_DATAGRAM_RESEND) { /* No, not sent to this net or not ready for rtx */ continue; } if (chk->data == NULL) { SCTP_PRINTF("TSN:%x chk->snd_count:%d chk->sent:%d can't retran - no data\n", chk->rec.data.tsn, chk->snd_count, chk->sent); continue; } if ((SCTP_BASE_SYSCTL(sctp_max_retran_chunk)) && (chk->snd_count >= SCTP_BASE_SYSCTL(sctp_max_retran_chunk))) { struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; snprintf(msg, sizeof(msg), "TSN %8.8x retransmitted %d times, giving up", chk->rec.data.tsn, chk->snd_count); op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), msg); atomic_add_int(&stcb->asoc.refcnt, 1); sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, so_locked); SCTP_TCB_LOCK(stcb); atomic_subtract_int(&stcb->asoc.refcnt, 1); return (SCTP_RETRAN_EXIT); } /* pick up the net */ net = chk->whoTo; switch (net->ro._l_addr.sa.sa_family) { #ifdef INET case AF_INET: mtu = net->mtu - SCTP_MIN_V4_OVERHEAD; break; #endif #ifdef INET6 case AF_INET6: mtu = net->mtu - SCTP_MIN_OVERHEAD; break; #endif default: /* TSNH */ mtu = net->mtu; break; } if ((asoc->peers_rwnd < mtu) && (asoc->total_flight > 0)) { /* No room in peers rwnd */ uint32_t tsn; tsn = asoc->last_acked_seq + 1; if (tsn == chk->rec.data.tsn) { /* * we make a special exception for this * case. The peer has no rwnd but is missing * the lowest chunk.. which is probably what * is holding up the rwnd. */ goto one_chunk_around; } return (1); } one_chunk_around: if (asoc->peers_rwnd < mtu) { one_chunk = 1; if ((asoc->peers_rwnd == 0) && (asoc->total_flight == 0)) { chk->window_probe = 1; chk->whoTo->window_probe = 1; } } #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xC3, 2); #endif bundle_at = 0; m = NULL; net->fast_retran_ip = 0; if (chk->rec.data.doing_fast_retransmit == 0) { /* * if no FR in progress skip destination that have * flight_size > cwnd. */ if (net->flight_size >= net->cwnd) { continue; } } else { /* * Mark the destination net to have FR recovery * limits put on it. */ *fr_done = 1; net->fast_retran_ip = 1; } /* * if no AUTH is yet included and this chunk requires it, * make sure to account for it. We don't apply the size * until the AUTH chunk is actually added below in case * there is no room for this chunk. */ if (data_auth_reqd && (auth == NULL)) { dmtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id); } else dmtu = 0; if ((chk->send_size <= (mtu - dmtu)) || (chk->flags & CHUNK_FLAGS_FRAGMENT_OK)) { /* ok we will add this one */ if (data_auth_reqd) { if (auth == NULL) { m = sctp_add_auth_chunk(m, &endofchain, &auth, &auth_offset, stcb, SCTP_DATA); auth_keyid = chk->auth_keyid; override_ok = 0; SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); } else if (override_ok) { auth_keyid = chk->auth_keyid; override_ok = 0; } else if (chk->auth_keyid != auth_keyid) { /* different keyid, so done bundling */ break; } } m = sctp_copy_mbufchain(chk->data, m, &endofchain, 0, chk->send_size, chk->copy_by_ref); if (m == NULL) { SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } /* Do clear IP_DF ? */ if (chk->flags & CHUNK_FLAGS_FRAGMENT_OK) { no_fragmentflg = 0; } /* upate our MTU size */ if (mtu > (chk->send_size + dmtu)) mtu -= (chk->send_size + dmtu); else mtu = 0; data_list[bundle_at++] = chk; if (one_chunk && (asoc->total_flight <= 0)) { SCTP_STAT_INCR(sctps_windowprobed); } } if (one_chunk == 0) { /* * now are there anymore forward from chk to pick * up? */ for (fwd = TAILQ_NEXT(chk, sctp_next); fwd != NULL; fwd = TAILQ_NEXT(fwd, sctp_next)) { if (fwd->sent != SCTP_DATAGRAM_RESEND) { /* Nope, not for retran */ continue; } if (fwd->whoTo != net) { /* Nope, not the net in question */ continue; } if (data_auth_reqd && (auth == NULL)) { dmtu = sctp_get_auth_chunk_len(stcb->asoc.peer_hmac_id); } else dmtu = 0; if (fwd->send_size <= (mtu - dmtu)) { if (data_auth_reqd) { if (auth == NULL) { m = sctp_add_auth_chunk(m, &endofchain, &auth, &auth_offset, stcb, SCTP_DATA); auth_keyid = fwd->auth_keyid; override_ok = 0; SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); } else if (override_ok) { auth_keyid = fwd->auth_keyid; override_ok = 0; } else if (fwd->auth_keyid != auth_keyid) { /* * different keyid, * so done bundling */ break; } } m = sctp_copy_mbufchain(fwd->data, m, &endofchain, 0, fwd->send_size, fwd->copy_by_ref); if (m == NULL) { SCTP_LTRACE_ERR_RET(inp, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } /* Do clear IP_DF ? */ if (fwd->flags & CHUNK_FLAGS_FRAGMENT_OK) { no_fragmentflg = 0; } /* upate our MTU size */ if (mtu > (fwd->send_size + dmtu)) mtu -= (fwd->send_size + dmtu); else mtu = 0; data_list[bundle_at++] = fwd; if (bundle_at >= SCTP_MAX_DATA_BUNDLING) { break; } } else { /* can't fit so we are done */ break; } } } /* Is there something to send for this destination? */ if (m) { /* * No matter if we fail/or succeed we should start a * timer. A failure is like a lost IP packet :-) */ if (!SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) { /* * no timer running on this destination * restart it. */ sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net); tmr_started = 1; } /* Now lets send it, if there is anything to send :> */ if ((error = sctp_lowlevel_chunk_output(inp, stcb, net, (struct sockaddr *)&net->ro._l_addr, m, auth_offset, auth, auth_keyid, no_fragmentflg, 0, 0, inp->sctp_lport, stcb->rport, htonl(stcb->asoc.peer_vtag), net->port, NULL, 0, 0, so_locked))) { /* error, we could not output */ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error); if (error == ENOBUFS) { asoc->ifp_had_enobuf = 1; SCTP_STAT_INCR(sctps_lowlevelerr); } return (error); } else { asoc->ifp_had_enobuf = 0; } endofchain = NULL; auth = NULL; auth_offset = 0; /* For HB's */ /* * We don't want to mark the net->sent time here * since this we use this for HB and retrans cannot * measure RTT */ /* (void)SCTP_GETTIME_TIMEVAL(&net->last_sent_time); */ /* For auto-close */ cnt_thru++; if (*now_filled == 0) { (void)SCTP_GETTIME_TIMEVAL(&asoc->time_last_sent); *now = asoc->time_last_sent; *now_filled = 1; } else { asoc->time_last_sent = *now; } *cnt_out += bundle_at; #ifdef SCTP_AUDITING_ENABLED sctp_audit_log(0xC4, bundle_at); #endif if (bundle_at) { tsns_sent = data_list[0]->rec.data.tsn; } for (i = 0; i < bundle_at; i++) { SCTP_STAT_INCR(sctps_sendretransdata); data_list[i]->sent = SCTP_DATAGRAM_SENT; /* * When we have a revoked data, and we * retransmit it, then we clear the revoked * flag since this flag dictates if we * subtracted from the fs */ if (data_list[i]->rec.data.chunk_was_revoked) { /* Deflate the cwnd */ data_list[i]->whoTo->cwnd -= data_list[i]->book_size; data_list[i]->rec.data.chunk_was_revoked = 0; } data_list[i]->snd_count++; sctp_ucount_decr(asoc->sent_queue_retran_cnt); /* record the time */ data_list[i]->sent_rcv_time = asoc->time_last_sent; if (data_list[i]->book_size_scale) { /* * need to double the book size on * this one */ data_list[i]->book_size_scale = 0; /* * Since we double the booksize, we * must also double the output queue * size, since this get shrunk when * we free by this amount. */ atomic_add_int(&((asoc)->total_output_queue_size), data_list[i]->book_size); data_list[i]->book_size *= 2; } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_RWND_ENABLE) { sctp_log_rwnd(SCTP_DECREASE_PEER_RWND, asoc->peers_rwnd, data_list[i]->send_size, SCTP_BASE_SYSCTL(sctp_peer_chunk_oh)); } asoc->peers_rwnd = sctp_sbspace_sub(asoc->peers_rwnd, (uint32_t)(data_list[i]->send_size + SCTP_BASE_SYSCTL(sctp_peer_chunk_oh))); } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FLIGHT_LOGGING_ENABLE) { sctp_misc_ints(SCTP_FLIGHT_LOG_UP_RSND, data_list[i]->whoTo->flight_size, data_list[i]->book_size, (uint32_t)(uintptr_t)data_list[i]->whoTo, data_list[i]->rec.data.tsn); } sctp_flight_size_increase(data_list[i]); sctp_total_flight_increase(stcb, data_list[i]); if (asoc->peers_rwnd < stcb->sctp_ep->sctp_ep.sctp_sws_sender) { /* SWS sender side engages */ asoc->peers_rwnd = 0; } if ((i == 0) && (data_list[i]->rec.data.doing_fast_retransmit)) { SCTP_STAT_INCR(sctps_sendfastretrans); if ((data_list[i] == TAILQ_FIRST(&asoc->sent_queue)) && (tmr_started == 0)) { /*- * ok we just fast-retrans'd * the lowest TSN, i.e the * first on the list. In * this case we want to give * some more time to get a * SACK back without a * t3-expiring. */ sctp_timer_stop(SCTP_TIMER_TYPE_SEND, inp, stcb, net, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_2); sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, net); } } } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, net, tsns_sent, SCTP_CWND_LOG_FROM_RESEND); } #ifdef SCTP_AUDITING_ENABLED sctp_auditing(21, inp, stcb, NULL); #endif } else { /* None will fit */ return (1); } if (asoc->sent_queue_retran_cnt <= 0) { /* all done we have no more to retran */ asoc->sent_queue_retran_cnt = 0; break; } if (one_chunk) { /* No more room in rwnd */ return (1); } /* stop the for loop here. we sent out a packet */ break; } return (0); } static void sctp_timer_validation(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_nets *net; /* Validate that a timer is running somewhere */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (SCTP_OS_TIMER_PENDING(&net->rxt_timer.timer)) { /* Here is a timer */ return; } } SCTP_TCB_LOCK_ASSERT(stcb); /* Gak, we did not have a timer somewhere */ SCTPDBG(SCTP_DEBUG_OUTPUT3, "Deadlock avoided starting timer on a dest at retran\n"); if (asoc->alternate) { sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, asoc->alternate); } else { sctp_timer_start(SCTP_TIMER_TYPE_SEND, inp, stcb, asoc->primary_destination); } return; } void sctp_chunk_output(struct sctp_inpcb *inp, struct sctp_tcb *stcb, int from_where, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { /*- * Ok this is the generic chunk service queue. we must do the * following: * - See if there are retransmits pending, if so we must * do these first. * - Service the stream queue that is next, moving any * message (note I must get a complete message i.e. * FIRST/MIDDLE and LAST to the out queue in one pass) and assigning * TSN's * - Check to see if the cwnd/rwnd allows any output, if so we * go ahead and fomulate and send the low level chunks. Making sure * to combine any control in the control chunk queue also. */ struct sctp_association *asoc; struct sctp_nets *net; int error = 0, num_out, tot_out = 0, ret = 0, reason_code; unsigned int burst_cnt = 0; struct timeval now; int now_filled = 0; int nagle_on; int frag_point = sctp_get_frag_point(stcb, &stcb->asoc); int un_sent = 0; int fr_done; unsigned int tot_frs = 0; asoc = &stcb->asoc; do_it_again: /* The Nagle algorithm is only applied when handling a send call. */ if (from_where == SCTP_OUTPUT_FROM_USR_SEND) { if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NODELAY)) { nagle_on = 0; } else { nagle_on = 1; } } else { nagle_on = 0; } SCTP_TCB_LOCK_ASSERT(stcb); un_sent = (stcb->asoc.total_output_queue_size - stcb->asoc.total_flight); if ((un_sent <= 0) && (TAILQ_EMPTY(&asoc->control_send_queue)) && (TAILQ_EMPTY(&asoc->asconf_send_queue)) && (asoc->sent_queue_retran_cnt == 0) && (asoc->trigger_reset == 0)) { /* Nothing to do unless there is something to be sent left */ return; } /* * Do we have something to send, data or control AND a sack timer * running, if so piggy-back the sack. */ if (SCTP_OS_TIMER_PENDING(&stcb->asoc.dack_timer.timer)) { sctp_send_sack(stcb, so_locked); (void)SCTP_OS_TIMER_STOP(&stcb->asoc.dack_timer.timer); } while (asoc->sent_queue_retran_cnt) { /*- * Ok, it is retransmission time only, we send out only ONE * packet with a single call off to the retran code. */ if (from_where == SCTP_OUTPUT_FROM_COOKIE_ACK) { /*- * Special hook for handling cookiess discarded * by peer that carried data. Send cookie-ack only * and then the next call with get the retran's. */ (void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1, from_where, &now, &now_filled, frag_point, so_locked); return; } else if (from_where != SCTP_OUTPUT_FROM_HB_TMR) { /* if its not from a HB then do it */ fr_done = 0; ret = sctp_chunk_retransmission(inp, stcb, asoc, &num_out, &now, &now_filled, &fr_done, so_locked); if (fr_done) { tot_frs++; } } else { /* * its from any other place, we don't allow retran * output (only control) */ ret = 1; } if (ret > 0) { /* Can't send anymore */ /*- * now lets push out control by calling med-level * output once. this assures that we WILL send HB's * if queued too. */ (void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1, from_where, &now, &now_filled, frag_point, so_locked); #ifdef SCTP_AUDITING_ENABLED sctp_auditing(8, inp, stcb, NULL); #endif sctp_timer_validation(inp, stcb, asoc); return; } if (ret < 0) { /*- * The count was off.. retran is not happening so do * the normal retransmission. */ #ifdef SCTP_AUDITING_ENABLED sctp_auditing(9, inp, stcb, NULL); #endif if (ret == SCTP_RETRAN_EXIT) { return; } break; } if (from_where == SCTP_OUTPUT_FROM_T3) { /* Only one transmission allowed out of a timeout */ #ifdef SCTP_AUDITING_ENABLED sctp_auditing(10, inp, stcb, NULL); #endif /* Push out any control */ (void)sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 1, from_where, &now, &now_filled, frag_point, so_locked); return; } if ((asoc->fr_max_burst > 0) && (tot_frs >= asoc->fr_max_burst)) { /* Hit FR burst limit */ return; } if ((num_out == 0) && (ret == 0)) { /* No more retrans to send */ break; } } #ifdef SCTP_AUDITING_ENABLED sctp_auditing(12, inp, stcb, NULL); #endif /* Check for bad destinations, if they exist move chunks around. */ TAILQ_FOREACH(net, &asoc->nets, sctp_next) { if (!(net->dest_state & SCTP_ADDR_REACHABLE)) { /*- * if possible move things off of this address we * still may send below due to the dormant state but * we try to find an alternate address to send to * and if we have one we move all queued data on the * out wheel to this alternate address. */ if (net->ref_count > 1) sctp_move_chunks_from_net(stcb, net); } else { /*- * if ((asoc->sat_network) || (net->addr_is_local)) * { burst_limit = asoc->max_burst * * SCTP_SAT_NETWORK_BURST_INCR; } */ if (asoc->max_burst > 0) { if (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst)) { if ((net->flight_size + (asoc->max_burst * net->mtu)) < net->cwnd) { /* * JRS - Use the congestion * control given in the * congestion control module */ asoc->cc_functions.sctp_cwnd_update_after_output(stcb, net, asoc->max_burst); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) { sctp_log_maxburst(stcb, net, 0, asoc->max_burst, SCTP_MAX_BURST_APPLIED); } SCTP_STAT_INCR(sctps_maxburstqueued); } net->fast_retran_ip = 0; } else { if (net->flight_size == 0) { /* * Should be decaying the * cwnd here */ ; } } } } } burst_cnt = 0; do { error = sctp_med_chunk_output(inp, stcb, asoc, &num_out, &reason_code, 0, from_where, &now, &now_filled, frag_point, so_locked); if (error) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "Error %d was returned from med-c-op\n", error); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) { sctp_log_maxburst(stcb, asoc->primary_destination, error, burst_cnt, SCTP_MAX_BURST_ERROR_STOP); } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, NULL, error, SCTP_SEND_NOW_COMPLETES); sctp_log_cwnd(stcb, NULL, 0xdeadbeef, SCTP_SEND_NOW_COMPLETES); } break; } SCTPDBG(SCTP_DEBUG_OUTPUT3, "m-c-o put out %d\n", num_out); tot_out += num_out; burst_cnt++; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, NULL, num_out, SCTP_SEND_NOW_COMPLETES); if (num_out == 0) { sctp_log_cwnd(stcb, NULL, reason_code, SCTP_SEND_NOW_COMPLETES); } } if (nagle_on) { /* * When the Nagle algorithm is used, look at how * much is unsent, then if its smaller than an MTU * and we have data in flight we stop, except if we * are handling a fragmented user message. */ un_sent = stcb->asoc.total_output_queue_size - stcb->asoc.total_flight; if ((un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD)) && (stcb->asoc.total_flight > 0)) { /* && sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR))) {*/ break; } } if (TAILQ_EMPTY(&asoc->control_send_queue) && TAILQ_EMPTY(&asoc->send_queue) && sctp_is_there_unsent_data(stcb, so_locked) == 0) { /* Nothing left to send */ break; } if ((stcb->asoc.total_output_queue_size - stcb->asoc.total_flight) <= 0) { /* Nothing left to send */ break; } } while (num_out && ((asoc->max_burst == 0) || SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) || (burst_cnt < asoc->max_burst))); if (SCTP_BASE_SYSCTL(sctp_use_cwnd_based_maxburst) == 0) { if ((asoc->max_burst > 0) && (burst_cnt >= asoc->max_burst)) { SCTP_STAT_INCR(sctps_maxburstqueued); asoc->burst_limit_applied = 1; if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_MAXBURST_ENABLE) { sctp_log_maxburst(stcb, asoc->primary_destination, 0, burst_cnt, SCTP_MAX_BURST_APPLIED); } } else { asoc->burst_limit_applied = 0; } } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_CWND_LOGGING_ENABLE) { sctp_log_cwnd(stcb, NULL, tot_out, SCTP_SEND_NOW_COMPLETES); } SCTPDBG(SCTP_DEBUG_OUTPUT1, "Ok, we have put out %d chunks\n", tot_out); /*- * Now we need to clean up the control chunk chain if a ECNE is on * it. It must be marked as UNSENT again so next call will continue * to send it until such time that we get a CWR, to remove it. */ if (stcb->asoc.ecn_echo_cnt_onq) sctp_fix_ecn_echo(asoc); if (stcb->asoc.trigger_reset) { if (sctp_send_stream_reset_out_if_possible(stcb, so_locked) == 0) { goto do_it_again; } } return; } int sctp_output( struct sctp_inpcb *inp, struct mbuf *m, struct sockaddr *addr, struct mbuf *control, struct thread *p, int flags) { if (inp == NULL) { SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL); return (EINVAL); } if (inp->sctp_socket == NULL) { SCTP_LTRACE_ERR_RET_PKT(m, inp, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL); return (EINVAL); } return (sctp_sosend(inp->sctp_socket, addr, (struct uio *)NULL, m, control, flags, p )); } void send_forward_tsn(struct sctp_tcb *stcb, struct sctp_association *asoc) { struct sctp_tmit_chunk *chk, *at, *tp1, *last; struct sctp_forward_tsn_chunk *fwdtsn; struct sctp_strseq *strseq; struct sctp_strseq_mid *strseq_m; uint32_t advance_peer_ack_point; unsigned int cnt_of_space, i, ovh; unsigned int space_needed; unsigned int cnt_of_skipped = 0; SCTP_TCB_LOCK_ASSERT(stcb); TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) { if (chk->rec.chunk_id.id == SCTP_FORWARD_CUM_TSN) { /* mark it to unsent */ chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; /* Do we correct its output location? */ if (chk->whoTo) { sctp_free_remote_addr(chk->whoTo); chk->whoTo = NULL; } goto sctp_fill_in_rest; } } /* Ok if we reach here we must build one */ sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { return; } asoc->fwd_tsn_cnt++; chk->copy_by_ref = 0; /* * We don't do the old thing here since this is used not for on-wire * but to tell if we are sending a fwd-tsn by the stack during * output. And if its a IFORWARD or a FORWARD it is a fwd-tsn. */ chk->rec.chunk_id.id = SCTP_FORWARD_CUM_TSN; chk->rec.chunk_id.can_take_data = 0; chk->flags = 0; chk->asoc = asoc; chk->whoTo = NULL; chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA); if (chk->data == NULL) { sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return; } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; TAILQ_INSERT_TAIL(&asoc->control_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt++; sctp_fill_in_rest: /*- * Here we go through and fill out the part that deals with * stream/seq of the ones we skip. */ SCTP_BUF_LEN(chk->data) = 0; TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) { if ((at->sent != SCTP_FORWARD_TSN_SKIP) && (at->sent != SCTP_DATAGRAM_NR_ACKED)) { /* no more to look at */ break; } if (!asoc->idata_supported && (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED)) { /* We don't report these */ continue; } cnt_of_skipped++; } if (asoc->idata_supported) { space_needed = (sizeof(struct sctp_forward_tsn_chunk) + (cnt_of_skipped * sizeof(struct sctp_strseq_mid))); } else { space_needed = (sizeof(struct sctp_forward_tsn_chunk) + (cnt_of_skipped * sizeof(struct sctp_strseq))); } cnt_of_space = (unsigned int)M_TRAILINGSPACE(chk->data); if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) { ovh = SCTP_MIN_OVERHEAD; } else { ovh = SCTP_MIN_V4_OVERHEAD; } if (cnt_of_space > (asoc->smallest_mtu - ovh)) { /* trim to a mtu size */ cnt_of_space = asoc->smallest_mtu - ovh; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) { sctp_misc_ints(SCTP_FWD_TSN_CHECK, 0xff, 0, cnt_of_skipped, asoc->advanced_peer_ack_point); } advance_peer_ack_point = asoc->advanced_peer_ack_point; if (cnt_of_space < space_needed) { /*- * ok we must trim down the chunk by lowering the * advance peer ack point. */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) { sctp_misc_ints(SCTP_FWD_TSN_CHECK, 0xff, 0xff, cnt_of_space, space_needed); } cnt_of_skipped = cnt_of_space - sizeof(struct sctp_forward_tsn_chunk); if (asoc->idata_supported) { cnt_of_skipped /= sizeof(struct sctp_strseq_mid); } else { cnt_of_skipped /= sizeof(struct sctp_strseq); } /*- * Go through and find the TSN that will be the one * we report. */ at = TAILQ_FIRST(&asoc->sent_queue); if (at != NULL) { for (i = 0; i < cnt_of_skipped; i++) { tp1 = TAILQ_NEXT(at, sctp_next); if (tp1 == NULL) { break; } at = tp1; } } if (at && SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOG_TRY_ADVANCE) { sctp_misc_ints(SCTP_FWD_TSN_CHECK, 0xff, cnt_of_skipped, at->rec.data.tsn, asoc->advanced_peer_ack_point); } last = at; /*- * last now points to last one I can report, update * peer ack point */ if (last) { advance_peer_ack_point = last->rec.data.tsn; } if (asoc->idata_supported) { space_needed = sizeof(struct sctp_forward_tsn_chunk) + cnt_of_skipped * sizeof(struct sctp_strseq_mid); } else { space_needed = sizeof(struct sctp_forward_tsn_chunk) + cnt_of_skipped * sizeof(struct sctp_strseq); } } chk->send_size = space_needed; /* Setup the chunk */ fwdtsn = mtod(chk->data, struct sctp_forward_tsn_chunk *); fwdtsn->ch.chunk_length = htons(chk->send_size); fwdtsn->ch.chunk_flags = 0; if (asoc->idata_supported) { fwdtsn->ch.chunk_type = SCTP_IFORWARD_CUM_TSN; } else { fwdtsn->ch.chunk_type = SCTP_FORWARD_CUM_TSN; } fwdtsn->new_cumulative_tsn = htonl(advance_peer_ack_point); SCTP_BUF_LEN(chk->data) = chk->send_size; fwdtsn++; /*- * Move pointer to after the fwdtsn and transfer to the * strseq pointer. */ if (asoc->idata_supported) { strseq_m = (struct sctp_strseq_mid *)fwdtsn; strseq = NULL; } else { strseq = (struct sctp_strseq *)fwdtsn; strseq_m = NULL; } /*- * Now populate the strseq list. This is done blindly * without pulling out duplicate stream info. This is * inefficent but won't harm the process since the peer will * look at these in sequence and will thus release anything. * It could mean we exceed the PMTU and chop off some that * we could have included.. but this is unlikely (aka 1432/4 * would mean 300+ stream seq's would have to be reported in * one FWD-TSN. With a bit of work we can later FIX this to * optimize and pull out duplicates.. but it does add more * overhead. So for now... not! */ i = 0; TAILQ_FOREACH(at, &asoc->sent_queue, sctp_next) { if (i >= cnt_of_skipped) { break; } if (!asoc->idata_supported && (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED)) { /* We don't report these */ continue; } if (at->rec.data.tsn == advance_peer_ack_point) { at->rec.data.fwd_tsn_cnt = 0; } if (asoc->idata_supported) { strseq_m->sid = htons(at->rec.data.sid); if (at->rec.data.rcv_flags & SCTP_DATA_UNORDERED) { strseq_m->flags = htons(PR_SCTP_UNORDERED_FLAG); } else { strseq_m->flags = 0; } strseq_m->mid = htonl(at->rec.data.mid); strseq_m++; } else { strseq->sid = htons(at->rec.data.sid); strseq->ssn = htons((uint16_t)at->rec.data.mid); strseq++; } i++; } return; } void sctp_send_sack(struct sctp_tcb *stcb, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { /*- * Queue up a SACK or NR-SACK in the control queue. * We must first check to see if a SACK or NR-SACK is * somehow on the control queue. * If so, we will take and and remove the old one. */ struct sctp_association *asoc; struct sctp_tmit_chunk *chk, *a_chk; struct sctp_sack_chunk *sack; struct sctp_nr_sack_chunk *nr_sack; struct sctp_gap_ack_block *gap_descriptor; const struct sack_track *selector; int mergeable = 0; int offset; caddr_t limit; uint32_t *dup; int limit_reached = 0; unsigned int i, siz, j; unsigned int num_gap_blocks = 0, num_nr_gap_blocks = 0, space; int num_dups = 0; int space_req; uint32_t highest_tsn; uint8_t flags; uint8_t type; uint8_t tsn_map; if (stcb->asoc.nrsack_supported == 1) { type = SCTP_NR_SELECTIVE_ACK; } else { type = SCTP_SELECTIVE_ACK; } a_chk = NULL; asoc = &stcb->asoc; SCTP_TCB_LOCK_ASSERT(stcb); if (asoc->last_data_chunk_from == NULL) { /* Hmm we never received anything */ return; } sctp_slide_mapping_arrays(stcb); sctp_set_rwnd(stcb, asoc); TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) { if (chk->rec.chunk_id.id == type) { /* Hmm, found a sack already on queue, remove it */ TAILQ_REMOVE(&asoc->control_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt--; a_chk = chk; if (a_chk->data) { sctp_m_freem(a_chk->data); a_chk->data = NULL; } if (a_chk->whoTo) { sctp_free_remote_addr(a_chk->whoTo); a_chk->whoTo = NULL; } break; } } if (a_chk == NULL) { sctp_alloc_a_chunk(stcb, a_chk); if (a_chk == NULL) { /* No memory so we drop the idea, and set a timer */ if (stcb->asoc.delayed_ack) { sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_3); sctp_timer_start(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL); } else { stcb->asoc.send_sack = 1; } return; } a_chk->copy_by_ref = 0; a_chk->rec.chunk_id.id = type; a_chk->rec.chunk_id.can_take_data = 1; } /* Clear our pkt counts */ asoc->data_pkts_seen = 0; a_chk->flags = 0; a_chk->asoc = asoc; a_chk->snd_count = 0; a_chk->send_size = 0; /* fill in later */ a_chk->sent = SCTP_DATAGRAM_UNSENT; a_chk->whoTo = NULL; if (!(asoc->last_data_chunk_from->dest_state & SCTP_ADDR_REACHABLE)) { /*- * Ok, the destination for the SACK is unreachable, lets see if * we can select an alternate to asoc->last_data_chunk_from */ a_chk->whoTo = sctp_find_alternate_net(stcb, asoc->last_data_chunk_from, 0); if (a_chk->whoTo == NULL) { /* Nope, no alternate */ a_chk->whoTo = asoc->last_data_chunk_from; } } else { a_chk->whoTo = asoc->last_data_chunk_from; } if (a_chk->whoTo) { atomic_add_int(&a_chk->whoTo->ref_count, 1); } if (SCTP_TSN_GT(asoc->highest_tsn_inside_map, asoc->highest_tsn_inside_nr_map)) { highest_tsn = asoc->highest_tsn_inside_map; } else { highest_tsn = asoc->highest_tsn_inside_nr_map; } if (highest_tsn == asoc->cumulative_tsn) { /* no gaps */ if (type == SCTP_SELECTIVE_ACK) { space_req = sizeof(struct sctp_sack_chunk); } else { space_req = sizeof(struct sctp_nr_sack_chunk); } } else { /* gaps get a cluster */ space_req = MCLBYTES; } /* Ok now lets formulate a MBUF with our sack */ a_chk->data = sctp_get_mbuf_for_msg(space_req, 0, M_NOWAIT, 1, MT_DATA); if ((a_chk->data == NULL) || (a_chk->whoTo == NULL)) { /* rats, no mbuf memory */ if (a_chk->data) { /* was a problem with the destination */ sctp_m_freem(a_chk->data); a_chk->data = NULL; } sctp_free_a_chunk(stcb, a_chk, so_locked); /* sa_ignore NO_NULL_CHK */ if (stcb->asoc.delayed_ack) { sctp_timer_stop(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_4); sctp_timer_start(SCTP_TIMER_TYPE_RECV, stcb->sctp_ep, stcb, NULL); } else { stcb->asoc.send_sack = 1; } return; } /* ok, lets go through and fill it in */ SCTP_BUF_RESV_UF(a_chk->data, SCTP_MIN_OVERHEAD); space = (unsigned int)M_TRAILINGSPACE(a_chk->data); if (space > (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD)) { space = (a_chk->whoTo->mtu - SCTP_MIN_OVERHEAD); } limit = mtod(a_chk->data, caddr_t); limit += space; flags = 0; if ((asoc->sctp_cmt_on_off > 0) && SCTP_BASE_SYSCTL(sctp_cmt_use_dac)) { /*- * CMT DAC algorithm: If 2 (i.e., 0x10) packets have been * received, then set high bit to 1, else 0. Reset * pkts_rcvd. */ flags |= (asoc->cmt_dac_pkts_rcvd << 6); asoc->cmt_dac_pkts_rcvd = 0; } #ifdef SCTP_ASOCLOG_OF_TSNS stcb->asoc.cumack_logsnt[stcb->asoc.cumack_log_atsnt] = asoc->cumulative_tsn; stcb->asoc.cumack_log_atsnt++; if (stcb->asoc.cumack_log_atsnt >= SCTP_TSN_LOG_SIZE) { stcb->asoc.cumack_log_atsnt = 0; } #endif /* reset the readers interpretation */ stcb->freed_by_sorcv_sincelast = 0; if (type == SCTP_SELECTIVE_ACK) { sack = mtod(a_chk->data, struct sctp_sack_chunk *); nr_sack = NULL; gap_descriptor = (struct sctp_gap_ack_block *)((caddr_t)sack + sizeof(struct sctp_sack_chunk)); if (highest_tsn > asoc->mapping_array_base_tsn) { siz = (((highest_tsn - asoc->mapping_array_base_tsn) + 1) + 7) / 8; } else { siz = (((MAX_TSN - highest_tsn) + 1) + highest_tsn + 7) / 8; } } else { sack = NULL; nr_sack = mtod(a_chk->data, struct sctp_nr_sack_chunk *); gap_descriptor = (struct sctp_gap_ack_block *)((caddr_t)nr_sack + sizeof(struct sctp_nr_sack_chunk)); if (asoc->highest_tsn_inside_map > asoc->mapping_array_base_tsn) { siz = (((asoc->highest_tsn_inside_map - asoc->mapping_array_base_tsn) + 1) + 7) / 8; } else { siz = (((MAX_TSN - asoc->mapping_array_base_tsn) + 1) + asoc->highest_tsn_inside_map + 7) / 8; } } if (SCTP_TSN_GT(asoc->mapping_array_base_tsn, asoc->cumulative_tsn)) { offset = 1; } else { offset = asoc->mapping_array_base_tsn - asoc->cumulative_tsn; } if (((type == SCTP_SELECTIVE_ACK) && SCTP_TSN_GT(highest_tsn, asoc->cumulative_tsn)) || ((type == SCTP_NR_SELECTIVE_ACK) && SCTP_TSN_GT(asoc->highest_tsn_inside_map, asoc->cumulative_tsn))) { /* we have a gap .. maybe */ for (i = 0; i < siz; i++) { tsn_map = asoc->mapping_array[i]; if (type == SCTP_SELECTIVE_ACK) { tsn_map |= asoc->nr_mapping_array[i]; } if (i == 0) { /* * Clear all bits corresponding to TSNs * smaller or equal to the cumulative TSN. */ tsn_map &= (~0U << (1 - offset)); } selector = &sack_array[tsn_map]; if (mergeable && selector->right_edge) { /* * Backup, left and right edges were ok to * merge. */ num_gap_blocks--; gap_descriptor--; } if (selector->num_entries == 0) mergeable = 0; else { for (j = 0; j < selector->num_entries; j++) { if (mergeable && selector->right_edge) { /* * do a merge by NOT setting * the left side */ mergeable = 0; } else { /* * no merge, set the left * side */ mergeable = 0; gap_descriptor->start = htons((selector->gaps[j].start + offset)); } gap_descriptor->end = htons((selector->gaps[j].end + offset)); num_gap_blocks++; gap_descriptor++; if (((caddr_t)gap_descriptor + sizeof(struct sctp_gap_ack_block)) > limit) { /* no more room */ limit_reached = 1; break; } } if (selector->left_edge) { mergeable = 1; } } if (limit_reached) { /* Reached the limit stop */ break; } offset += 8; } } if ((type == SCTP_NR_SELECTIVE_ACK) && (limit_reached == 0)) { mergeable = 0; if (asoc->highest_tsn_inside_nr_map > asoc->mapping_array_base_tsn) { siz = (((asoc->highest_tsn_inside_nr_map - asoc->mapping_array_base_tsn) + 1) + 7) / 8; } else { siz = (((MAX_TSN - asoc->mapping_array_base_tsn) + 1) + asoc->highest_tsn_inside_nr_map + 7) / 8; } if (SCTP_TSN_GT(asoc->mapping_array_base_tsn, asoc->cumulative_tsn)) { offset = 1; } else { offset = asoc->mapping_array_base_tsn - asoc->cumulative_tsn; } if (SCTP_TSN_GT(asoc->highest_tsn_inside_nr_map, asoc->cumulative_tsn)) { /* we have a gap .. maybe */ for (i = 0; i < siz; i++) { tsn_map = asoc->nr_mapping_array[i]; if (i == 0) { /* * Clear all bits corresponding to * TSNs smaller or equal to the * cumulative TSN. */ tsn_map &= (~0U << (1 - offset)); } selector = &sack_array[tsn_map]; if (mergeable && selector->right_edge) { /* * Backup, left and right edges were * ok to merge. */ num_nr_gap_blocks--; gap_descriptor--; } if (selector->num_entries == 0) mergeable = 0; else { for (j = 0; j < selector->num_entries; j++) { if (mergeable && selector->right_edge) { /* * do a merge by NOT * setting the left * side */ mergeable = 0; } else { /* * no merge, set the * left side */ mergeable = 0; gap_descriptor->start = htons((selector->gaps[j].start + offset)); } gap_descriptor->end = htons((selector->gaps[j].end + offset)); num_nr_gap_blocks++; gap_descriptor++; if (((caddr_t)gap_descriptor + sizeof(struct sctp_gap_ack_block)) > limit) { /* no more room */ limit_reached = 1; break; } } if (selector->left_edge) { mergeable = 1; } } if (limit_reached) { /* Reached the limit stop */ break; } offset += 8; } } } /* now we must add any dups we are going to report. */ if ((limit_reached == 0) && (asoc->numduptsns)) { dup = (uint32_t *)gap_descriptor; for (i = 0; i < asoc->numduptsns; i++) { *dup = htonl(asoc->dup_tsns[i]); dup++; num_dups++; if (((caddr_t)dup + sizeof(uint32_t)) > limit) { /* no more room */ break; } } asoc->numduptsns = 0; } /* * now that the chunk is prepared queue it to the control chunk * queue. */ if (type == SCTP_SELECTIVE_ACK) { a_chk->send_size = (uint16_t)(sizeof(struct sctp_sack_chunk) + (num_gap_blocks + num_nr_gap_blocks) * sizeof(struct sctp_gap_ack_block) + num_dups * sizeof(int32_t)); SCTP_BUF_LEN(a_chk->data) = a_chk->send_size; sack->sack.cum_tsn_ack = htonl(asoc->cumulative_tsn); sack->sack.a_rwnd = htonl(asoc->my_rwnd); sack->sack.num_gap_ack_blks = htons(num_gap_blocks); sack->sack.num_dup_tsns = htons(num_dups); sack->ch.chunk_type = type; sack->ch.chunk_flags = flags; sack->ch.chunk_length = htons(a_chk->send_size); } else { a_chk->send_size = (uint16_t)(sizeof(struct sctp_nr_sack_chunk) + (num_gap_blocks + num_nr_gap_blocks) * sizeof(struct sctp_gap_ack_block) + num_dups * sizeof(int32_t)); SCTP_BUF_LEN(a_chk->data) = a_chk->send_size; nr_sack->nr_sack.cum_tsn_ack = htonl(asoc->cumulative_tsn); nr_sack->nr_sack.a_rwnd = htonl(asoc->my_rwnd); nr_sack->nr_sack.num_gap_ack_blks = htons(num_gap_blocks); nr_sack->nr_sack.num_nr_gap_ack_blks = htons(num_nr_gap_blocks); nr_sack->nr_sack.num_dup_tsns = htons(num_dups); nr_sack->nr_sack.reserved = 0; nr_sack->ch.chunk_type = type; nr_sack->ch.chunk_flags = flags; nr_sack->ch.chunk_length = htons(a_chk->send_size); } TAILQ_INSERT_TAIL(&asoc->control_send_queue, a_chk, sctp_next); asoc->my_last_reported_rwnd = asoc->my_rwnd; asoc->ctrl_queue_cnt++; asoc->send_sack = 0; SCTP_STAT_INCR(sctps_sendsacks); return; } void sctp_send_abort_tcb(struct sctp_tcb *stcb, struct mbuf *operr, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { struct mbuf *m_abort, *m, *m_last; struct mbuf *m_out, *m_end = NULL; struct sctp_abort_chunk *abort; struct sctp_auth_chunk *auth = NULL; struct sctp_nets *net; uint32_t vtag; uint32_t auth_offset = 0; int error; uint16_t cause_len, chunk_len, padding_len; SCTP_TCB_LOCK_ASSERT(stcb); /*- * Add an AUTH chunk, if chunk requires it and save the offset into * the chain for AUTH */ if (sctp_auth_is_required_chunk(SCTP_ABORT_ASSOCIATION, stcb->asoc.peer_auth_chunks)) { m_out = sctp_add_auth_chunk(NULL, &m_end, &auth, &auth_offset, stcb, SCTP_ABORT_ASSOCIATION); SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); } else { m_out = NULL; } m_abort = sctp_get_mbuf_for_msg(sizeof(struct sctp_abort_chunk), 0, M_NOWAIT, 1, MT_HEADER); if (m_abort == NULL) { if (m_out) { sctp_m_freem(m_out); } if (operr) { sctp_m_freem(operr); } return; } /* link in any error */ SCTP_BUF_NEXT(m_abort) = operr; cause_len = 0; m_last = NULL; for (m = operr; m; m = SCTP_BUF_NEXT(m)) { cause_len += (uint16_t)SCTP_BUF_LEN(m); if (SCTP_BUF_NEXT(m) == NULL) { m_last = m; } } SCTP_BUF_LEN(m_abort) = sizeof(struct sctp_abort_chunk); chunk_len = (uint16_t)sizeof(struct sctp_abort_chunk) + cause_len; padding_len = SCTP_SIZE32(chunk_len) - chunk_len; if (m_out == NULL) { /* NO Auth chunk prepended, so reserve space in front */ SCTP_BUF_RESV_UF(m_abort, SCTP_MIN_OVERHEAD); m_out = m_abort; } else { /* Put AUTH chunk at the front of the chain */ SCTP_BUF_NEXT(m_end) = m_abort; } if (stcb->asoc.alternate) { net = stcb->asoc.alternate; } else { net = stcb->asoc.primary_destination; } /* Fill in the ABORT chunk header. */ abort = mtod(m_abort, struct sctp_abort_chunk *); abort->ch.chunk_type = SCTP_ABORT_ASSOCIATION; if (stcb->asoc.peer_vtag == 0) { /* This happens iff the assoc is in COOKIE-WAIT state. */ vtag = stcb->asoc.my_vtag; abort->ch.chunk_flags = SCTP_HAD_NO_TCB; } else { vtag = stcb->asoc.peer_vtag; abort->ch.chunk_flags = 0; } abort->ch.chunk_length = htons(chunk_len); /* Add padding, if necessary. */ if (padding_len > 0) { if ((m_last == NULL) || (sctp_add_pad_tombuf(m_last, padding_len) == NULL)) { sctp_m_freem(m_out); return; } } if ((error = sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, net, (struct sockaddr *)&net->ro._l_addr, m_out, auth_offset, auth, stcb->asoc.authinfo.active_keyid, 1, 0, 0, stcb->sctp_ep->sctp_lport, stcb->rport, htonl(vtag), stcb->asoc.primary_destination->port, NULL, 0, 0, so_locked))) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error); if (error == ENOBUFS) { stcb->asoc.ifp_had_enobuf = 1; SCTP_STAT_INCR(sctps_lowlevelerr); } } else { stcb->asoc.ifp_had_enobuf = 0; } SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); } void sctp_send_shutdown_complete(struct sctp_tcb *stcb, struct sctp_nets *net, int reflect_vtag) { /* formulate and SEND a SHUTDOWN-COMPLETE */ struct mbuf *m_shutdown_comp; struct sctp_shutdown_complete_chunk *shutdown_complete; uint32_t vtag; int error; uint8_t flags; m_shutdown_comp = sctp_get_mbuf_for_msg(sizeof(struct sctp_chunkhdr), 0, M_NOWAIT, 1, MT_HEADER); if (m_shutdown_comp == NULL) { /* no mbuf's */ return; } if (reflect_vtag) { flags = SCTP_HAD_NO_TCB; vtag = stcb->asoc.my_vtag; } else { flags = 0; vtag = stcb->asoc.peer_vtag; } shutdown_complete = mtod(m_shutdown_comp, struct sctp_shutdown_complete_chunk *); shutdown_complete->ch.chunk_type = SCTP_SHUTDOWN_COMPLETE; shutdown_complete->ch.chunk_flags = flags; shutdown_complete->ch.chunk_length = htons(sizeof(struct sctp_shutdown_complete_chunk)); SCTP_BUF_LEN(m_shutdown_comp) = sizeof(struct sctp_shutdown_complete_chunk); if ((error = sctp_lowlevel_chunk_output(stcb->sctp_ep, stcb, net, (struct sockaddr *)&net->ro._l_addr, m_shutdown_comp, 0, NULL, 0, 1, 0, 0, stcb->sctp_ep->sctp_lport, stcb->rport, htonl(vtag), net->port, NULL, 0, 0, SCTP_SO_NOT_LOCKED))) { SCTPDBG(SCTP_DEBUG_OUTPUT3, "Gak send error %d\n", error); if (error == ENOBUFS) { stcb->asoc.ifp_had_enobuf = 1; SCTP_STAT_INCR(sctps_lowlevelerr); } } else { stcb->asoc.ifp_had_enobuf = 0; } SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); return; } static void sctp_send_resp_msg(struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, uint32_t vtag, uint8_t type, struct mbuf *cause, uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum, uint32_t vrf_id, uint16_t port) { struct mbuf *o_pak; struct mbuf *mout; struct sctphdr *shout; struct sctp_chunkhdr *ch; #if defined(INET) || defined(INET6) struct udphdr *udp; #endif int ret, len, cause_len, padding_len; #ifdef INET struct sockaddr_in *src_sin, *dst_sin; struct ip *ip; #endif #ifdef INET6 struct sockaddr_in6 *src_sin6, *dst_sin6; struct ip6_hdr *ip6; #endif /* Compute the length of the cause and add final padding. */ cause_len = 0; if (cause != NULL) { struct mbuf *m_at, *m_last = NULL; for (m_at = cause; m_at; m_at = SCTP_BUF_NEXT(m_at)) { if (SCTP_BUF_NEXT(m_at) == NULL) m_last = m_at; cause_len += SCTP_BUF_LEN(m_at); } padding_len = cause_len % 4; if (padding_len != 0) { padding_len = 4 - padding_len; } if (padding_len != 0) { if (sctp_add_pad_tombuf(m_last, padding_len) == NULL) { sctp_m_freem(cause); return; } } } else { padding_len = 0; } /* Get an mbuf for the header. */ len = sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr); switch (dst->sa_family) { #ifdef INET case AF_INET: len += sizeof(struct ip); break; #endif #ifdef INET6 case AF_INET6: len += sizeof(struct ip6_hdr); break; #endif default: break; } #if defined(INET) || defined(INET6) if (port) { len += sizeof(struct udphdr); } #endif mout = sctp_get_mbuf_for_msg(len + max_linkhdr, 1, M_NOWAIT, 1, MT_DATA); if (mout == NULL) { if (cause) { sctp_m_freem(cause); } return; } SCTP_BUF_RESV_UF(mout, max_linkhdr); SCTP_BUF_LEN(mout) = len; SCTP_BUF_NEXT(mout) = cause; M_SETFIB(mout, fibnum); mout->m_pkthdr.flowid = mflowid; M_HASHTYPE_SET(mout, mflowtype); #ifdef INET ip = NULL; #endif #ifdef INET6 ip6 = NULL; #endif switch (dst->sa_family) { #ifdef INET case AF_INET: src_sin = (struct sockaddr_in *)src; dst_sin = (struct sockaddr_in *)dst; ip = mtod(mout, struct ip *); ip->ip_v = IPVERSION; ip->ip_hl = (sizeof(struct ip) >> 2); ip->ip_tos = 0; ip->ip_off = htons(IP_DF); ip_fillid(ip); ip->ip_ttl = MODULE_GLOBAL(ip_defttl); if (port) { ip->ip_p = IPPROTO_UDP; } else { ip->ip_p = IPPROTO_SCTP; } ip->ip_src.s_addr = dst_sin->sin_addr.s_addr; ip->ip_dst.s_addr = src_sin->sin_addr.s_addr; ip->ip_sum = 0; len = sizeof(struct ip); shout = (struct sctphdr *)((caddr_t)ip + len); break; #endif #ifdef INET6 case AF_INET6: src_sin6 = (struct sockaddr_in6 *)src; dst_sin6 = (struct sockaddr_in6 *)dst; ip6 = mtod(mout, struct ip6_hdr *); ip6->ip6_flow = htonl(0x60000000); if (V_ip6_auto_flowlabel) { ip6->ip6_flow |= (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); } ip6->ip6_hlim = MODULE_GLOBAL(ip6_defhlim); if (port) { ip6->ip6_nxt = IPPROTO_UDP; } else { ip6->ip6_nxt = IPPROTO_SCTP; } ip6->ip6_src = dst_sin6->sin6_addr; ip6->ip6_dst = src_sin6->sin6_addr; len = sizeof(struct ip6_hdr); shout = (struct sctphdr *)((caddr_t)ip6 + len); break; #endif default: len = 0; shout = mtod(mout, struct sctphdr *); break; } #if defined(INET) || defined(INET6) if (port) { if (htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)) == 0) { sctp_m_freem(mout); return; } udp = (struct udphdr *)shout; udp->uh_sport = htons(SCTP_BASE_SYSCTL(sctp_udp_tunneling_port)); udp->uh_dport = port; udp->uh_sum = 0; udp->uh_ulen = htons((uint16_t)(sizeof(struct udphdr) + sizeof(struct sctphdr) + sizeof(struct sctp_chunkhdr) + cause_len + padding_len)); len += sizeof(struct udphdr); shout = (struct sctphdr *)((caddr_t)shout + sizeof(struct udphdr)); } else { udp = NULL; } #endif shout->src_port = sh->dest_port; shout->dest_port = sh->src_port; shout->checksum = 0; if (vtag) { shout->v_tag = htonl(vtag); } else { shout->v_tag = sh->v_tag; } len += sizeof(struct sctphdr); ch = (struct sctp_chunkhdr *)((caddr_t)shout + sizeof(struct sctphdr)); ch->chunk_type = type; if (vtag) { ch->chunk_flags = 0; } else { ch->chunk_flags = SCTP_HAD_NO_TCB; } ch->chunk_length = htons((uint16_t)(sizeof(struct sctp_chunkhdr) + cause_len)); len += sizeof(struct sctp_chunkhdr); len += cause_len + padding_len; if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) { sctp_m_freem(mout); return; } SCTP_ATTACH_CHAIN(o_pak, mout, len); switch (dst->sa_family) { #ifdef INET case AF_INET: if (port) { if (V_udp_cksum) { udp->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, udp->uh_ulen + htons(IPPROTO_UDP)); } else { udp->uh_sum = 0; } } ip->ip_len = htons(len); if (port) { shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip) + sizeof(struct udphdr)); SCTP_STAT_INCR(sctps_sendswcrc); if (V_udp_cksum) { SCTP_ENABLE_UDP_CSUM(o_pak); } } else { mout->m_pkthdr.csum_flags = CSUM_SCTP; mout->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum); SCTP_STAT_INCR(sctps_sendhwcrc); } #ifdef SCTP_PACKET_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) { sctp_packet_log(o_pak); } #endif SCTP_PROBE5(send, NULL, NULL, ip, NULL, shout); SCTP_IP_OUTPUT(ret, o_pak, NULL, NULL, vrf_id); break; #endif #ifdef INET6 case AF_INET6: ip6->ip6_plen = htons((uint16_t)(len - sizeof(struct ip6_hdr))); if (port) { shout->checksum = sctp_calculate_cksum(mout, sizeof(struct ip6_hdr) + sizeof(struct udphdr)); SCTP_STAT_INCR(sctps_sendswcrc); if ((udp->uh_sum = in6_cksum(o_pak, IPPROTO_UDP, sizeof(struct ip6_hdr), len - sizeof(struct ip6_hdr))) == 0) { udp->uh_sum = 0xffff; } } else { mout->m_pkthdr.csum_flags = CSUM_SCTP_IPV6; mout->m_pkthdr.csum_data = offsetof(struct sctphdr, checksum); SCTP_STAT_INCR(sctps_sendhwcrc); } #ifdef SCTP_PACKET_LOGGING if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LAST_PACKET_TRACING) { sctp_packet_log(o_pak); } #endif SCTP_PROBE5(send, NULL, NULL, ip6, NULL, shout); SCTP_IP6_OUTPUT(ret, o_pak, NULL, NULL, NULL, vrf_id); break; #endif default: SCTPDBG(SCTP_DEBUG_OUTPUT1, "Unknown protocol (TSNH) type %d\n", dst->sa_family); sctp_m_freem(mout); SCTP_LTRACE_ERR_RET_PKT(mout, NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EFAULT); return; } SCTPDBG(SCTP_DEBUG_OUTPUT3, "return from send is %d\n", ret); SCTP_STAT_INCR(sctps_sendpackets); SCTP_STAT_INCR_COUNTER64(sctps_outpackets); SCTP_STAT_INCR_COUNTER64(sctps_outcontrolchunks); if (ret) { SCTP_STAT_INCR(sctps_senderrors); } return; } void sctp_send_shutdown_complete2(struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum, uint32_t vrf_id, uint16_t port) { sctp_send_resp_msg(src, dst, sh, 0, SCTP_SHUTDOWN_COMPLETE, NULL, mflowtype, mflowid, fibnum, vrf_id, port); } void sctp_send_hb(struct sctp_tcb *stcb, struct sctp_nets *net, int so_locked #if !defined(__APPLE__) && !defined(SCTP_SO_LOCK_TESTING) SCTP_UNUSED #endif ) { struct sctp_tmit_chunk *chk; struct sctp_heartbeat_chunk *hb; struct timeval now; SCTP_TCB_LOCK_ASSERT(stcb); if (net == NULL) { return; } (void)SCTP_GETTIME_TIMEVAL(&now); switch (net->ro._l_addr.sa.sa_family) { #ifdef INET case AF_INET: break; #endif #ifdef INET6 case AF_INET6: break; #endif default: return; } sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { SCTPDBG(SCTP_DEBUG_OUTPUT4, "Gak, can't get a chunk for hb\n"); return; } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_HEARTBEAT_REQUEST; chk->rec.chunk_id.can_take_data = 1; chk->flags = 0; chk->asoc = &stcb->asoc; chk->send_size = sizeof(struct sctp_heartbeat_chunk); chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER); if (chk->data == NULL) { sctp_free_a_chunk(stcb, chk, so_locked); return; } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); SCTP_BUF_LEN(chk->data) = chk->send_size; chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->whoTo = net; atomic_add_int(&chk->whoTo->ref_count, 1); /* Now we have a mbuf that we can fill in with the details */ hb = mtod(chk->data, struct sctp_heartbeat_chunk *); memset(hb, 0, sizeof(struct sctp_heartbeat_chunk)); /* fill out chunk header */ hb->ch.chunk_type = SCTP_HEARTBEAT_REQUEST; hb->ch.chunk_flags = 0; hb->ch.chunk_length = htons(chk->send_size); /* Fill out hb parameter */ hb->heartbeat.hb_info.ph.param_type = htons(SCTP_HEARTBEAT_INFO); hb->heartbeat.hb_info.ph.param_length = htons(sizeof(struct sctp_heartbeat_info_param)); hb->heartbeat.hb_info.time_value_1 = now.tv_sec; hb->heartbeat.hb_info.time_value_2 = now.tv_usec; /* Did our user request this one, put it in */ hb->heartbeat.hb_info.addr_family = (uint8_t)net->ro._l_addr.sa.sa_family; hb->heartbeat.hb_info.addr_len = net->ro._l_addr.sa.sa_len; if (net->dest_state & SCTP_ADDR_UNCONFIRMED) { /* * we only take from the entropy pool if the address is not * confirmed. */ net->heartbeat_random1 = hb->heartbeat.hb_info.random_value1 = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep); net->heartbeat_random2 = hb->heartbeat.hb_info.random_value2 = sctp_select_initial_TSN(&stcb->sctp_ep->sctp_ep); } else { net->heartbeat_random1 = hb->heartbeat.hb_info.random_value1 = 0; net->heartbeat_random2 = hb->heartbeat.hb_info.random_value2 = 0; } switch (net->ro._l_addr.sa.sa_family) { #ifdef INET case AF_INET: memcpy(hb->heartbeat.hb_info.address, &net->ro._l_addr.sin.sin_addr, sizeof(net->ro._l_addr.sin.sin_addr)); break; #endif #ifdef INET6 case AF_INET6: memcpy(hb->heartbeat.hb_info.address, &net->ro._l_addr.sin6.sin6_addr, sizeof(net->ro._l_addr.sin6.sin6_addr)); break; #endif default: if (chk->data) { sctp_m_freem(chk->data); chk->data = NULL; } sctp_free_a_chunk(stcb, chk, so_locked); return; break; } net->hb_responded = 0; TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next); stcb->asoc.ctrl_queue_cnt++; SCTP_STAT_INCR(sctps_sendheartbeat); return; } void sctp_send_ecn_echo(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn) { struct sctp_association *asoc; struct sctp_ecne_chunk *ecne; struct sctp_tmit_chunk *chk; if (net == NULL) { return; } asoc = &stcb->asoc; SCTP_TCB_LOCK_ASSERT(stcb); TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) { if ((chk->rec.chunk_id.id == SCTP_ECN_ECHO) && (net == chk->whoTo)) { /* found a previous ECN_ECHO update it if needed */ uint32_t cnt, ctsn; ecne = mtod(chk->data, struct sctp_ecne_chunk *); ctsn = ntohl(ecne->tsn); if (SCTP_TSN_GT(high_tsn, ctsn)) { ecne->tsn = htonl(high_tsn); SCTP_STAT_INCR(sctps_queue_upd_ecne); } cnt = ntohl(ecne->num_pkts_since_cwr); cnt++; ecne->num_pkts_since_cwr = htonl(cnt); return; } } /* nope could not find one to update so we must build one */ sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { return; } SCTP_STAT_INCR(sctps_queue_upd_ecne); chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_ECN_ECHO; chk->rec.chunk_id.can_take_data = 0; chk->flags = 0; chk->asoc = &stcb->asoc; chk->send_size = sizeof(struct sctp_ecne_chunk); chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER); if (chk->data == NULL) { sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return; } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); SCTP_BUF_LEN(chk->data) = chk->send_size; chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->whoTo = net; atomic_add_int(&chk->whoTo->ref_count, 1); stcb->asoc.ecn_echo_cnt_onq++; ecne = mtod(chk->data, struct sctp_ecne_chunk *); ecne->ch.chunk_type = SCTP_ECN_ECHO; ecne->ch.chunk_flags = 0; ecne->ch.chunk_length = htons(sizeof(struct sctp_ecne_chunk)); ecne->tsn = htonl(high_tsn); ecne->num_pkts_since_cwr = htonl(1); TAILQ_INSERT_HEAD(&stcb->asoc.control_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt++; } void sctp_send_packet_dropped(struct sctp_tcb *stcb, struct sctp_nets *net, struct mbuf *m, int len, int iphlen, int bad_crc) { struct sctp_association *asoc; struct sctp_pktdrop_chunk *drp; struct sctp_tmit_chunk *chk; uint8_t *datap; int was_trunc = 0; int fullsz = 0; long spc; int offset; struct sctp_chunkhdr *ch, chunk_buf; unsigned int chk_length; if (!stcb) { return; } asoc = &stcb->asoc; SCTP_TCB_LOCK_ASSERT(stcb); if (asoc->pktdrop_supported == 0) { /*- * peer must declare support before I send one. */ return; } if (stcb->sctp_socket == NULL) { return; } sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { return; } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_PACKET_DROPPED; chk->rec.chunk_id.can_take_data = 1; chk->flags = 0; len -= iphlen; chk->send_size = len; /* Validate that we do not have an ABORT in here. */ offset = iphlen + sizeof(struct sctphdr); ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset, sizeof(*ch), (uint8_t *)&chunk_buf); while (ch != NULL) { chk_length = ntohs(ch->chunk_length); if (chk_length < sizeof(*ch)) { /* break to abort land */ break; } switch (ch->chunk_type) { case SCTP_PACKET_DROPPED: case SCTP_ABORT_ASSOCIATION: case SCTP_INITIATION_ACK: /** * We don't respond with an PKT-DROP to an ABORT * or PKT-DROP. We also do not respond to an * INIT-ACK, because we can't know if the initiation * tag is correct or not. */ sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return; default: break; } offset += SCTP_SIZE32(chk_length); ch = (struct sctp_chunkhdr *)sctp_m_getptr(m, offset, sizeof(*ch), (uint8_t *)&chunk_buf); } if ((len + SCTP_MAX_OVERHEAD + sizeof(struct sctp_pktdrop_chunk)) > min(stcb->asoc.smallest_mtu, MCLBYTES)) { /* * only send 1 mtu worth, trim off the excess on the end. */ fullsz = len; len = min(stcb->asoc.smallest_mtu, MCLBYTES) - SCTP_MAX_OVERHEAD; was_trunc = 1; } chk->asoc = &stcb->asoc; chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA); if (chk->data == NULL) { jump_out: sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return; } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); drp = mtod(chk->data, struct sctp_pktdrop_chunk *); if (drp == NULL) { sctp_m_freem(chk->data); chk->data = NULL; goto jump_out; } chk->book_size = SCTP_SIZE32((chk->send_size + sizeof(struct sctp_pktdrop_chunk) + sizeof(struct sctphdr) + SCTP_MED_OVERHEAD)); chk->book_size_scale = 0; if (was_trunc) { drp->ch.chunk_flags = SCTP_PACKET_TRUNCATED; drp->trunc_len = htons(fullsz); /* * Len is already adjusted to size minus overhead above take * out the pkt_drop chunk itself from it. */ chk->send_size = (uint16_t)(len - sizeof(struct sctp_pktdrop_chunk)); len = chk->send_size; } else { /* no truncation needed */ drp->ch.chunk_flags = 0; drp->trunc_len = htons(0); } if (bad_crc) { drp->ch.chunk_flags |= SCTP_BADCRC; } chk->send_size += sizeof(struct sctp_pktdrop_chunk); SCTP_BUF_LEN(chk->data) = chk->send_size; chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; if (net) { /* we should hit here */ chk->whoTo = net; atomic_add_int(&chk->whoTo->ref_count, 1); } else { chk->whoTo = NULL; } drp->ch.chunk_type = SCTP_PACKET_DROPPED; drp->ch.chunk_length = htons(chk->send_size); spc = SCTP_SB_LIMIT_RCV(stcb->sctp_socket); if (spc < 0) { spc = 0; } drp->bottle_bw = htonl(spc); if (asoc->my_rwnd) { drp->current_onq = htonl(asoc->size_on_reasm_queue + asoc->size_on_all_streams + asoc->my_rwnd_control_len + stcb->sctp_socket->so_rcv.sb_cc); } else { /*- * If my rwnd is 0, possibly from mbuf depletion as well as * space used, tell the peer there is NO space aka onq == bw */ drp->current_onq = htonl(spc); } drp->reserved = 0; datap = drp->data; m_copydata(m, iphlen, len, (caddr_t)datap); TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt++; } void sctp_send_cwr(struct sctp_tcb *stcb, struct sctp_nets *net, uint32_t high_tsn, uint8_t override) { struct sctp_association *asoc; struct sctp_cwr_chunk *cwr; struct sctp_tmit_chunk *chk; SCTP_TCB_LOCK_ASSERT(stcb); if (net == NULL) { return; } asoc = &stcb->asoc; TAILQ_FOREACH(chk, &asoc->control_send_queue, sctp_next) { if ((chk->rec.chunk_id.id == SCTP_ECN_CWR) && (net == chk->whoTo)) { /* * found a previous CWR queued to same destination * update it if needed */ uint32_t ctsn; cwr = mtod(chk->data, struct sctp_cwr_chunk *); ctsn = ntohl(cwr->tsn); if (SCTP_TSN_GT(high_tsn, ctsn)) { cwr->tsn = htonl(high_tsn); } if (override & SCTP_CWR_REDUCE_OVERRIDE) { /* Make sure override is carried */ cwr->ch.chunk_flags |= SCTP_CWR_REDUCE_OVERRIDE; } return; } } sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { return; } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_ECN_CWR; chk->rec.chunk_id.can_take_data = 1; chk->flags = 0; chk->asoc = &stcb->asoc; chk->send_size = sizeof(struct sctp_cwr_chunk); chk->data = sctp_get_mbuf_for_msg(chk->send_size, 0, M_NOWAIT, 1, MT_HEADER); if (chk->data == NULL) { sctp_free_a_chunk(stcb, chk, SCTP_SO_NOT_LOCKED); return; } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); SCTP_BUF_LEN(chk->data) = chk->send_size; chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; chk->whoTo = net; atomic_add_int(&chk->whoTo->ref_count, 1); cwr = mtod(chk->data, struct sctp_cwr_chunk *); cwr->ch.chunk_type = SCTP_ECN_CWR; cwr->ch.chunk_flags = override; cwr->ch.chunk_length = htons(sizeof(struct sctp_cwr_chunk)); cwr->tsn = htonl(high_tsn); TAILQ_INSERT_TAIL(&stcb->asoc.control_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt++; } static int sctp_add_stream_reset_out(struct sctp_tcb *stcb, struct sctp_tmit_chunk *chk, uint32_t seq, uint32_t resp_seq, uint32_t last_sent) { uint16_t len, old_len, i; struct sctp_stream_reset_out_request *req_out; struct sctp_chunkhdr *ch; int at; int number_entries = 0; ch = mtod(chk->data, struct sctp_chunkhdr *); old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); /* get to new offset for the param. */ req_out = (struct sctp_stream_reset_out_request *)((caddr_t)ch + len); /* now how long will this param be? */ for (i = 0; i < stcb->asoc.streamoutcnt; i++) { if ((stcb->asoc.strmout[i].state == SCTP_STREAM_RESET_PENDING) && (stcb->asoc.strmout[i].chunks_on_queues == 0) && TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) { number_entries++; } } if (number_entries == 0) { return (0); } if (number_entries == stcb->asoc.streamoutcnt) { number_entries = 0; } if (number_entries > SCTP_MAX_STREAMS_AT_ONCE_RESET) { number_entries = SCTP_MAX_STREAMS_AT_ONCE_RESET; } len = (uint16_t)(sizeof(struct sctp_stream_reset_out_request) + (sizeof(uint16_t) * number_entries)); req_out->ph.param_type = htons(SCTP_STR_RESET_OUT_REQUEST); req_out->ph.param_length = htons(len); req_out->request_seq = htonl(seq); req_out->response_seq = htonl(resp_seq); req_out->send_reset_at_tsn = htonl(last_sent); at = 0; if (number_entries) { for (i = 0; i < stcb->asoc.streamoutcnt; i++) { if ((stcb->asoc.strmout[i].state == SCTP_STREAM_RESET_PENDING) && (stcb->asoc.strmout[i].chunks_on_queues == 0) && TAILQ_EMPTY(&stcb->asoc.strmout[i].outqueue)) { req_out->list_of_streams[at] = htons(i); at++; stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_IN_FLIGHT; if (at >= number_entries) { break; } } } } else { for (i = 0; i < stcb->asoc.streamoutcnt; i++) { stcb->asoc.strmout[i].state = SCTP_STREAM_RESET_IN_FLIGHT; } } if (SCTP_SIZE32(len) > len) { /*- * Need to worry about the pad we may end up adding to the * end. This is easy since the struct is either aligned to 4 * bytes or 2 bytes off. */ req_out->list_of_streams[number_entries] = 0; } /* now fix the chunk length */ ch->chunk_length = htons(len + old_len); chk->book_size = len + old_len; chk->book_size_scale = 0; chk->send_size = SCTP_SIZE32(chk->book_size); SCTP_BUF_LEN(chk->data) = chk->send_size; return (1); } static void sctp_add_stream_reset_in(struct sctp_tmit_chunk *chk, int number_entries, uint16_t *list, uint32_t seq) { uint16_t len, old_len, i; struct sctp_stream_reset_in_request *req_in; struct sctp_chunkhdr *ch; ch = mtod(chk->data, struct sctp_chunkhdr *); old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); /* get to new offset for the param. */ req_in = (struct sctp_stream_reset_in_request *)((caddr_t)ch + len); /* now how long will this param be? */ len = (uint16_t)(sizeof(struct sctp_stream_reset_in_request) + (sizeof(uint16_t) * number_entries)); req_in->ph.param_type = htons(SCTP_STR_RESET_IN_REQUEST); req_in->ph.param_length = htons(len); req_in->request_seq = htonl(seq); if (number_entries) { for (i = 0; i < number_entries; i++) { req_in->list_of_streams[i] = htons(list[i]); } } if (SCTP_SIZE32(len) > len) { /*- * Need to worry about the pad we may end up adding to the * end. This is easy since the struct is either aligned to 4 * bytes or 2 bytes off. */ req_in->list_of_streams[number_entries] = 0; } /* now fix the chunk length */ ch->chunk_length = htons(len + old_len); chk->book_size = len + old_len; chk->book_size_scale = 0; chk->send_size = SCTP_SIZE32(chk->book_size); SCTP_BUF_LEN(chk->data) = chk->send_size; return; } static void sctp_add_stream_reset_tsn(struct sctp_tmit_chunk *chk, uint32_t seq) { uint16_t len, old_len; struct sctp_stream_reset_tsn_request *req_tsn; struct sctp_chunkhdr *ch; ch = mtod(chk->data, struct sctp_chunkhdr *); old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); /* get to new offset for the param. */ req_tsn = (struct sctp_stream_reset_tsn_request *)((caddr_t)ch + len); /* now how long will this param be? */ len = sizeof(struct sctp_stream_reset_tsn_request); req_tsn->ph.param_type = htons(SCTP_STR_RESET_TSN_REQUEST); req_tsn->ph.param_length = htons(len); req_tsn->request_seq = htonl(seq); /* now fix the chunk length */ ch->chunk_length = htons(len + old_len); chk->send_size = len + old_len; chk->book_size = SCTP_SIZE32(chk->send_size); chk->book_size_scale = 0; SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size); return; } void sctp_add_stream_reset_result(struct sctp_tmit_chunk *chk, uint32_t resp_seq, uint32_t result) { uint16_t len, old_len; struct sctp_stream_reset_response *resp; struct sctp_chunkhdr *ch; ch = mtod(chk->data, struct sctp_chunkhdr *); old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); /* get to new offset for the param. */ resp = (struct sctp_stream_reset_response *)((caddr_t)ch + len); /* now how long will this param be? */ len = sizeof(struct sctp_stream_reset_response); resp->ph.param_type = htons(SCTP_STR_RESET_RESPONSE); resp->ph.param_length = htons(len); resp->response_seq = htonl(resp_seq); resp->result = ntohl(result); /* now fix the chunk length */ ch->chunk_length = htons(len + old_len); chk->book_size = len + old_len; chk->book_size_scale = 0; chk->send_size = SCTP_SIZE32(chk->book_size); SCTP_BUF_LEN(chk->data) = chk->send_size; return; } void sctp_send_deferred_reset_response(struct sctp_tcb *stcb, struct sctp_stream_reset_list *ent, int response) { struct sctp_association *asoc; struct sctp_tmit_chunk *chk; struct sctp_chunkhdr *ch; asoc = &stcb->asoc; /* * Reset our last reset action to the new one IP -> response * (PERFORMED probably). This assures that if we fail to send, a * retran from the peer will get the new response. */ asoc->last_reset_action[0] = response; if (asoc->stream_reset_outstanding) { return; } sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return; } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_STREAM_RESET; chk->rec.chunk_id.can_take_data = 0; chk->flags = 0; chk->asoc = &stcb->asoc; chk->book_size = sizeof(struct sctp_chunkhdr); chk->send_size = SCTP_SIZE32(chk->book_size); chk->book_size_scale = 0; chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA); if (chk->data == NULL) { sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED); SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return; } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); /* setup chunk parameters */ chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; if (stcb->asoc.alternate) { chk->whoTo = stcb->asoc.alternate; } else { chk->whoTo = stcb->asoc.primary_destination; } ch = mtod(chk->data, struct sctp_chunkhdr *); ch->chunk_type = SCTP_STREAM_RESET; ch->chunk_flags = 0; ch->chunk_length = htons(chk->book_size); atomic_add_int(&chk->whoTo->ref_count, 1); SCTP_BUF_LEN(chk->data) = chk->send_size; sctp_add_stream_reset_result(chk, ent->seq, response); /* insert the chunk for sending */ TAILQ_INSERT_TAIL(&asoc->control_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt++; } void sctp_add_stream_reset_result_tsn(struct sctp_tmit_chunk *chk, uint32_t resp_seq, uint32_t result, uint32_t send_una, uint32_t recv_next) { uint16_t len, old_len; struct sctp_stream_reset_response_tsn *resp; struct sctp_chunkhdr *ch; ch = mtod(chk->data, struct sctp_chunkhdr *); old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); /* get to new offset for the param. */ resp = (struct sctp_stream_reset_response_tsn *)((caddr_t)ch + len); /* now how long will this param be? */ len = sizeof(struct sctp_stream_reset_response_tsn); resp->ph.param_type = htons(SCTP_STR_RESET_RESPONSE); resp->ph.param_length = htons(len); resp->response_seq = htonl(resp_seq); resp->result = htonl(result); resp->senders_next_tsn = htonl(send_una); resp->receivers_next_tsn = htonl(recv_next); /* now fix the chunk length */ ch->chunk_length = htons(len + old_len); chk->book_size = len + old_len; chk->send_size = SCTP_SIZE32(chk->book_size); chk->book_size_scale = 0; SCTP_BUF_LEN(chk->data) = chk->send_size; return; } static void sctp_add_an_out_stream(struct sctp_tmit_chunk *chk, uint32_t seq, uint16_t adding) { uint16_t len, old_len; struct sctp_chunkhdr *ch; struct sctp_stream_reset_add_strm *addstr; ch = mtod(chk->data, struct sctp_chunkhdr *); old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); /* get to new offset for the param. */ addstr = (struct sctp_stream_reset_add_strm *)((caddr_t)ch + len); /* now how long will this param be? */ len = sizeof(struct sctp_stream_reset_add_strm); /* Fill it out. */ addstr->ph.param_type = htons(SCTP_STR_RESET_ADD_OUT_STREAMS); addstr->ph.param_length = htons(len); addstr->request_seq = htonl(seq); addstr->number_of_streams = htons(adding); addstr->reserved = 0; /* now fix the chunk length */ ch->chunk_length = htons(len + old_len); chk->send_size = len + old_len; chk->book_size = SCTP_SIZE32(chk->send_size); chk->book_size_scale = 0; SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size); return; } static void sctp_add_an_in_stream(struct sctp_tmit_chunk *chk, uint32_t seq, uint16_t adding) { uint16_t len, old_len; struct sctp_chunkhdr *ch; struct sctp_stream_reset_add_strm *addstr; ch = mtod(chk->data, struct sctp_chunkhdr *); old_len = len = SCTP_SIZE32(ntohs(ch->chunk_length)); /* get to new offset for the param. */ addstr = (struct sctp_stream_reset_add_strm *)((caddr_t)ch + len); /* now how long will this param be? */ len = sizeof(struct sctp_stream_reset_add_strm); /* Fill it out. */ addstr->ph.param_type = htons(SCTP_STR_RESET_ADD_IN_STREAMS); addstr->ph.param_length = htons(len); addstr->request_seq = htonl(seq); addstr->number_of_streams = htons(adding); addstr->reserved = 0; /* now fix the chunk length */ ch->chunk_length = htons(len + old_len); chk->send_size = len + old_len; chk->book_size = SCTP_SIZE32(chk->send_size); chk->book_size_scale = 0; SCTP_BUF_LEN(chk->data) = SCTP_SIZE32(chk->send_size); return; } int sctp_send_stream_reset_out_if_possible(struct sctp_tcb *stcb, int so_locked) { struct sctp_association *asoc; struct sctp_tmit_chunk *chk; struct sctp_chunkhdr *ch; uint32_t seq; asoc = &stcb->asoc; asoc->trigger_reset = 0; if (asoc->stream_reset_outstanding) { return (EALREADY); } sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_STREAM_RESET; chk->rec.chunk_id.can_take_data = 0; chk->flags = 0; chk->asoc = &stcb->asoc; chk->book_size = sizeof(struct sctp_chunkhdr); chk->send_size = SCTP_SIZE32(chk->book_size); chk->book_size_scale = 0; chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA); if (chk->data == NULL) { sctp_free_a_chunk(stcb, chk, so_locked); SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); /* setup chunk parameters */ chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; if (stcb->asoc.alternate) { chk->whoTo = stcb->asoc.alternate; } else { chk->whoTo = stcb->asoc.primary_destination; } ch = mtod(chk->data, struct sctp_chunkhdr *); ch->chunk_type = SCTP_STREAM_RESET; ch->chunk_flags = 0; ch->chunk_length = htons(chk->book_size); atomic_add_int(&chk->whoTo->ref_count, 1); SCTP_BUF_LEN(chk->data) = chk->send_size; seq = stcb->asoc.str_reset_seq_out; if (sctp_add_stream_reset_out(stcb, chk, seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1))) { seq++; asoc->stream_reset_outstanding++; } else { m_freem(chk->data); chk->data = NULL; sctp_free_a_chunk(stcb, chk, so_locked); return (ENOENT); } asoc->str_reset = chk; /* insert the chunk for sending */ TAILQ_INSERT_TAIL(&asoc->control_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt++; if (stcb->asoc.send_sack) { sctp_send_sack(stcb, so_locked); } sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo); return (0); } int sctp_send_str_reset_req(struct sctp_tcb *stcb, uint16_t number_entries, uint16_t *list, uint8_t send_in_req, uint8_t send_tsn_req, uint8_t add_stream, uint16_t adding_o, uint16_t adding_i, uint8_t peer_asked) { struct sctp_association *asoc; struct sctp_tmit_chunk *chk; struct sctp_chunkhdr *ch; int can_send_out_req = 0; uint32_t seq; asoc = &stcb->asoc; if (asoc->stream_reset_outstanding) { /*- * Already one pending, must get ACK back to clear the flag. */ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EBUSY); return (EBUSY); } if ((send_in_req == 0) && (send_tsn_req == 0) && (add_stream == 0)) { /* nothing to do */ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL); return (EINVAL); } if (send_tsn_req && send_in_req) { /* error, can't do that */ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL); return (EINVAL); } else if (send_in_req) { can_send_out_req = 1; } if (number_entries > (MCLBYTES - SCTP_MIN_OVERHEAD - sizeof(struct sctp_chunkhdr) - sizeof(struct sctp_stream_reset_out_request)) / sizeof(uint16_t)) { SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } sctp_alloc_a_chunk(stcb, chk); if (chk == NULL) { SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } chk->copy_by_ref = 0; chk->rec.chunk_id.id = SCTP_STREAM_RESET; chk->rec.chunk_id.can_take_data = 0; chk->flags = 0; chk->asoc = &stcb->asoc; chk->book_size = sizeof(struct sctp_chunkhdr); chk->send_size = SCTP_SIZE32(chk->book_size); chk->book_size_scale = 0; chk->data = sctp_get_mbuf_for_msg(MCLBYTES, 0, M_NOWAIT, 1, MT_DATA); if (chk->data == NULL) { sctp_free_a_chunk(stcb, chk, SCTP_SO_LOCKED); SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ENOMEM); return (ENOMEM); } SCTP_BUF_RESV_UF(chk->data, SCTP_MIN_OVERHEAD); /* setup chunk parameters */ chk->sent = SCTP_DATAGRAM_UNSENT; chk->snd_count = 0; if (stcb->asoc.alternate) { chk->whoTo = stcb->asoc.alternate; } else { chk->whoTo = stcb->asoc.primary_destination; } atomic_add_int(&chk->whoTo->ref_count, 1); ch = mtod(chk->data, struct sctp_chunkhdr *); ch->chunk_type = SCTP_STREAM_RESET; ch->chunk_flags = 0; ch->chunk_length = htons(chk->book_size); SCTP_BUF_LEN(chk->data) = chk->send_size; seq = stcb->asoc.str_reset_seq_out; if (can_send_out_req) { int ret; ret = sctp_add_stream_reset_out(stcb, chk, seq, (stcb->asoc.str_reset_seq_in - 1), (stcb->asoc.sending_seq - 1)); if (ret) { seq++; asoc->stream_reset_outstanding++; } } if ((add_stream & 1) && ((stcb->asoc.strm_realoutsize - stcb->asoc.streamoutcnt) < adding_o)) { /* Need to allocate more */ struct sctp_stream_out *oldstream; struct sctp_stream_queue_pending *sp, *nsp; int i; #if defined(SCTP_DETAILED_STR_STATS) int j; #endif oldstream = stcb->asoc.strmout; /* get some more */ SCTP_MALLOC(stcb->asoc.strmout, struct sctp_stream_out *, (stcb->asoc.streamoutcnt + adding_o) * sizeof(struct sctp_stream_out), SCTP_M_STRMO); if (stcb->asoc.strmout == NULL) { uint8_t x; stcb->asoc.strmout = oldstream; /* Turn off the bit */ x = add_stream & 0xfe; add_stream = x; goto skip_stuff; } /* * Ok now we proceed with copying the old out stuff and * initializing the new stuff. */ SCTP_TCB_SEND_LOCK(stcb); stcb->asoc.ss_functions.sctp_ss_clear(stcb, &stcb->asoc, 0, 1); for (i = 0; i < stcb->asoc.streamoutcnt; i++) { TAILQ_INIT(&stcb->asoc.strmout[i].outqueue); stcb->asoc.strmout[i].chunks_on_queues = oldstream[i].chunks_on_queues; stcb->asoc.strmout[i].next_mid_ordered = oldstream[i].next_mid_ordered; stcb->asoc.strmout[i].next_mid_unordered = oldstream[i].next_mid_unordered; stcb->asoc.strmout[i].last_msg_incomplete = oldstream[i].last_msg_incomplete; stcb->asoc.strmout[i].sid = i; stcb->asoc.strmout[i].state = oldstream[i].state; /* FIX ME FIX ME */ /* * This should be a SS_COPY operation FIX ME STREAM * SCHEDULER EXPERT */ stcb->asoc.ss_functions.sctp_ss_init_stream(stcb, &stcb->asoc.strmout[i], &oldstream[i]); /* now anything on those queues? */ TAILQ_FOREACH_SAFE(sp, &oldstream[i].outqueue, next, nsp) { TAILQ_REMOVE(&oldstream[i].outqueue, sp, next); TAILQ_INSERT_TAIL(&stcb->asoc.strmout[i].outqueue, sp, next); } } /* now the new streams */ stcb->asoc.ss_functions.sctp_ss_init(stcb, &stcb->asoc, 1); for (i = stcb->asoc.streamoutcnt; i < (stcb->asoc.streamoutcnt + adding_o); i++) { TAILQ_INIT(&stcb->asoc.strmout[i].outqueue); stcb->asoc.strmout[i].chunks_on_queues = 0; #if defined(SCTP_DETAILED_STR_STATS) for (j = 0; j < SCTP_PR_SCTP_MAX + 1; j++) { stcb->asoc.strmout[i].abandoned_sent[j] = 0; stcb->asoc.strmout[i].abandoned_unsent[j] = 0; } #else stcb->asoc.strmout[i].abandoned_sent[0] = 0; stcb->asoc.strmout[i].abandoned_unsent[0] = 0; #endif stcb->asoc.strmout[i].next_mid_ordered = 0; stcb->asoc.strmout[i].next_mid_unordered = 0; stcb->asoc.strmout[i].sid = i; stcb->asoc.strmout[i].last_msg_incomplete = 0; stcb->asoc.ss_functions.sctp_ss_init_stream(stcb, &stcb->asoc.strmout[i], NULL); stcb->asoc.strmout[i].state = SCTP_STREAM_CLOSED; } stcb->asoc.strm_realoutsize = stcb->asoc.streamoutcnt + adding_o; SCTP_FREE(oldstream, SCTP_M_STRMO); SCTP_TCB_SEND_UNLOCK(stcb); } skip_stuff: if ((add_stream & 1) && (adding_o > 0)) { asoc->strm_pending_add_size = adding_o; asoc->peer_req_out = peer_asked; sctp_add_an_out_stream(chk, seq, adding_o); seq++; asoc->stream_reset_outstanding++; } if ((add_stream & 2) && (adding_i > 0)) { sctp_add_an_in_stream(chk, seq, adding_i); seq++; asoc->stream_reset_outstanding++; } if (send_in_req) { sctp_add_stream_reset_in(chk, number_entries, list, seq); seq++; asoc->stream_reset_outstanding++; } if (send_tsn_req) { sctp_add_stream_reset_tsn(chk, seq); asoc->stream_reset_outstanding++; } asoc->str_reset = chk; /* insert the chunk for sending */ TAILQ_INSERT_TAIL(&asoc->control_send_queue, chk, sctp_next); asoc->ctrl_queue_cnt++; if (stcb->asoc.send_sack) { sctp_send_sack(stcb, SCTP_SO_LOCKED); } sctp_timer_start(SCTP_TIMER_TYPE_STRRESET, stcb->sctp_ep, stcb, chk->whoTo); return (0); } void sctp_send_abort(struct mbuf *m, int iphlen, struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, uint32_t vtag, struct mbuf *cause, uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum, uint32_t vrf_id, uint16_t port) { /* Don't respond to an ABORT with an ABORT. */ if (sctp_is_there_an_abort_here(m, iphlen, &vtag)) { if (cause) sctp_m_freem(cause); return; } sctp_send_resp_msg(src, dst, sh, vtag, SCTP_ABORT_ASSOCIATION, cause, mflowtype, mflowid, fibnum, vrf_id, port); return; } void sctp_send_operr_to(struct sockaddr *src, struct sockaddr *dst, struct sctphdr *sh, uint32_t vtag, struct mbuf *cause, uint8_t mflowtype, uint32_t mflowid, uint16_t fibnum, uint32_t vrf_id, uint16_t port) { sctp_send_resp_msg(src, dst, sh, vtag, SCTP_OPERATION_ERROR, cause, mflowtype, mflowid, fibnum, vrf_id, port); return; } static struct mbuf * sctp_copy_resume(struct uio *uio, int max_send_len, int user_marks_eor, int *error, uint32_t *sndout, struct mbuf **new_tail) { struct mbuf *m; m = m_uiotombuf(uio, M_WAITOK, max_send_len, 0, (M_PKTHDR | (user_marks_eor ? M_EOR : 0))); if (m == NULL) { SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOBUFS); *error = ENOBUFS; } else { *sndout = m_length(m, NULL); *new_tail = m_last(m); } return (m); } static int sctp_copy_one(struct sctp_stream_queue_pending *sp, struct uio *uio, int resv_upfront) { sp->data = m_uiotombuf(uio, M_WAITOK, sp->length, resv_upfront, 0); if (sp->data == NULL) { SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOBUFS); return (ENOBUFS); } sp->tail_mbuf = m_last(sp->data); return (0); } static struct sctp_stream_queue_pending * sctp_copy_it_in(struct sctp_tcb *stcb, struct sctp_association *asoc, struct sctp_sndrcvinfo *srcv, struct uio *uio, struct sctp_nets *net, int max_send_len, int user_marks_eor, int *error) { /*- * This routine must be very careful in its work. Protocol * processing is up and running so care must be taken to spl...() * when you need to do something that may effect the stcb/asoc. The * sb is locked however. When data is copied the protocol processing * should be enabled since this is a slower operation... */ struct sctp_stream_queue_pending *sp = NULL; int resv_in_first; *error = 0; /* Now can we send this? */ if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) || (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) || (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) || (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) { /* got data while shutting down */ SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET); *error = ECONNRESET; goto out_now; } sctp_alloc_a_strmoq(stcb, sp); if (sp == NULL) { SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM); *error = ENOMEM; goto out_now; } sp->act_flags = 0; sp->sender_all_done = 0; sp->sinfo_flags = srcv->sinfo_flags; sp->timetolive = srcv->sinfo_timetolive; sp->ppid = srcv->sinfo_ppid; sp->context = srcv->sinfo_context; sp->fsn = 0; (void)SCTP_GETTIME_TIMEVAL(&sp->ts); sp->sid = srcv->sinfo_stream; sp->length = (uint32_t)min(uio->uio_resid, max_send_len); if ((sp->length == (uint32_t)uio->uio_resid) && ((user_marks_eor == 0) || (srcv->sinfo_flags & SCTP_EOF) || (user_marks_eor && (srcv->sinfo_flags & SCTP_EOR)))) { sp->msg_is_complete = 1; } else { sp->msg_is_complete = 0; } sp->sender_all_done = 0; sp->some_taken = 0; sp->put_last_out = 0; resv_in_first = SCTP_DATA_CHUNK_OVERHEAD(stcb); sp->data = sp->tail_mbuf = NULL; if (sp->length == 0) { goto skip_copy; } if (srcv->sinfo_keynumber_valid) { sp->auth_keyid = srcv->sinfo_keynumber; } else { sp->auth_keyid = stcb->asoc.authinfo.active_keyid; } if (sctp_auth_is_required_chunk(SCTP_DATA, stcb->asoc.peer_auth_chunks)) { sctp_auth_key_acquire(stcb, sp->auth_keyid); sp->holds_key_ref = 1; } *error = sctp_copy_one(sp, uio, resv_in_first); skip_copy: if (*error) { sctp_free_a_strmoq(stcb, sp, SCTP_SO_LOCKED); sp = NULL; } else { if (sp->sinfo_flags & SCTP_ADDR_OVER) { sp->net = net; atomic_add_int(&sp->net->ref_count, 1); } else { sp->net = NULL; } sctp_set_prsctp_policy(sp); } out_now: return (sp); } int sctp_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *p ) { int error, use_sndinfo = 0; struct sctp_sndrcvinfo sndrcvninfo; struct sockaddr *addr_to_use; #if defined(INET) && defined(INET6) struct sockaddr_in sin; #endif if (control) { /* process cmsg snd/rcv info (maybe a assoc-id) */ if (sctp_find_cmsg(SCTP_SNDRCV, (void *)&sndrcvninfo, control, sizeof(sndrcvninfo))) { /* got one */ use_sndinfo = 1; } } addr_to_use = addr; #if defined(INET) && defined(INET6) if ((addr) && (addr->sa_family == AF_INET6)) { struct sockaddr_in6 *sin6; sin6 = (struct sockaddr_in6 *)addr; if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) { in6_sin6_2_sin(&sin, sin6); addr_to_use = (struct sockaddr *)&sin; } } #endif error = sctp_lower_sosend(so, addr_to_use, uio, top, control, flags, use_sndinfo ? &sndrcvninfo : NULL ,p ); return (error); } int sctp_lower_sosend(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *i_pak, struct mbuf *control, int flags, struct sctp_sndrcvinfo *srcv , struct thread *p ) { unsigned int sndlen = 0, max_len; int error, len; struct mbuf *top = NULL; int queue_only = 0, queue_only_for_init = 0; int free_cnt_applied = 0; int un_sent; int now_filled = 0; unsigned int inqueue_bytes = 0; struct sctp_block_entry be; struct sctp_inpcb *inp; struct sctp_tcb *stcb = NULL; struct timeval now; struct sctp_nets *net; struct sctp_association *asoc; struct sctp_inpcb *t_inp; int user_marks_eor; int create_lock_applied = 0; int nagle_applies = 0; int some_on_control = 0; int got_all_of_the_send = 0; int hold_tcblock = 0; int non_blocking = 0; uint32_t local_add_more, local_soresv = 0; uint16_t port; uint16_t sinfo_flags; sctp_assoc_t sinfo_assoc_id; error = 0; net = NULL; stcb = NULL; asoc = NULL; t_inp = inp = (struct sctp_inpcb *)so->so_pcb; if (inp == NULL) { SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; if (i_pak) { SCTP_RELEASE_PKT(i_pak); } return (error); } if ((uio == NULL) && (i_pak == NULL)) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); return (EINVAL); } user_marks_eor = sctp_is_feature_on(inp, SCTP_PCB_FLAGS_EXPLICIT_EOR); atomic_add_int(&inp->total_sends, 1); if (uio) { if (uio->uio_resid < 0) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); return (EINVAL); } sndlen = (unsigned int)uio->uio_resid; } else { top = SCTP_HEADER_TO_CHAIN(i_pak); sndlen = SCTP_HEADER_LEN(i_pak); } SCTPDBG(SCTP_DEBUG_OUTPUT1, "Send called addr:%p send length %d\n", (void *)addr, sndlen); if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) && SCTP_IS_LISTENING(inp)) { /* The listener can NOT send */ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, ENOTCONN); error = ENOTCONN; goto out_unlocked; } /** * Pre-screen address, if one is given the sin-len * must be set correctly! */ if (addr) { union sctp_sockstore *raddr = (union sctp_sockstore *)addr; switch (raddr->sa.sa_family) { #ifdef INET case AF_INET: if (raddr->sin.sin_len != sizeof(struct sockaddr_in)) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out_unlocked; } port = raddr->sin.sin_port; break; #endif #ifdef INET6 case AF_INET6: if (raddr->sin6.sin6_len != sizeof(struct sockaddr_in6)) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out_unlocked; } port = raddr->sin6.sin6_port; break; #endif default: SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EAFNOSUPPORT); error = EAFNOSUPPORT; goto out_unlocked; } } else port = 0; if (srcv) { sinfo_flags = srcv->sinfo_flags; sinfo_assoc_id = srcv->sinfo_assoc_id; if (INVALID_SINFO_FLAG(sinfo_flags) || PR_SCTP_INVALID_POLICY(sinfo_flags)) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out_unlocked; } if (srcv->sinfo_flags) SCTP_STAT_INCR(sctps_sends_with_flags); } else { sinfo_flags = inp->def_send.sinfo_flags; sinfo_assoc_id = inp->def_send.sinfo_assoc_id; } if (sinfo_flags & SCTP_SENDALL) { /* its a sendall */ error = sctp_sendall(inp, uio, top, srcv); top = NULL; goto out_unlocked; } if ((sinfo_flags & SCTP_ADDR_OVER) && (addr == NULL)) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out_unlocked; } /* now we must find the assoc */ if ((inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED) || (inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) { SCTP_INP_RLOCK(inp); stcb = LIST_FIRST(&inp->sctp_asoc_list); if (stcb) { SCTP_TCB_LOCK(stcb); hold_tcblock = 1; } SCTP_INP_RUNLOCK(inp); } else if (sinfo_assoc_id) { stcb = sctp_findassociation_ep_asocid(inp, sinfo_assoc_id, 1); if (stcb != NULL) { hold_tcblock = 1; } } else if (addr) { /*- * Since we did not use findep we must * increment it, and if we don't find a tcb * decrement it. */ SCTP_INP_WLOCK(inp); SCTP_INP_INCR_REF(inp); SCTP_INP_WUNLOCK(inp); stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL); if (stcb == NULL) { SCTP_INP_WLOCK(inp); SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); } else { hold_tcblock = 1; } } if ((stcb == NULL) && (addr)) { /* Possible implicit send? */ SCTP_ASOC_CREATE_LOCK(inp); create_lock_applied = 1; if ((inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_GONE) || (inp->sctp_flags & SCTP_PCB_FLAGS_SOCKET_ALLGONE)) { /* Should I really unlock ? */ SCTP_LTRACE_ERR_RET(NULL, NULL, NULL, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out_unlocked; } if (((inp->sctp_flags & SCTP_PCB_FLAGS_BOUND_V6) == 0) && (addr->sa_family == AF_INET6)) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out_unlocked; } SCTP_INP_WLOCK(inp); SCTP_INP_INCR_REF(inp); SCTP_INP_WUNLOCK(inp); /* With the lock applied look again */ stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL); if ((stcb == NULL) && (control != NULL) && (port > 0)) { stcb = sctp_findassociation_cmsgs(&t_inp, port, control, &net, &error); } if (stcb == NULL) { SCTP_INP_WLOCK(inp); SCTP_INP_DECR_REF(inp); SCTP_INP_WUNLOCK(inp); } else { hold_tcblock = 1; } if (error) { goto out_unlocked; } if (t_inp != inp) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOTCONN); error = ENOTCONN; goto out_unlocked; } } if (stcb == NULL) { if (addr == NULL) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOENT); error = ENOENT; goto out_unlocked; } else { /* We must go ahead and start the INIT process */ uint32_t vrf_id; if ((sinfo_flags & SCTP_ABORT) || ((sinfo_flags & SCTP_EOF) && (sndlen == 0))) { /*- * User asks to abort a non-existant assoc, * or EOF a non-existant assoc with no data */ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOENT); error = ENOENT; goto out_unlocked; } /* get an asoc/stcb struct */ vrf_id = inp->def_vrf_id; #ifdef INVARIANTS if (create_lock_applied == 0) { panic("Error, should hold create lock and I don't?"); } #endif stcb = sctp_aloc_assoc(inp, addr, &error, 0, vrf_id, inp->sctp_ep.pre_open_stream_count, inp->sctp_ep.port, p); if (stcb == NULL) { /* Error is setup for us in the call */ goto out_unlocked; } if (stcb->sctp_ep->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) { stcb->sctp_ep->sctp_flags |= SCTP_PCB_FLAGS_CONNECTED; /* * Set the connected flag so we can queue * data */ soisconnecting(so); } hold_tcblock = 1; if (create_lock_applied) { SCTP_ASOC_CREATE_UNLOCK(inp); create_lock_applied = 0; } else { SCTP_PRINTF("Huh-3? create lock should have been on??\n"); } /* * Turn on queue only flag to prevent data from * being sent */ queue_only = 1; asoc = &stcb->asoc; SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT); (void)SCTP_GETTIME_TIMEVAL(&asoc->time_entered); /* initialize authentication params for the assoc */ sctp_initialize_auth_params(inp, stcb); if (control) { if (sctp_process_cmsgs_for_init(stcb, control, &error)) { sctp_free_assoc(inp, stcb, SCTP_PCBFREE_FORCE, SCTP_FROM_SCTP_OUTPUT + SCTP_LOC_5); hold_tcblock = 0; stcb = NULL; goto out_unlocked; } } /* out with the INIT */ queue_only_for_init = 1; /*- * we may want to dig in after this call and adjust the MTU * value. It defaulted to 1500 (constant) but the ro * structure may now have an update and thus we may need to * change it BEFORE we append the message. */ } } else asoc = &stcb->asoc; if (srcv == NULL) srcv = (struct sctp_sndrcvinfo *)&asoc->def_send; if (srcv->sinfo_flags & SCTP_ADDR_OVER) { if (addr) net = sctp_findnet(stcb, addr); else net = NULL; if ((net == NULL) || ((port != 0) && (port != stcb->rport))) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out_unlocked; } } else { if (stcb->asoc.alternate) { net = stcb->asoc.alternate; } else { net = stcb->asoc.primary_destination; } } atomic_add_int(&stcb->total_sends, 1); /* Keep the stcb from being freed under our feet */ atomic_add_int(&asoc->refcnt, 1); free_cnt_applied = 1; if (sctp_is_feature_on(inp, SCTP_PCB_FLAGS_NO_FRAGMENT)) { if (sndlen > asoc->smallest_mtu) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE); error = EMSGSIZE; goto out_unlocked; } } if (SCTP_SO_IS_NBIO(so) || (flags & MSG_NBIO) ) { non_blocking = 1; } /* would we block? */ if (non_blocking) { uint32_t amount; if (hold_tcblock == 0) { SCTP_TCB_LOCK(stcb); hold_tcblock = 1; } inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb)); if (user_marks_eor == 0) { amount = sndlen; } else { amount = 1; } if ((SCTP_SB_LIMIT_SND(so) < (amount + inqueue_bytes + stcb->asoc.sb_send_resv)) || (stcb->asoc.chunks_on_out_queue >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EWOULDBLOCK); if (sndlen > SCTP_SB_LIMIT_SND(so)) error = EMSGSIZE; else error = EWOULDBLOCK; goto out_unlocked; } stcb->asoc.sb_send_resv += sndlen; SCTP_TCB_UNLOCK(stcb); hold_tcblock = 0; } else { atomic_add_int(&stcb->asoc.sb_send_resv, sndlen); } local_soresv = sndlen; if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET); error = ECONNRESET; goto out_unlocked; } if (create_lock_applied) { SCTP_ASOC_CREATE_UNLOCK(inp); create_lock_applied = 0; } /* Is the stream no. valid? */ if (srcv->sinfo_stream >= asoc->streamoutcnt) { /* Invalid stream number */ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out_unlocked; } if ((asoc->strmout[srcv->sinfo_stream].state != SCTP_STREAM_OPEN) && (asoc->strmout[srcv->sinfo_stream].state != SCTP_STREAM_OPENING)) { /* * Can't queue any data while stream reset is underway. */ if (asoc->strmout[srcv->sinfo_stream].state > SCTP_STREAM_OPEN) { error = EAGAIN; } else { error = EINVAL; } SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, error); goto out_unlocked; } if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) || (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) { queue_only = 1; } /* we are now done with all control */ if (control) { sctp_m_freem(control); control = NULL; } if ((SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_SENT) || (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_RECEIVED) || (SCTP_GET_STATE(stcb) == SCTP_STATE_SHUTDOWN_ACK_SENT) || (asoc->state & SCTP_STATE_SHUTDOWN_PENDING)) { if (srcv->sinfo_flags & SCTP_ABORT) { ; } else { SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET); error = ECONNRESET; goto out_unlocked; } } /* Ok, we will attempt a msgsnd :> */ if (p) { p->td_ru.ru_msgsnd++; } /* Are we aborting? */ if (srcv->sinfo_flags & SCTP_ABORT) { struct mbuf *mm; int tot_demand, tot_out = 0, max_out; SCTP_STAT_INCR(sctps_sends_with_abort); if ((SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_WAIT) || (SCTP_GET_STATE(stcb) == SCTP_STATE_COOKIE_ECHOED)) { /* It has to be up before we abort */ /* how big is the user initiated abort? */ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out; } if (hold_tcblock) { SCTP_TCB_UNLOCK(stcb); hold_tcblock = 0; } if (top) { struct mbuf *cntm = NULL; mm = sctp_get_mbuf_for_msg(sizeof(struct sctp_paramhdr), 0, M_WAITOK, 1, MT_DATA); if (sndlen != 0) { for (cntm = top; cntm; cntm = SCTP_BUF_NEXT(cntm)) { tot_out += SCTP_BUF_LEN(cntm); } } } else { /* Must fit in a MTU */ tot_out = sndlen; tot_demand = (tot_out + sizeof(struct sctp_paramhdr)); if (tot_demand > SCTP_DEFAULT_ADD_MORE) { /* To big */ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE); error = EMSGSIZE; goto out; } mm = sctp_get_mbuf_for_msg(tot_demand, 0, M_WAITOK, 1, MT_DATA); } if (mm == NULL) { SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, ENOMEM); error = ENOMEM; goto out; } max_out = asoc->smallest_mtu - sizeof(struct sctp_paramhdr); max_out -= sizeof(struct sctp_abort_msg); if (tot_out > max_out) { tot_out = max_out; } if (mm) { struct sctp_paramhdr *ph; /* now move forward the data pointer */ ph = mtod(mm, struct sctp_paramhdr *); ph->param_type = htons(SCTP_CAUSE_USER_INITIATED_ABT); ph->param_length = htons((uint16_t)(sizeof(struct sctp_paramhdr) + tot_out)); ph++; SCTP_BUF_LEN(mm) = tot_out + sizeof(struct sctp_paramhdr); if (top == NULL) { error = uiomove((caddr_t)ph, (int)tot_out, uio); if (error) { /*- * Here if we can't get his data we * still abort we just don't get to * send the users note :-0 */ sctp_m_freem(mm); mm = NULL; } } else { if (sndlen != 0) { SCTP_BUF_NEXT(mm) = top; } } } if (hold_tcblock == 0) { SCTP_TCB_LOCK(stcb); } atomic_add_int(&stcb->asoc.refcnt, -1); free_cnt_applied = 0; /* release this lock, otherwise we hang on ourselves */ sctp_abort_an_association(stcb->sctp_ep, stcb, mm, SCTP_SO_LOCKED); /* now relock the stcb so everything is sane */ hold_tcblock = 0; stcb = NULL; /* * In this case top is already chained to mm avoid double * free, since we free it below if top != NULL and driver * would free it after sending the packet out */ if (sndlen != 0) { top = NULL; } goto out_unlocked; } /* Calculate the maximum we can send */ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb)); if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) { if (non_blocking) { /* we already checked for non-blocking above. */ max_len = sndlen; } else { max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes; } } else { max_len = 0; } if (hold_tcblock) { SCTP_TCB_UNLOCK(stcb); hold_tcblock = 0; } if (asoc->strmout == NULL) { /* huh? software error */ SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EFAULT); error = EFAULT; goto out_unlocked; } /* Unless E_EOR mode is on, we must make a send FIT in one call. */ if ((user_marks_eor == 0) && (sndlen > SCTP_SB_LIMIT_SND(stcb->sctp_socket))) { /* It will NEVER fit */ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EMSGSIZE); error = EMSGSIZE; goto out_unlocked; } if ((uio == NULL) && user_marks_eor) { /*- * We do not support eeor mode for * sending with mbuf chains (like sendfile). */ SCTP_LTRACE_ERR_RET(NULL, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out_unlocked; } if (user_marks_eor) { local_add_more = min(SCTP_SB_LIMIT_SND(so), SCTP_BASE_SYSCTL(sctp_add_more_threshold)); } else { /*- * For non-eeor the whole message must fit in * the socket send buffer. */ local_add_more = sndlen; } len = 0; if (non_blocking) { goto skip_preblock; } if (((max_len <= local_add_more) && (SCTP_SB_LIMIT_SND(so) >= local_add_more)) || (max_len == 0) || ((stcb->asoc.chunks_on_out_queue + stcb->asoc.stream_queue_cnt) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) { /* No room right now ! */ SOCKBUF_LOCK(&so->so_snd); inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb)); while ((SCTP_SB_LIMIT_SND(so) < (inqueue_bytes + local_add_more)) || ((stcb->asoc.stream_queue_cnt + stcb->asoc.chunks_on_out_queue) >= SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue))) { SCTPDBG(SCTP_DEBUG_OUTPUT1, "pre_block limit:%u <(inq:%d + %d) || (%d+%d > %d)\n", (unsigned int)SCTP_SB_LIMIT_SND(so), inqueue_bytes, local_add_more, stcb->asoc.stream_queue_cnt, stcb->asoc.chunks_on_out_queue, SCTP_BASE_SYSCTL(sctp_max_chunks_on_queue)); if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) { sctp_log_block(SCTP_BLOCK_LOG_INTO_BLKA, asoc, sndlen); } be.error = 0; stcb->block_entry = &be; error = sbwait(&so->so_snd); stcb->block_entry = NULL; if (error || so->so_error || be.error) { if (error == 0) { if (so->so_error) error = so->so_error; if (be.error) { error = be.error; } } SOCKBUF_UNLOCK(&so->so_snd); goto out_unlocked; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) { sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK, asoc, stcb->asoc.total_output_queue_size); } if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { SOCKBUF_UNLOCK(&so->so_snd); goto out_unlocked; } inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb)); } if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) { max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes; } else { max_len = 0; } SOCKBUF_UNLOCK(&so->so_snd); } skip_preblock: if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { goto out_unlocked; } /* * sndlen covers for mbuf case uio_resid covers for the non-mbuf * case NOTE: uio will be null when top/mbuf is passed */ if (sndlen == 0) { if (srcv->sinfo_flags & SCTP_EOF) { got_all_of_the_send = 1; goto dataless_eof; } else { SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out; } } if (top == NULL) { struct sctp_stream_queue_pending *sp; struct sctp_stream_out *strm; uint32_t sndout; SCTP_TCB_SEND_LOCK(stcb); if ((asoc->stream_locked) && (asoc->stream_locked_on != srcv->sinfo_stream)) { SCTP_TCB_SEND_UNLOCK(stcb); SCTP_LTRACE_ERR_RET(inp, stcb, net, SCTP_FROM_SCTP_OUTPUT, EINVAL); error = EINVAL; goto out; } SCTP_TCB_SEND_UNLOCK(stcb); strm = &stcb->asoc.strmout[srcv->sinfo_stream]; if (strm->last_msg_incomplete == 0) { do_a_copy_in: sp = sctp_copy_it_in(stcb, asoc, srcv, uio, net, max_len, user_marks_eor, &error); if (error) { goto out; } SCTP_TCB_SEND_LOCK(stcb); if (sp->msg_is_complete) { strm->last_msg_incomplete = 0; asoc->stream_locked = 0; } else { /* * Just got locked to this guy in case of an * interrupt. */ strm->last_msg_incomplete = 1; if (stcb->asoc.idata_supported == 0) { asoc->stream_locked = 1; asoc->stream_locked_on = srcv->sinfo_stream; } sp->sender_all_done = 0; } sctp_snd_sb_alloc(stcb, sp->length); atomic_add_int(&asoc->stream_queue_cnt, 1); if (srcv->sinfo_flags & SCTP_UNORDERED) { SCTP_STAT_INCR(sctps_sends_with_unord); } TAILQ_INSERT_TAIL(&strm->outqueue, sp, next); stcb->asoc.ss_functions.sctp_ss_add_to_stream(stcb, asoc, strm, sp, 1); SCTP_TCB_SEND_UNLOCK(stcb); } else { SCTP_TCB_SEND_LOCK(stcb); sp = TAILQ_LAST(&strm->outqueue, sctp_streamhead); SCTP_TCB_SEND_UNLOCK(stcb); if (sp == NULL) { /* ???? Huh ??? last msg is gone */ #ifdef INVARIANTS panic("Warning: Last msg marked incomplete, yet nothing left?"); #else SCTP_PRINTF("Warning: Last msg marked incomplete, yet nothing left?\n"); strm->last_msg_incomplete = 0; #endif goto do_a_copy_in; } } while (uio->uio_resid > 0) { /* How much room do we have? */ struct mbuf *new_tail, *mm; inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb)); if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes; else max_len = 0; if ((max_len > SCTP_BASE_SYSCTL(sctp_add_more_threshold)) || (max_len && (SCTP_SB_LIMIT_SND(so) < SCTP_BASE_SYSCTL(sctp_add_more_threshold))) || (uio->uio_resid && (uio->uio_resid <= (int)max_len))) { sndout = 0; new_tail = NULL; if (hold_tcblock) { SCTP_TCB_UNLOCK(stcb); hold_tcblock = 0; } mm = sctp_copy_resume(uio, max_len, user_marks_eor, &error, &sndout, &new_tail); if ((mm == NULL) || error) { if (mm) { sctp_m_freem(mm); } goto out; } /* Update the mbuf and count */ SCTP_TCB_SEND_LOCK(stcb); if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { /* * we need to get out. Peer probably * aborted. */ sctp_m_freem(mm); if (stcb->asoc.state & SCTP_PCB_FLAGS_WAS_ABORTED) { SCTP_LTRACE_ERR_RET(NULL, stcb, NULL, SCTP_FROM_SCTP_OUTPUT, ECONNRESET); error = ECONNRESET; } SCTP_TCB_SEND_UNLOCK(stcb); goto out; } if (sp->tail_mbuf) { /* tack it to the end */ SCTP_BUF_NEXT(sp->tail_mbuf) = mm; sp->tail_mbuf = new_tail; } else { /* A stolen mbuf */ sp->data = mm; sp->tail_mbuf = new_tail; } sctp_snd_sb_alloc(stcb, sndout); atomic_add_int(&sp->length, sndout); len += sndout; if (srcv->sinfo_flags & SCTP_SACK_IMMEDIATELY) { sp->sinfo_flags |= SCTP_SACK_IMMEDIATELY; } /* Did we reach EOR? */ if ((uio->uio_resid == 0) && ((user_marks_eor == 0) || (srcv->sinfo_flags & SCTP_EOF) || (user_marks_eor && (srcv->sinfo_flags & SCTP_EOR)))) { sp->msg_is_complete = 1; } else { sp->msg_is_complete = 0; } SCTP_TCB_SEND_UNLOCK(stcb); } if (uio->uio_resid == 0) { /* got it all? */ continue; } /* PR-SCTP? */ if ((asoc->prsctp_supported) && (asoc->sent_queue_cnt_removeable > 0)) { /* * This is ugly but we must assure locking * order */ if (hold_tcblock == 0) { SCTP_TCB_LOCK(stcb); hold_tcblock = 1; } sctp_prune_prsctp(stcb, asoc, srcv, sndlen); inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb)); if (SCTP_SB_LIMIT_SND(so) > inqueue_bytes) max_len = SCTP_SB_LIMIT_SND(so) - inqueue_bytes; else max_len = 0; if (max_len > 0) { continue; } SCTP_TCB_UNLOCK(stcb); hold_tcblock = 0; } /* wait for space now */ if (non_blocking) { /* Non-blocking io in place out */ goto skip_out_eof; } /* What about the INIT, send it maybe */ if (queue_only_for_init) { if (hold_tcblock == 0) { SCTP_TCB_LOCK(stcb); hold_tcblock = 1; } if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) { /* a collision took us forward? */ queue_only = 0; } else { sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED); SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT); queue_only = 1; } } if ((net->flight_size > net->cwnd) && (asoc->sctp_cmt_on_off == 0)) { SCTP_STAT_INCR(sctps_send_cwnd_avoid); queue_only = 1; } else if (asoc->ifp_had_enobuf) { SCTP_STAT_INCR(sctps_ifnomemqueued); if (net->flight_size > (2 * net->mtu)) { queue_only = 1; } asoc->ifp_had_enobuf = 0; } un_sent = stcb->asoc.total_output_queue_size - stcb->asoc.total_flight; if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) && (stcb->asoc.total_flight > 0) && (stcb->asoc.stream_queue_cnt < SCTP_MAX_DATA_BUNDLING) && (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))) { /*- * Ok, Nagle is set on and we have data outstanding. * Don't send anything and let SACKs drive out the * data unless we have a "full" segment to send. */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) { sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED); } SCTP_STAT_INCR(sctps_naglequeued); nagle_applies = 1; } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) { if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) sctp_log_nagle_event(stcb, SCTP_NAGLE_SKIPPED); } SCTP_STAT_INCR(sctps_naglesent); nagle_applies = 0; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) { sctp_misc_ints(SCTP_CWNDLOG_PRESEND, queue_only_for_init, queue_only, nagle_applies, un_sent); sctp_misc_ints(SCTP_CWNDLOG_PRESEND, stcb->asoc.total_output_queue_size, stcb->asoc.total_flight, stcb->asoc.chunks_on_out_queue, stcb->asoc.total_flight_count); } if (queue_only_for_init) queue_only_for_init = 0; if ((queue_only == 0) && (nagle_applies == 0)) { /*- * need to start chunk output * before blocking.. note that if * a lock is already applied, then * the input via the net is happening * and I don't need to start output :-D */ if (hold_tcblock == 0) { if (SCTP_TCB_TRYLOCK(stcb)) { hold_tcblock = 1; sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED); } } else { sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED); } if (hold_tcblock == 1) { SCTP_TCB_UNLOCK(stcb); hold_tcblock = 0; } } SOCKBUF_LOCK(&so->so_snd); /*- * This is a bit strange, but I think it will * work. The total_output_queue_size is locked and * protected by the TCB_LOCK, which we just released. * There is a race that can occur between releasing it * above, and me getting the socket lock, where sacks * come in but we have not put the SB_WAIT on the * so_snd buffer to get the wakeup. After the LOCK * is applied the sack_processing will also need to * LOCK the so->so_snd to do the actual sowwakeup(). So * once we have the socket buffer lock if we recheck the * size we KNOW we will get to sleep safely with the * wakeup flag in place. */ inqueue_bytes = stcb->asoc.total_output_queue_size - (stcb->asoc.chunks_on_out_queue * SCTP_DATA_CHUNK_OVERHEAD(stcb)); if (SCTP_SB_LIMIT_SND(so) <= (inqueue_bytes + min(SCTP_BASE_SYSCTL(sctp_add_more_threshold), SCTP_SB_LIMIT_SND(so)))) { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) { sctp_log_block(SCTP_BLOCK_LOG_INTO_BLK, asoc, (size_t)uio->uio_resid); } be.error = 0; stcb->block_entry = &be; error = sbwait(&so->so_snd); stcb->block_entry = NULL; if (error || so->so_error || be.error) { if (error == 0) { if (so->so_error) error = so->so_error; if (be.error) { error = be.error; } } SOCKBUF_UNLOCK(&so->so_snd); goto out_unlocked; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) { sctp_log_block(SCTP_BLOCK_LOG_OUTOF_BLK, asoc, stcb->asoc.total_output_queue_size); } } SOCKBUF_UNLOCK(&so->so_snd); if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { goto out_unlocked; } } SCTP_TCB_SEND_LOCK(stcb); if (stcb->asoc.state & SCTP_STATE_ABOUT_TO_BE_FREED) { SCTP_TCB_SEND_UNLOCK(stcb); goto out_unlocked; } if (sp) { if (sp->msg_is_complete == 0) { strm->last_msg_incomplete = 1; if (stcb->asoc.idata_supported == 0) { asoc->stream_locked = 1; asoc->stream_locked_on = srcv->sinfo_stream; } } else { sp->sender_all_done = 1; strm->last_msg_incomplete = 0; asoc->stream_locked = 0; } } else { SCTP_PRINTF("Huh no sp TSNH?\n"); strm->last_msg_incomplete = 0; asoc->stream_locked = 0; } SCTP_TCB_SEND_UNLOCK(stcb); if (uio->uio_resid == 0) { got_all_of_the_send = 1; } } else { /* We send in a 0, since we do NOT have any locks */ error = sctp_msg_append(stcb, net, top, srcv, 0); top = NULL; if (srcv->sinfo_flags & SCTP_EOF) { /* * This should only happen for Panda for the mbuf * send case, which does NOT yet support EEOR mode. * Thus, we can just set this flag to do the proper * EOF handling. */ got_all_of_the_send = 1; } } if (error) { goto out; } dataless_eof: /* EOF thing ? */ if ((srcv->sinfo_flags & SCTP_EOF) && (got_all_of_the_send == 1)) { SCTP_STAT_INCR(sctps_sends_with_eof); error = 0; if (hold_tcblock == 0) { SCTP_TCB_LOCK(stcb); hold_tcblock = 1; } if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue) && sctp_is_there_unsent_data(stcb, SCTP_SO_LOCKED) == 0) { if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) { goto abort_anyway; } /* there is nothing queued to send, so I'm done... */ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) && (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) && (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) { struct sctp_nets *netp; /* only send SHUTDOWN the first time through */ if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) { SCTP_STAT_DECR_GAUGE32(sctps_currestab); } SCTP_SET_STATE(stcb, SCTP_STATE_SHUTDOWN_SENT); sctp_stop_timers_for_shutdown(stcb); if (stcb->asoc.alternate) { netp = stcb->asoc.alternate; } else { netp = stcb->asoc.primary_destination; } sctp_send_shutdown(stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWN, stcb->sctp_ep, stcb, netp); sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, asoc->primary_destination); } } else { /*- * we still got (or just got) data to send, so set * SHUTDOWN_PENDING */ /*- * XXX sockets draft says that SCTP_EOF should be * sent with no data. currently, we will allow user * data to be sent first and move to * SHUTDOWN-PENDING */ if ((SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_SENT) && (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_RECEIVED) && (SCTP_GET_STATE(stcb) != SCTP_STATE_SHUTDOWN_ACK_SENT)) { if (hold_tcblock == 0) { SCTP_TCB_LOCK(stcb); hold_tcblock = 1; } if ((*asoc->ss_functions.sctp_ss_is_user_msgs_incomplete) (stcb, asoc)) { SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_PARTIAL_MSG_LEFT); } SCTP_ADD_SUBSTATE(stcb, SCTP_STATE_SHUTDOWN_PENDING); if (TAILQ_EMPTY(&asoc->send_queue) && TAILQ_EMPTY(&asoc->sent_queue) && (asoc->state & SCTP_STATE_PARTIAL_MSG_LEFT)) { struct mbuf *op_err; char msg[SCTP_DIAG_INFO_LEN]; abort_anyway: if (free_cnt_applied) { atomic_add_int(&stcb->asoc.refcnt, -1); free_cnt_applied = 0; } snprintf(msg, sizeof(msg), "%s:%d at %s", __FILE__, __LINE__, __func__); op_err = sctp_generate_cause(SCTP_BASE_SYSCTL(sctp_diag_info_code), msg); sctp_abort_an_association(stcb->sctp_ep, stcb, op_err, SCTP_SO_LOCKED); /* * now relock the stcb so everything * is sane */ hold_tcblock = 0; stcb = NULL; goto out; } sctp_timer_start(SCTP_TIMER_TYPE_SHUTDOWNGUARD, stcb->sctp_ep, stcb, asoc->primary_destination); sctp_feature_off(inp, SCTP_PCB_FLAGS_NODELAY); } } } skip_out_eof: if (!TAILQ_EMPTY(&stcb->asoc.control_send_queue)) { some_on_control = 1; } if (queue_only_for_init) { if (hold_tcblock == 0) { SCTP_TCB_LOCK(stcb); hold_tcblock = 1; } if (SCTP_GET_STATE(stcb) == SCTP_STATE_OPEN) { /* a collision took us forward? */ queue_only = 0; } else { sctp_send_initiate(inp, stcb, SCTP_SO_LOCKED); SCTP_SET_STATE(stcb, SCTP_STATE_COOKIE_WAIT); queue_only = 1; } } if ((net->flight_size > net->cwnd) && (stcb->asoc.sctp_cmt_on_off == 0)) { SCTP_STAT_INCR(sctps_send_cwnd_avoid); queue_only = 1; } else if (asoc->ifp_had_enobuf) { SCTP_STAT_INCR(sctps_ifnomemqueued); if (net->flight_size > (2 * net->mtu)) { queue_only = 1; } asoc->ifp_had_enobuf = 0; } un_sent = stcb->asoc.total_output_queue_size - stcb->asoc.total_flight; if ((sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) && (stcb->asoc.total_flight > 0) && (stcb->asoc.stream_queue_cnt < SCTP_MAX_DATA_BUNDLING) && (un_sent < (int)(stcb->asoc.smallest_mtu - SCTP_MIN_OVERHEAD))) { /*- * Ok, Nagle is set on and we have data outstanding. * Don't send anything and let SACKs drive out the * data unless wen have a "full" segment to send. */ if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) { sctp_log_nagle_event(stcb, SCTP_NAGLE_APPLIED); } SCTP_STAT_INCR(sctps_naglequeued); nagle_applies = 1; } else { if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_NAGLE_LOGGING_ENABLE) { if (sctp_is_feature_off(inp, SCTP_PCB_FLAGS_NODELAY)) sctp_log_nagle_event(stcb, SCTP_NAGLE_SKIPPED); } SCTP_STAT_INCR(sctps_naglesent); nagle_applies = 0; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_BLK_LOGGING_ENABLE) { sctp_misc_ints(SCTP_CWNDLOG_PRESEND, queue_only_for_init, queue_only, nagle_applies, un_sent); sctp_misc_ints(SCTP_CWNDLOG_PRESEND, stcb->asoc.total_output_queue_size, stcb->asoc.total_flight, stcb->asoc.chunks_on_out_queue, stcb->asoc.total_flight_count); } if ((queue_only == 0) && (nagle_applies == 0) && (stcb->asoc.peers_rwnd && un_sent)) { /* we can attempt to send too. */ if (hold_tcblock == 0) { /* * If there is activity recv'ing sacks no need to * send */ if (SCTP_TCB_TRYLOCK(stcb)) { sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED); hold_tcblock = 1; } } else { sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED); } } else if ((queue_only == 0) && (stcb->asoc.peers_rwnd == 0) && (stcb->asoc.total_flight == 0)) { /* We get to have a probe outstanding */ if (hold_tcblock == 0) { hold_tcblock = 1; SCTP_TCB_LOCK(stcb); } sctp_chunk_output(inp, stcb, SCTP_OUTPUT_FROM_USR_SEND, SCTP_SO_LOCKED); } else if (some_on_control) { int num_out, reason, frag_point; /* Here we do control only */ if (hold_tcblock == 0) { hold_tcblock = 1; SCTP_TCB_LOCK(stcb); } frag_point = sctp_get_frag_point(stcb, &stcb->asoc); (void)sctp_med_chunk_output(inp, stcb, &stcb->asoc, &num_out, &reason, 1, 1, &now, &now_filled, frag_point, SCTP_SO_LOCKED); } SCTPDBG(SCTP_DEBUG_OUTPUT1, "USR Send complete qo:%d prw:%d unsent:%d tf:%d cooq:%d toqs:%d err:%d\n", queue_only, stcb->asoc.peers_rwnd, un_sent, stcb->asoc.total_flight, stcb->asoc.chunks_on_out_queue, stcb->asoc.total_output_queue_size, error); out: out_unlocked: if (local_soresv && stcb) { atomic_subtract_int(&stcb->asoc.sb_send_resv, sndlen); } if (create_lock_applied) { SCTP_ASOC_CREATE_UNLOCK(inp); } if ((stcb) && hold_tcblock) { SCTP_TCB_UNLOCK(stcb); } if (stcb && free_cnt_applied) { atomic_add_int(&stcb->asoc.refcnt, -1); } #ifdef INVARIANTS if (stcb) { if (mtx_owned(&stcb->tcb_mtx)) { panic("Leaving with tcb mtx owned?"); } if (mtx_owned(&stcb->tcb_send_mtx)) { panic("Leaving with tcb send mtx owned?"); } } #endif if (top) { sctp_m_freem(top); } if (control) { sctp_m_freem(control); } return (error); } /* * generate an AUTHentication chunk, if required */ struct mbuf * sctp_add_auth_chunk(struct mbuf *m, struct mbuf **m_end, struct sctp_auth_chunk **auth_ret, uint32_t *offset, struct sctp_tcb *stcb, uint8_t chunk) { struct mbuf *m_auth; struct sctp_auth_chunk *auth; int chunk_len; struct mbuf *cn; if ((m_end == NULL) || (auth_ret == NULL) || (offset == NULL) || (stcb == NULL)) return (m); if (stcb->asoc.auth_supported == 0) { return (m); } /* does the requested chunk require auth? */ if (!sctp_auth_is_required_chunk(chunk, stcb->asoc.peer_auth_chunks)) { return (m); } m_auth = sctp_get_mbuf_for_msg(sizeof(*auth), 0, M_NOWAIT, 1, MT_HEADER); if (m_auth == NULL) { /* no mbuf's */ return (m); } /* reserve some space if this will be the first mbuf */ if (m == NULL) SCTP_BUF_RESV_UF(m_auth, SCTP_MIN_OVERHEAD); /* fill in the AUTH chunk details */ auth = mtod(m_auth, struct sctp_auth_chunk *); memset(auth, 0, sizeof(*auth)); auth->ch.chunk_type = SCTP_AUTHENTICATION; auth->ch.chunk_flags = 0; chunk_len = sizeof(*auth) + sctp_get_hmac_digest_len(stcb->asoc.peer_hmac_id); auth->ch.chunk_length = htons(chunk_len); auth->hmac_id = htons(stcb->asoc.peer_hmac_id); /* key id and hmac digest will be computed and filled in upon send */ /* save the offset where the auth was inserted into the chain */ *offset = 0; for (cn = m; cn; cn = SCTP_BUF_NEXT(cn)) { *offset += SCTP_BUF_LEN(cn); } /* update length and return pointer to the auth chunk */ SCTP_BUF_LEN(m_auth) = chunk_len; m = sctp_copy_mbufchain(m_auth, m, m_end, 1, chunk_len, 0); if (auth_ret != NULL) *auth_ret = auth; return (m); } #ifdef INET6 int sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t *ro) { struct nd_prefix *pfx = NULL; struct nd_pfxrouter *pfxrtr = NULL; struct sockaddr_in6 gw6; if (ro == NULL || ro->ro_rt == NULL || src6->sin6_family != AF_INET6) return (0); /* get prefix entry of address */ ND6_RLOCK(); LIST_FOREACH(pfx, &MODULE_GLOBAL(nd_prefix), ndpr_entry) { if (pfx->ndpr_stateflags & NDPRF_DETACHED) continue; if (IN6_ARE_MASKED_ADDR_EQUAL(&pfx->ndpr_prefix.sin6_addr, &src6->sin6_addr, &pfx->ndpr_mask)) break; } /* no prefix entry in the prefix list */ if (pfx == NULL) { ND6_RUNLOCK(); SCTPDBG(SCTP_DEBUG_OUTPUT2, "No prefix entry for "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6); return (0); } SCTPDBG(SCTP_DEBUG_OUTPUT2, "v6src_match_nexthop(), Prefix entry is "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)src6); /* search installed gateway from prefix entry */ LIST_FOREACH(pfxrtr, &pfx->ndpr_advrtrs, pfr_entry) { memset(&gw6, 0, sizeof(struct sockaddr_in6)); gw6.sin6_family = AF_INET6; gw6.sin6_len = sizeof(struct sockaddr_in6); memcpy(&gw6.sin6_addr, &pfxrtr->router->rtaddr, sizeof(struct in6_addr)); SCTPDBG(SCTP_DEBUG_OUTPUT2, "prefix router is "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&gw6); SCTPDBG(SCTP_DEBUG_OUTPUT2, "installed router is "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway); if (sctp_cmpaddr((struct sockaddr *)&gw6, ro->ro_rt->rt_gateway)) { ND6_RUNLOCK(); SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is installed\n"); return (1); } } ND6_RUNLOCK(); SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is not installed\n"); return (0); } #endif int sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t *ro) { #ifdef INET struct sockaddr_in *sin, *mask; struct ifaddr *ifa; struct in_addr srcnetaddr, gwnetaddr; if (ro == NULL || ro->ro_rt == NULL || sifa->address.sa.sa_family != AF_INET) { return (0); } ifa = (struct ifaddr *)sifa->ifa; mask = (struct sockaddr_in *)(ifa->ifa_netmask); sin = &sifa->address.sin; srcnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr); SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: src address is "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa); SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", srcnetaddr.s_addr); sin = (struct sockaddr_in *)ro->ro_rt->rt_gateway; gwnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr); SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: nexthop is "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway); SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", gwnetaddr.s_addr); if (srcnetaddr.s_addr == gwnetaddr.s_addr) { return (1); } #endif return (0); } Index: projects/clang700-import/sys/sys/syscall.h =================================================================== --- projects/clang700-import/sys/sys/syscall.h (revision 339014) +++ projects/clang700-import/sys/sys/syscall.h (revision 339015) @@ -1,482 +1,502 @@ /* * System call numbers. * * DO NOT EDIT-- this file is automatically generated. * $FreeBSD$ */ #define SYS_syscall 0 #define SYS_exit 1 #define SYS_fork 2 #define SYS_read 3 #define SYS_write 4 #define SYS_open 5 #define SYS_close 6 #define SYS_wait4 7 /* 8 is old creat */ #define SYS_link 9 #define SYS_unlink 10 /* 11 is obsolete execv */ #define SYS_chdir 12 #define SYS_fchdir 13 #define SYS_freebsd11_mknod 14 #define SYS_chmod 15 #define SYS_chown 16 #define SYS_break 17 /* 18 is freebsd4 getfsstat */ /* 19 is old lseek */ #define SYS_getpid 20 #define SYS_mount 21 #define SYS_unmount 22 #define SYS_setuid 23 #define SYS_getuid 24 #define SYS_geteuid 25 #define SYS_ptrace 26 #define SYS_recvmsg 27 #define SYS_sendmsg 28 #define SYS_recvfrom 29 #define SYS_accept 30 #define SYS_getpeername 31 #define SYS_getsockname 32 #define SYS_access 33 #define SYS_chflags 34 #define SYS_fchflags 35 #define SYS_sync 36 #define SYS_kill 37 /* 38 is old stat */ #define SYS_getppid 39 /* 40 is old lstat */ #define SYS_dup 41 #define SYS_freebsd10_pipe 42 #define SYS_getegid 43 #define SYS_profil 44 #define SYS_ktrace 45 /* 46 is old sigaction */ #define SYS_getgid 47 /* 48 is old sigprocmask */ #define SYS_getlogin 49 #define SYS_setlogin 50 #define SYS_acct 51 /* 52 is old sigpending */ #define SYS_sigaltstack 53 #define SYS_ioctl 54 #define SYS_reboot 55 #define SYS_revoke 56 #define SYS_symlink 57 #define SYS_readlink 58 #define SYS_execve 59 #define SYS_umask 60 #define SYS_chroot 61 /* 62 is old fstat */ /* 63 is old getkerninfo */ /* 64 is old getpagesize */ #define SYS_msync 65 #define SYS_vfork 66 /* 67 is obsolete vread */ /* 68 is obsolete vwrite */ #define SYS_sbrk 69 #define SYS_sstk 70 /* 71 is old mmap */ #define SYS_freebsd11_vadvise 72 #define SYS_munmap 73 #define SYS_mprotect 74 #define SYS_madvise 75 /* 76 is obsolete vhangup */ /* 77 is obsolete vlimit */ #define SYS_mincore 78 #define SYS_getgroups 79 #define SYS_setgroups 80 #define SYS_getpgrp 81 #define SYS_setpgid 82 #define SYS_setitimer 83 /* 84 is old wait */ #define SYS_swapon 85 #define SYS_getitimer 86 /* 87 is old gethostname */ /* 88 is old sethostname */ #define SYS_getdtablesize 89 #define SYS_dup2 90 #define SYS_fcntl 92 #define SYS_select 93 #define SYS_fsync 95 #define SYS_setpriority 96 #define SYS_socket 97 #define SYS_connect 98 /* 99 is old accept */ #define SYS_getpriority 100 /* 101 is old send */ /* 102 is old recv */ /* 103 is old sigreturn */ #define SYS_bind 104 #define SYS_setsockopt 105 #define SYS_listen 106 /* 107 is obsolete vtimes */ /* 108 is old sigvec */ /* 109 is old sigblock */ /* 110 is old sigsetmask */ /* 111 is old sigsuspend */ /* 112 is old sigstack */ /* 113 is old recvmsg */ /* 114 is old sendmsg */ /* 115 is obsolete vtrace */ #define SYS_gettimeofday 116 #define SYS_getrusage 117 #define SYS_getsockopt 118 #define SYS_readv 120 #define SYS_writev 121 #define SYS_settimeofday 122 #define SYS_fchown 123 #define SYS_fchmod 124 /* 125 is old recvfrom */ #define SYS_setreuid 126 #define SYS_setregid 127 #define SYS_rename 128 /* 129 is old truncate */ /* 130 is old ftruncate */ #define SYS_flock 131 #define SYS_mkfifo 132 #define SYS_sendto 133 #define SYS_shutdown 134 #define SYS_socketpair 135 #define SYS_mkdir 136 #define SYS_rmdir 137 #define SYS_utimes 138 /* 139 is obsolete 4.2 sigreturn */ #define SYS_adjtime 140 /* 141 is old getpeername */ /* 142 is old gethostid */ /* 143 is old sethostid */ /* 144 is old getrlimit */ /* 145 is old setrlimit */ /* 146 is old killpg */ #define SYS_setsid 147 #define SYS_quotactl 148 /* 149 is old quota */ /* 150 is old getsockname */ #define SYS_nlm_syscall 154 #define SYS_nfssvc 155 /* 156 is old getdirentries */ /* 157 is freebsd4 statfs */ /* 158 is freebsd4 fstatfs */ #define SYS_lgetfh 160 #define SYS_getfh 161 /* 162 is freebsd4 getdomainname */ /* 163 is freebsd4 setdomainname */ /* 164 is freebsd4 uname */ #define SYS_sysarch 165 #define SYS_rtprio 166 #define SYS_semsys 169 #define SYS_msgsys 170 #define SYS_shmsys 171 /* 173 is freebsd6 pread */ /* 174 is freebsd6 pwrite */ #define SYS_setfib 175 #define SYS_ntp_adjtime 176 #define SYS_setgid 181 #define SYS_setegid 182 #define SYS_seteuid 183 + /* 184 is obsolete lfs_bmapv */ + /* 185 is obsolete lfs_markv */ + /* 186 is obsolete lfs_segclean */ + /* 187 is obsolete lfs_segwait */ #define SYS_freebsd11_stat 188 #define SYS_freebsd11_fstat 189 #define SYS_freebsd11_lstat 190 #define SYS_pathconf 191 #define SYS_fpathconf 192 #define SYS_getrlimit 194 #define SYS_setrlimit 195 #define SYS_freebsd11_getdirentries 196 /* 197 is freebsd6 mmap */ #define SYS___syscall 198 /* 199 is freebsd6 lseek */ /* 200 is freebsd6 truncate */ /* 201 is freebsd6 ftruncate */ #define SYS___sysctl 202 #define SYS_mlock 203 #define SYS_munlock 204 #define SYS_undelete 205 #define SYS_futimes 206 #define SYS_getpgid 207 #define SYS_poll 209 #define SYS_freebsd7___semctl 220 #define SYS_semget 221 #define SYS_semop 222 + /* 223 is obsolete semconfig */ #define SYS_freebsd7_msgctl 224 #define SYS_msgget 225 #define SYS_msgsnd 226 #define SYS_msgrcv 227 #define SYS_shmat 228 #define SYS_freebsd7_shmctl 229 #define SYS_shmdt 230 #define SYS_shmget 231 #define SYS_clock_gettime 232 #define SYS_clock_settime 233 #define SYS_clock_getres 234 #define SYS_ktimer_create 235 #define SYS_ktimer_delete 236 #define SYS_ktimer_settime 237 #define SYS_ktimer_gettime 238 #define SYS_ktimer_getoverrun 239 #define SYS_nanosleep 240 #define SYS_ffclock_getcounter 241 #define SYS_ffclock_setestimate 242 #define SYS_ffclock_getestimate 243 #define SYS_clock_nanosleep 244 #define SYS_clock_getcpuclockid2 247 #define SYS_ntp_gettime 248 #define SYS_minherit 250 #define SYS_rfork 251 /* 252 is obsolete openbsd_poll */ #define SYS_issetugid 253 #define SYS_lchown 254 #define SYS_aio_read 255 #define SYS_aio_write 256 #define SYS_lio_listio 257 #define SYS_freebsd11_getdents 272 #define SYS_lchmod 274 /* 275 is obsolete netbsd_lchown */ #define SYS_lutimes 276 /* 277 is obsolete netbsd_msync */ #define SYS_freebsd11_nstat 278 #define SYS_freebsd11_nfstat 279 #define SYS_freebsd11_nlstat 280 #define SYS_preadv 289 #define SYS_pwritev 290 /* 297 is freebsd4 fhstatfs */ #define SYS_fhopen 298 #define SYS_freebsd11_fhstat 299 #define SYS_modnext 300 #define SYS_modstat 301 #define SYS_modfnext 302 #define SYS_modfind 303 #define SYS_kldload 304 #define SYS_kldunload 305 #define SYS_kldfind 306 #define SYS_kldnext 307 #define SYS_kldstat 308 #define SYS_kldfirstmod 309 #define SYS_getsid 310 #define SYS_setresuid 311 #define SYS_setresgid 312 /* 313 is obsolete signanosleep */ #define SYS_aio_return 314 #define SYS_aio_suspend 315 #define SYS_aio_cancel 316 #define SYS_aio_error 317 /* 318 is freebsd6 aio_read */ /* 319 is freebsd6 aio_write */ /* 320 is freebsd6 lio_listio */ #define SYS_yield 321 /* 322 is obsolete thr_sleep */ /* 323 is obsolete thr_wakeup */ #define SYS_mlockall 324 #define SYS_munlockall 325 #define SYS___getcwd 326 #define SYS_sched_setparam 327 #define SYS_sched_getparam 328 #define SYS_sched_setscheduler 329 #define SYS_sched_getscheduler 330 #define SYS_sched_yield 331 #define SYS_sched_get_priority_max 332 #define SYS_sched_get_priority_min 333 #define SYS_sched_rr_get_interval 334 #define SYS_utrace 335 /* 336 is freebsd4 sendfile */ #define SYS_kldsym 337 #define SYS_jail 338 #define SYS_nnpfs_syscall 339 #define SYS_sigprocmask 340 #define SYS_sigsuspend 341 /* 342 is freebsd4 sigaction */ #define SYS_sigpending 343 /* 344 is freebsd4 sigreturn */ #define SYS_sigtimedwait 345 #define SYS_sigwaitinfo 346 #define SYS___acl_get_file 347 #define SYS___acl_set_file 348 #define SYS___acl_get_fd 349 #define SYS___acl_set_fd 350 #define SYS___acl_delete_file 351 #define SYS___acl_delete_fd 352 #define SYS___acl_aclcheck_file 353 #define SYS___acl_aclcheck_fd 354 #define SYS_extattrctl 355 #define SYS_extattr_set_file 356 #define SYS_extattr_get_file 357 #define SYS_extattr_delete_file 358 #define SYS_aio_waitcomplete 359 #define SYS_getresuid 360 #define SYS_getresgid 361 #define SYS_kqueue 362 #define SYS_freebsd11_kevent 363 + /* 364 is obsolete __cap_get_proc */ + /* 365 is obsolete __cap_set_proc */ + /* 366 is obsolete __cap_get_fd */ + /* 367 is obsolete __cap_get_file */ + /* 368 is obsolete __cap_set_fd */ + /* 369 is obsolete __cap_set_file */ #define SYS_extattr_set_fd 371 #define SYS_extattr_get_fd 372 #define SYS_extattr_delete_fd 373 #define SYS___setugid 374 + /* 375 is obsolete nfsclnt */ #define SYS_eaccess 376 #define SYS_afs3_syscall 377 #define SYS_nmount 378 + /* 379 is obsolete kse_exit */ + /* 380 is obsolete kse_wakeup */ + /* 381 is obsolete kse_create */ + /* 382 is obsolete kse_thr_interrupt */ + /* 383 is obsolete kse_release */ #define SYS___mac_get_proc 384 #define SYS___mac_set_proc 385 #define SYS___mac_get_fd 386 #define SYS___mac_get_file 387 #define SYS___mac_set_fd 388 #define SYS___mac_set_file 389 #define SYS_kenv 390 #define SYS_lchflags 391 #define SYS_uuidgen 392 #define SYS_sendfile 393 #define SYS_mac_syscall 394 #define SYS_freebsd11_getfsstat 395 #define SYS_freebsd11_statfs 396 #define SYS_freebsd11_fstatfs 397 #define SYS_freebsd11_fhstatfs 398 #define SYS_ksem_close 400 #define SYS_ksem_post 401 #define SYS_ksem_wait 402 #define SYS_ksem_trywait 403 #define SYS_ksem_init 404 #define SYS_ksem_open 405 #define SYS_ksem_unlink 406 #define SYS_ksem_getvalue 407 #define SYS_ksem_destroy 408 #define SYS___mac_get_pid 409 #define SYS___mac_get_link 410 #define SYS___mac_set_link 411 #define SYS_extattr_set_link 412 #define SYS_extattr_get_link 413 #define SYS_extattr_delete_link 414 #define SYS___mac_execve 415 #define SYS_sigaction 416 #define SYS_sigreturn 417 #define SYS_getcontext 421 #define SYS_setcontext 422 #define SYS_swapcontext 423 #define SYS_swapoff 424 #define SYS___acl_get_link 425 #define SYS___acl_set_link 426 #define SYS___acl_delete_link 427 #define SYS___acl_aclcheck_link 428 #define SYS_sigwait 429 #define SYS_thr_create 430 #define SYS_thr_exit 431 #define SYS_thr_self 432 #define SYS_thr_kill 433 #define SYS_jail_attach 436 #define SYS_extattr_list_fd 437 #define SYS_extattr_list_file 438 #define SYS_extattr_list_link 439 + /* 440 is obsolete kse_switchin */ #define SYS_ksem_timedwait 441 #define SYS_thr_suspend 442 #define SYS_thr_wake 443 #define SYS_kldunloadf 444 #define SYS_audit 445 #define SYS_auditon 446 #define SYS_getauid 447 #define SYS_setauid 448 #define SYS_getaudit 449 #define SYS_setaudit 450 #define SYS_getaudit_addr 451 #define SYS_setaudit_addr 452 #define SYS_auditctl 453 #define SYS__umtx_op 454 #define SYS_thr_new 455 #define SYS_sigqueue 456 #define SYS_kmq_open 457 #define SYS_kmq_setattr 458 #define SYS_kmq_timedreceive 459 #define SYS_kmq_timedsend 460 #define SYS_kmq_notify 461 #define SYS_kmq_unlink 462 #define SYS_abort2 463 #define SYS_thr_set_name 464 #define SYS_aio_fsync 465 #define SYS_rtprio_thread 466 #define SYS_sctp_peeloff 471 #define SYS_sctp_generic_sendmsg 472 #define SYS_sctp_generic_sendmsg_iov 473 #define SYS_sctp_generic_recvmsg 474 #define SYS_pread 475 #define SYS_pwrite 476 #define SYS_mmap 477 #define SYS_lseek 478 #define SYS_truncate 479 #define SYS_ftruncate 480 #define SYS_thr_kill2 481 #define SYS_shm_open 482 #define SYS_shm_unlink 483 #define SYS_cpuset 484 #define SYS_cpuset_setid 485 #define SYS_cpuset_getid 486 #define SYS_cpuset_getaffinity 487 #define SYS_cpuset_setaffinity 488 #define SYS_faccessat 489 #define SYS_fchmodat 490 #define SYS_fchownat 491 #define SYS_fexecve 492 #define SYS_freebsd11_fstatat 493 #define SYS_futimesat 494 #define SYS_linkat 495 #define SYS_mkdirat 496 #define SYS_mkfifoat 497 #define SYS_freebsd11_mknodat 498 #define SYS_openat 499 #define SYS_readlinkat 500 #define SYS_renameat 501 #define SYS_symlinkat 502 #define SYS_unlinkat 503 #define SYS_posix_openpt 504 #define SYS_gssd_syscall 505 #define SYS_jail_get 506 #define SYS_jail_set 507 #define SYS_jail_remove 508 #define SYS_closefrom 509 #define SYS___semctl 510 #define SYS_msgctl 511 #define SYS_shmctl 512 #define SYS_lpathconf 513 /* 514 is obsolete cap_new */ #define SYS___cap_rights_get 515 #define SYS_cap_enter 516 #define SYS_cap_getmode 517 #define SYS_pdfork 518 #define SYS_pdkill 519 #define SYS_pdgetpid 520 #define SYS_pselect 522 #define SYS_getloginclass 523 #define SYS_setloginclass 524 #define SYS_rctl_get_racct 525 #define SYS_rctl_get_rules 526 #define SYS_rctl_get_limits 527 #define SYS_rctl_add_rule 528 #define SYS_rctl_remove_rule 529 #define SYS_posix_fallocate 530 #define SYS_posix_fadvise 531 #define SYS_wait6 532 #define SYS_cap_rights_limit 533 #define SYS_cap_ioctls_limit 534 #define SYS_cap_ioctls_get 535 #define SYS_cap_fcntls_limit 536 #define SYS_cap_fcntls_get 537 #define SYS_bindat 538 #define SYS_connectat 539 #define SYS_chflagsat 540 #define SYS_accept4 541 #define SYS_pipe2 542 #define SYS_aio_mlock 543 #define SYS_procctl 544 #define SYS_ppoll 545 #define SYS_futimens 546 #define SYS_utimensat 547 + /* 548 is obsolete numa_getaffinity */ + /* 549 is obsolete numa_setaffinity */ #define SYS_fdatasync 550 #define SYS_fstat 551 #define SYS_fstatat 552 #define SYS_fhstat 553 #define SYS_getdirentries 554 #define SYS_statfs 555 #define SYS_fstatfs 556 #define SYS_getfsstat 557 #define SYS_fhstatfs 558 #define SYS_mknodat 559 #define SYS_kevent 560 #define SYS_cpuset_getdomain 561 #define SYS_cpuset_setdomain 562 #define SYS_getrandom 563 #define SYS_MAXSYSCALL 564 Index: projects/clang700-import/sys/vm/vm_fault.c =================================================================== --- projects/clang700-import/sys/vm/vm_fault.c (revision 339014) +++ projects/clang700-import/sys/vm/vm_fault.c (revision 339015) @@ -1,1803 +1,1812 @@ /*- * SPDX-License-Identifier: (BSD-4-Clause AND MIT-CMU) * * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * * * This code is derived from software contributed to Berkeley by * The Mach Operating System project at Carnegie-Mellon University. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vm_fault.c 8.4 (Berkeley) 1/12/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. * All rights reserved. * * Authors: Avadis Tevanian, Jr., Michael Wayne Young * * Permission to use, copy, modify and distribute this software and * its documentation is hereby granted, provided that both the copyright * notice and this permission notice appear in all copies of the * software, derivative works or modified versions, and any portions * thereof, and that both notices appear in supporting documentation. * * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. * * Carnegie Mellon requests users of this software to return to * * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU * School of Computer Science * Carnegie Mellon University * Pittsburgh PA 15213-3890 * * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. */ /* * Page fault handling module. */ #include __FBSDID("$FreeBSD$"); #include "opt_ktrace.h" #include "opt_vm.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KTRACE #include #endif #include #include #include #include #include #include #include #include #include #include #include #define PFBAK 4 #define PFFOR 4 #define VM_FAULT_READ_DEFAULT (1 + VM_FAULT_READ_AHEAD_INIT) #define VM_FAULT_READ_MAX (1 + VM_FAULT_READ_AHEAD_MAX) #define VM_FAULT_DONTNEED_MIN 1048576 struct faultstate { vm_page_t m; vm_object_t object; vm_pindex_t pindex; vm_page_t first_m; vm_object_t first_object; vm_pindex_t first_pindex; vm_map_t map; vm_map_entry_t entry; int map_generation; bool lookup_still_valid; struct vnode *vp; }; static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead); static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, int backward, int forward, bool obj_locked); static inline void release_page(struct faultstate *fs) { vm_page_xunbusy(fs->m); vm_page_lock(fs->m); vm_page_deactivate(fs->m); vm_page_unlock(fs->m); fs->m = NULL; } static inline void unlock_map(struct faultstate *fs) { if (fs->lookup_still_valid) { vm_map_lookup_done(fs->map, fs->entry); fs->lookup_still_valid = false; } } static void unlock_vp(struct faultstate *fs) { if (fs->vp != NULL) { vput(fs->vp); fs->vp = NULL; } } static void unlock_and_deallocate(struct faultstate *fs) { vm_object_pip_wakeup(fs->object); VM_OBJECT_WUNLOCK(fs->object); if (fs->object != fs->first_object) { VM_OBJECT_WLOCK(fs->first_object); vm_page_lock(fs->first_m); vm_page_free(fs->first_m); vm_page_unlock(fs->first_m); vm_object_pip_wakeup(fs->first_object); VM_OBJECT_WUNLOCK(fs->first_object); fs->first_m = NULL; } vm_object_deallocate(fs->first_object); unlock_map(fs); unlock_vp(fs); } static void vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot, vm_prot_t fault_type, int fault_flags, bool set_wd) { bool need_dirty; if (((prot & VM_PROT_WRITE) == 0 && (fault_flags & VM_FAULT_DIRTY) == 0) || (m->oflags & VPO_UNMANAGED) != 0) return; VM_OBJECT_ASSERT_LOCKED(m->object); need_dirty = ((fault_type & VM_PROT_WRITE) != 0 && (fault_flags & VM_FAULT_WIRE) == 0) || (fault_flags & VM_FAULT_DIRTY) != 0; if (set_wd) vm_object_set_writeable_dirty(m->object); else /* * If two callers of vm_fault_dirty() with set_wd == * FALSE, one for the map entry with MAP_ENTRY_NOSYNC * flag set, other with flag clear, race, it is * possible for the no-NOSYNC thread to see m->dirty * != 0 and not clear VPO_NOSYNC. Take vm_page lock * around manipulation of VPO_NOSYNC and * vm_page_dirty() call, to avoid the race and keep * m->oflags consistent. */ vm_page_lock(m); /* * If this is a NOSYNC mmap we do not want to set VPO_NOSYNC * if the page is already dirty to prevent data written with * the expectation of being synced from not being synced. * Likewise if this entry does not request NOSYNC then make * sure the page isn't marked NOSYNC. Applications sharing * data should use the same flags to avoid ping ponging. */ if ((entry->eflags & MAP_ENTRY_NOSYNC) != 0) { if (m->dirty == 0) { m->oflags |= VPO_NOSYNC; } } else { m->oflags &= ~VPO_NOSYNC; } /* * If the fault is a write, we know that this page is being * written NOW so dirty it explicitly to save on * pmap_is_modified() calls later. * * Also, since the page is now dirty, we can possibly tell * the pager to release any swap backing the page. Calling * the pager requires a write lock on the object. */ if (need_dirty) vm_page_dirty(m); if (!set_wd) vm_page_unlock(m); else if (need_dirty) vm_pager_page_unswapped(m); } static void vm_fault_fill_hold(vm_page_t *m_hold, vm_page_t m) { if (m_hold != NULL) { *m_hold = m; vm_page_lock(m); vm_page_hold(m); vm_page_unlock(m); } } /* * Unlocks fs.first_object and fs.map on success. */ static int vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold) { vm_page_t m, m_map; #if (defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ __ARM_ARCH >= 6) || defined(__i386__)) && VM_NRESERVLEVEL > 0 vm_page_t m_super; int flags; #endif int psind, rv; MPASS(fs->vp == NULL); m = vm_page_lookup(fs->first_object, fs->first_pindex); /* A busy page can be mapped for read|execute access. */ if (m == NULL || ((prot & VM_PROT_WRITE) != 0 && vm_page_busied(m)) || m->valid != VM_PAGE_BITS_ALL) return (KERN_FAILURE); m_map = m; psind = 0; #if (defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ __ARM_ARCH >= 6) || defined(__i386__)) && VM_NRESERVLEVEL > 0 if ((m->flags & PG_FICTITIOUS) == 0 && (m_super = vm_reserv_to_superpage(m)) != NULL && rounddown2(vaddr, pagesizes[m_super->psind]) >= fs->entry->start && roundup2(vaddr + 1, pagesizes[m_super->psind]) <= fs->entry->end && (vaddr & (pagesizes[m_super->psind] - 1)) == (VM_PAGE_TO_PHYS(m) & (pagesizes[m_super->psind] - 1)) && pmap_ps_enabled(fs->map->pmap)) { flags = PS_ALL_VALID; if ((prot & VM_PROT_WRITE) != 0) { /* * Create a superpage mapping allowing write access * only if none of the constituent pages are busy and * all of them are already dirty (except possibly for * the page that was faulted on). */ flags |= PS_NONE_BUSY; if ((fs->first_object->flags & OBJ_UNMANAGED) == 0) flags |= PS_ALL_DIRTY; } if (vm_page_ps_test(m_super, flags, m)) { m_map = m_super; psind = m_super->psind; vaddr = rounddown2(vaddr, pagesizes[psind]); /* Preset the modified bit for dirty superpages. */ if ((flags & PS_ALL_DIRTY) != 0) fault_type |= VM_PROT_WRITE; } } #endif rv = pmap_enter(fs->map->pmap, vaddr, m_map, prot, fault_type | PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), psind); if (rv != KERN_SUCCESS) return (rv); vm_fault_fill_hold(m_hold, m); vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, false); if (psind == 0 && !wired) vm_fault_prefault(fs, vaddr, PFBAK, PFFOR, true); VM_OBJECT_RUNLOCK(fs->first_object); vm_map_lookup_done(fs->map, fs->entry); curthread->td_ru.ru_minflt++; return (KERN_SUCCESS); } static void vm_fault_restore_map_lock(struct faultstate *fs) { VM_OBJECT_ASSERT_WLOCKED(fs->first_object); MPASS(fs->first_object->paging_in_progress > 0); if (!vm_map_trylock_read(fs->map)) { VM_OBJECT_WUNLOCK(fs->first_object); vm_map_lock_read(fs->map); VM_OBJECT_WLOCK(fs->first_object); } fs->lookup_still_valid = true; } static void vm_fault_populate_check_page(vm_page_t m) { /* * Check each page to ensure that the pager is obeying the * interface: the page must be installed in the object, fully * valid, and exclusively busied. */ MPASS(m != NULL); MPASS(m->valid == VM_PAGE_BITS_ALL); MPASS(vm_page_xbusied(m)); } static void vm_fault_populate_cleanup(vm_object_t object, vm_pindex_t first, vm_pindex_t last) { vm_page_t m; vm_pindex_t pidx; VM_OBJECT_ASSERT_WLOCKED(object); MPASS(first <= last); for (pidx = first, m = vm_page_lookup(object, pidx); pidx <= last; pidx++, m = vm_page_next(m)) { vm_fault_populate_check_page(m); vm_page_lock(m); vm_page_deactivate(m); vm_page_unlock(m); vm_page_xunbusy(m); } } static int vm_fault_populate(struct faultstate *fs, vm_prot_t prot, int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold) { struct mtx *m_mtx; vm_offset_t vaddr; vm_page_t m; vm_pindex_t map_first, map_last, pager_first, pager_last, pidx; int i, npages, psind, rv; MPASS(fs->object == fs->first_object); VM_OBJECT_ASSERT_WLOCKED(fs->first_object); MPASS(fs->first_object->paging_in_progress > 0); MPASS(fs->first_object->backing_object == NULL); MPASS(fs->lookup_still_valid); pager_first = OFF_TO_IDX(fs->entry->offset); pager_last = pager_first + atop(fs->entry->end - fs->entry->start) - 1; unlock_map(fs); unlock_vp(fs); /* * Call the pager (driver) populate() method. * * There is no guarantee that the method will be called again * if the current fault is for read, and a future fault is * for write. Report the entry's maximum allowed protection * to the driver. */ rv = vm_pager_populate(fs->first_object, fs->first_pindex, fault_type, fs->entry->max_protection, &pager_first, &pager_last); VM_OBJECT_ASSERT_WLOCKED(fs->first_object); if (rv == VM_PAGER_BAD) { /* * VM_PAGER_BAD is the backdoor for a pager to request * normal fault handling. */ vm_fault_restore_map_lock(fs); if (fs->map->timestamp != fs->map_generation) return (KERN_RESOURCE_SHORTAGE); /* RetryFault */ return (KERN_NOT_RECEIVER); } if (rv != VM_PAGER_OK) return (KERN_FAILURE); /* AKA SIGSEGV */ /* Ensure that the driver is obeying the interface. */ MPASS(pager_first <= pager_last); MPASS(fs->first_pindex <= pager_last); MPASS(fs->first_pindex >= pager_first); MPASS(pager_last < fs->first_object->size); vm_fault_restore_map_lock(fs); if (fs->map->timestamp != fs->map_generation) { vm_fault_populate_cleanup(fs->first_object, pager_first, pager_last); return (KERN_RESOURCE_SHORTAGE); /* RetryFault */ } /* * The map is unchanged after our last unlock. Process the fault. * * The range [pager_first, pager_last] that is given to the * pager is only a hint. The pager may populate any range * within the object that includes the requested page index. * In case the pager expanded the range, clip it to fit into * the map entry. */ map_first = OFF_TO_IDX(fs->entry->offset); if (map_first > pager_first) { vm_fault_populate_cleanup(fs->first_object, pager_first, map_first - 1); pager_first = map_first; } map_last = map_first + atop(fs->entry->end - fs->entry->start) - 1; if (map_last < pager_last) { vm_fault_populate_cleanup(fs->first_object, map_last + 1, pager_last); pager_last = map_last; } for (pidx = pager_first, m = vm_page_lookup(fs->first_object, pidx); pidx <= pager_last; pidx += npages, m = vm_page_next(&m[npages - 1])) { vaddr = fs->entry->start + IDX_TO_OFF(pidx) - fs->entry->offset; #if defined(__aarch64__) || defined(__amd64__) || (defined(__arm__) && \ __ARM_ARCH >= 6) || defined(__i386__) psind = m->psind; if (psind > 0 && ((vaddr & (pagesizes[psind] - 1)) != 0 || pidx + OFF_TO_IDX(pagesizes[psind]) - 1 > pager_last || !pmap_ps_enabled(fs->map->pmap))) psind = 0; #else psind = 0; #endif npages = atop(pagesizes[psind]); for (i = 0; i < npages; i++) { vm_fault_populate_check_page(&m[i]); vm_fault_dirty(fs->entry, &m[i], prot, fault_type, fault_flags, true); } VM_OBJECT_WUNLOCK(fs->first_object); pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type | (wired ? PMAP_ENTER_WIRED : 0), psind); VM_OBJECT_WLOCK(fs->first_object); m_mtx = NULL; for (i = 0; i < npages; i++) { vm_page_change_lock(&m[i], &m_mtx); if ((fault_flags & VM_FAULT_WIRE) != 0) vm_page_wire(&m[i]); else vm_page_activate(&m[i]); if (m_hold != NULL && m[i].pindex == fs->first_pindex) { *m_hold = &m[i]; vm_page_hold(&m[i]); } vm_page_xunbusy_maybelocked(&m[i]); } if (m_mtx != NULL) mtx_unlock(m_mtx); } curthread->td_ru.ru_majflt++; return (KERN_SUCCESS); } /* * vm_fault: * * Handle a page fault occurring at the given address, * requiring the given permissions, in the map specified. * If successful, the page is inserted into the * associated physical map. * * NOTE: the given address should be truncated to the * proper page address. * * KERN_SUCCESS is returned if the page fault is handled; otherwise, * a standard error specifying why the fault is fatal is returned. * * The map in question must be referenced, and remains so. * Caller may hold no locks. */ int vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags) { struct thread *td; int result; td = curthread; if ((td->td_pflags & TDP_NOFAULTING) != 0) return (KERN_PROTECTION_FAILURE); #ifdef KTRACE if (map != kernel_map && KTRPOINT(td, KTR_FAULT)) ktrfault(vaddr, fault_type); #endif result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags, NULL); #ifdef KTRACE if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND)) ktrfaultend(result); #endif return (result); } int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags, vm_page_t *m_hold) { struct faultstate fs; struct vnode *vp; struct domainset *dset; vm_object_t next_object, retry_object; vm_offset_t e_end, e_start; vm_pindex_t retry_pindex; vm_prot_t prot, retry_prot; int ahead, alloc_req, behind, cluster_offset, error, era, faultcount; int locked, nera, result, rv; u_char behavior; boolean_t wired; /* Passed by reference. */ bool dead, hardfault, is_first_object_locked; VM_CNT_INC(v_vm_faults); fs.vp = NULL; faultcount = 0; nera = -1; hardfault = false; RetryFault:; /* * Find the backing store object and offset into it to begin the * search. */ fs.map = map; result = vm_map_lookup(&fs.map, vaddr, fault_type | VM_PROT_FAULT_LOOKUP, &fs.entry, &fs.first_object, &fs.first_pindex, &prot, &wired); if (result != KERN_SUCCESS) { unlock_vp(&fs); return (result); } fs.map_generation = fs.map->timestamp; if (fs.entry->eflags & MAP_ENTRY_NOFAULT) { panic("%s: fault on nofault entry, addr: %#lx", __func__, (u_long)vaddr); } if (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION && fs.entry->wiring_thread != curthread) { vm_map_unlock_read(fs.map); vm_map_lock(fs.map); if (vm_map_lookup_entry(fs.map, vaddr, &fs.entry) && (fs.entry->eflags & MAP_ENTRY_IN_TRANSITION)) { unlock_vp(&fs); fs.entry->eflags |= MAP_ENTRY_NEEDS_WAKEUP; vm_map_unlock_and_wait(fs.map, 0); } else vm_map_unlock(fs.map); goto RetryFault; } MPASS((fs.entry->eflags & MAP_ENTRY_GUARD) == 0); if (wired) fault_type = prot | (fault_type & VM_PROT_COPY); else KASSERT((fault_flags & VM_FAULT_WIRE) == 0, ("!wired && VM_FAULT_WIRE")); /* * Try to avoid lock contention on the top-level object through * special-case handling of some types of page faults, specifically, * those that are both (1) mapping an existing page from the top- * level object and (2) not having to mark that object as containing * dirty pages. Under these conditions, a read lock on the top-level * object suffices, allowing multiple page faults of a similar type to * run in parallel on the same top-level object. */ if (fs.vp == NULL /* avoid locked vnode leak */ && (fault_flags & (VM_FAULT_WIRE | VM_FAULT_DIRTY)) == 0 && /* avoid calling vm_object_set_writeable_dirty() */ ((prot & VM_PROT_WRITE) == 0 || (fs.first_object->type != OBJT_VNODE && (fs.first_object->flags & OBJ_TMPFS_NODE) == 0) || (fs.first_object->flags & OBJ_MIGHTBEDIRTY) != 0)) { VM_OBJECT_RLOCK(fs.first_object); if ((prot & VM_PROT_WRITE) == 0 || (fs.first_object->type != OBJT_VNODE && (fs.first_object->flags & OBJ_TMPFS_NODE) == 0) || (fs.first_object->flags & OBJ_MIGHTBEDIRTY) != 0) { rv = vm_fault_soft_fast(&fs, vaddr, prot, fault_type, fault_flags, wired, m_hold); if (rv == KERN_SUCCESS) return (rv); } if (!VM_OBJECT_TRYUPGRADE(fs.first_object)) { VM_OBJECT_RUNLOCK(fs.first_object); VM_OBJECT_WLOCK(fs.first_object); } } else { VM_OBJECT_WLOCK(fs.first_object); } /* * Make a reference to this object to prevent its disposal while we * are messing with it. Once we have the reference, the map is free * to be diddled. Since objects reference their shadows (and copies), * they will stay around as well. * * Bump the paging-in-progress count to prevent size changes (e.g. * truncation operations) during I/O. */ vm_object_reference_locked(fs.first_object); vm_object_pip_add(fs.first_object, 1); fs.lookup_still_valid = true; fs.first_m = NULL; /* * Search for the page at object/offset. */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; while (TRUE) { /* * If the object is marked for imminent termination, * we retry here, since the collapse pass has raced * with us. Otherwise, if we see terminally dead * object, return fail. */ if ((fs.object->flags & OBJ_DEAD) != 0) { dead = fs.object->type == OBJT_DEAD; unlock_and_deallocate(&fs); if (dead) return (KERN_PROTECTION_FAILURE); pause("vmf_de", 1); goto RetryFault; } /* * See if page is resident */ fs.m = vm_page_lookup(fs.object, fs.pindex); if (fs.m != NULL) { /* * Wait/Retry if the page is busy. We have to do this * if the page is either exclusive or shared busy * because the vm_pager may be using read busy for * pageouts (and even pageins if it is the vnode * pager), and we could end up trying to pagein and * pageout the same page simultaneously. * * We can theoretically allow the busy case on a read * fault if the page is marked valid, but since such * pages are typically already pmap'd, putting that * special case in might be more effort then it is * worth. We cannot under any circumstances mess * around with a shared busied page except, perhaps, * to pmap it. */ if (vm_page_busied(fs.m)) { /* * Reference the page before unlocking and * sleeping so that the page daemon is less * likely to reclaim it. */ vm_page_aflag_set(fs.m, PGA_REFERENCED); if (fs.object != fs.first_object) { if (!VM_OBJECT_TRYWLOCK( fs.first_object)) { VM_OBJECT_WUNLOCK(fs.object); VM_OBJECT_WLOCK(fs.first_object); VM_OBJECT_WLOCK(fs.object); } vm_page_lock(fs.first_m); vm_page_free(fs.first_m); vm_page_unlock(fs.first_m); vm_object_pip_wakeup(fs.first_object); VM_OBJECT_WUNLOCK(fs.first_object); fs.first_m = NULL; } unlock_map(&fs); if (fs.m == vm_page_lookup(fs.object, fs.pindex)) { vm_page_sleep_if_busy(fs.m, "vmpfw"); } vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); VM_CNT_INC(v_intrans); vm_object_deallocate(fs.first_object); goto RetryFault; } /* * Mark page busy for other processes, and the * pagedaemon. If it still isn't completely valid * (readable), jump to readrest, else break-out ( we * found the page ). */ vm_page_xbusy(fs.m); if (fs.m->valid != VM_PAGE_BITS_ALL) goto readrest; break; /* break to PAGE HAS BEEN FOUND */ } KASSERT(fs.m == NULL, ("fs.m should be NULL, not %p", fs.m)); /* * Page is not resident. If the pager might contain the page * or this is the beginning of the search, allocate a new * page. (Default objects are zero-fill, so there is no real * pager for them.) */ if (fs.object->type != OBJT_DEFAULT || fs.object == fs.first_object) { if (fs.pindex >= fs.object->size) { unlock_and_deallocate(&fs); return (KERN_PROTECTION_FAILURE); } if (fs.object == fs.first_object && (fs.first_object->flags & OBJ_POPULATE) != 0 && fs.first_object->shadow_count == 0) { rv = vm_fault_populate(&fs, prot, fault_type, fault_flags, wired, m_hold); switch (rv) { case KERN_SUCCESS: case KERN_FAILURE: unlock_and_deallocate(&fs); return (rv); case KERN_RESOURCE_SHORTAGE: unlock_and_deallocate(&fs); goto RetryFault; case KERN_NOT_RECEIVER: /* * Pager's populate() method * returned VM_PAGER_BAD. */ break; default: panic("inconsistent return codes"); } } /* * Allocate a new page for this object/offset pair. * * Unlocked read of the p_flag is harmless. At * worst, the P_KILLED might be not observed * there, and allocation can fail, causing * restart and new reading of the p_flag. */ dset = fs.object->domain.dr_policy; if (dset == NULL) dset = curthread->td_domain.dr_policy; if (!vm_page_count_severe_set(&dset->ds_mask) || P_KILLED(curproc)) { #if VM_NRESERVLEVEL > 0 vm_object_color(fs.object, atop(vaddr) - fs.pindex); #endif alloc_req = P_KILLED(curproc) ? VM_ALLOC_SYSTEM : VM_ALLOC_NORMAL; if (fs.object->type != OBJT_VNODE && fs.object->backing_object == NULL) alloc_req |= VM_ALLOC_ZERO; fs.m = vm_page_alloc(fs.object, fs.pindex, alloc_req); } if (fs.m == NULL) { unlock_and_deallocate(&fs); vm_waitpfault(dset); goto RetryFault; } } readrest: /* * At this point, we have either allocated a new page or found * an existing page that is only partially valid. * * We hold a reference on the current object and the page is * exclusive busied. */ /* * If the pager for the current object might have the page, * then determine the number of additional pages to read and * potentially reprioritize previously read pages for earlier * reclamation. These operations should only be performed * once per page fault. Even if the current pager doesn't * have the page, the number of additional pages to read will * apply to subsequent objects in the shadow chain. */ if (fs.object->type != OBJT_DEFAULT && nera == -1 && !P_KILLED(curproc)) { KASSERT(fs.lookup_still_valid, ("map unlocked")); era = fs.entry->read_ahead; behavior = vm_map_entry_behavior(fs.entry); if (behavior == MAP_ENTRY_BEHAV_RANDOM) { nera = 0; } else if (behavior == MAP_ENTRY_BEHAV_SEQUENTIAL) { nera = VM_FAULT_READ_AHEAD_MAX; if (vaddr == fs.entry->next_read) vm_fault_dontneed(&fs, vaddr, nera); } else if (vaddr == fs.entry->next_read) { /* * This is a sequential fault. Arithmetically * increase the requested number of pages in * the read-ahead window. The requested * number of pages is "# of sequential faults * x (read ahead min + 1) + read ahead min" */ nera = VM_FAULT_READ_AHEAD_MIN; if (era > 0) { nera += era + 1; if (nera > VM_FAULT_READ_AHEAD_MAX) nera = VM_FAULT_READ_AHEAD_MAX; } if (era == VM_FAULT_READ_AHEAD_MAX) vm_fault_dontneed(&fs, vaddr, nera); } else { /* * This is a non-sequential fault. */ nera = 0; } if (era != nera) { /* * A read lock on the map suffices to update * the read ahead count safely. */ fs.entry->read_ahead = nera; } /* * Prepare for unlocking the map. Save the map * entry's start and end addresses, which are used to * optimize the size of the pager operation below. * Even if the map entry's addresses change after * unlocking the map, using the saved addresses is * safe. */ e_start = fs.entry->start; e_end = fs.entry->end; } /* * Call the pager to retrieve the page if there is a chance * that the pager has it, and potentially retrieve additional * pages at the same time. */ if (fs.object->type != OBJT_DEFAULT) { /* * Release the map lock before locking the vnode or * sleeping in the pager. (If the current object has * a shadow, then an earlier iteration of this loop * may have already unlocked the map.) */ unlock_map(&fs); if (fs.object->type == OBJT_VNODE && (vp = fs.object->handle) != fs.vp) { /* * Perform an unlock in case the desired vnode * changed while the map was unlocked during a * retry. */ unlock_vp(&fs); locked = VOP_ISLOCKED(vp); if (locked != LK_EXCLUSIVE) locked = LK_SHARED; /* * We must not sleep acquiring the vnode lock * while we have the page exclusive busied or * the object's paging-in-progress count * incremented. Otherwise, we could deadlock. */ error = vget(vp, locked | LK_CANRECURSE | LK_NOWAIT, curthread); if (error != 0) { vhold(vp); release_page(&fs); unlock_and_deallocate(&fs); error = vget(vp, locked | LK_RETRY | LK_CANRECURSE, curthread); vdrop(vp); fs.vp = vp; KASSERT(error == 0, ("vm_fault: vget failed")); goto RetryFault; } fs.vp = vp; } KASSERT(fs.vp == NULL || !fs.map->system_map, ("vm_fault: vnode-backed object mapped by system map")); /* * Page in the requested page and hint the pager, * that it may bring up surrounding pages. */ if (nera == -1 || behavior == MAP_ENTRY_BEHAV_RANDOM || P_KILLED(curproc)) { behind = 0; ahead = 0; } else { /* Is this a sequential fault? */ if (nera > 0) { behind = 0; ahead = nera; } else { /* * Request a cluster of pages that is * aligned to a VM_FAULT_READ_DEFAULT * page offset boundary within the * object. Alignment to a page offset * boundary is more likely to coincide * with the underlying file system * block than alignment to a virtual * address boundary. */ cluster_offset = fs.pindex % VM_FAULT_READ_DEFAULT; behind = ulmin(cluster_offset, atop(vaddr - e_start)); ahead = VM_FAULT_READ_DEFAULT - 1 - cluster_offset; } ahead = ulmin(ahead, atop(e_end - vaddr) - 1); } rv = vm_pager_get_pages(fs.object, &fs.m, 1, &behind, &ahead); if (rv == VM_PAGER_OK) { faultcount = behind + 1 + ahead; hardfault = true; break; /* break to PAGE HAS BEEN FOUND */ } if (rv == VM_PAGER_ERROR) printf("vm_fault: pager read error, pid %d (%s)\n", curproc->p_pid, curproc->p_comm); /* * If an I/O error occurred or the requested page was * outside the range of the pager, clean up and return * an error. */ if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) { vm_page_lock(fs.m); if (fs.m->wire_count == 0) vm_page_free(fs.m); else vm_page_xunbusy_maybelocked(fs.m); vm_page_unlock(fs.m); fs.m = NULL; unlock_and_deallocate(&fs); return (rv == VM_PAGER_ERROR ? KERN_FAILURE : KERN_PROTECTION_FAILURE); } /* * The requested page does not exist at this object/ * offset. Remove the invalid page from the object, * waking up anyone waiting for it, and continue on to * the next object. However, if this is the top-level * object, we must leave the busy page in place to * prevent another process from rushing past us, and * inserting the page in that object at the same time * that we are. */ if (fs.object != fs.first_object) { vm_page_lock(fs.m); if (fs.m->wire_count == 0) vm_page_free(fs.m); else vm_page_xunbusy_maybelocked(fs.m); vm_page_unlock(fs.m); fs.m = NULL; } } /* * We get here if the object has default pager (or unwiring) * or the pager doesn't have the page. */ if (fs.object == fs.first_object) fs.first_m = fs.m; /* * Move on to the next object. Lock the next object before * unlocking the current one. */ next_object = fs.object->backing_object; if (next_object == NULL) { /* * If there's no object left, fill the page in the top * object with zeros. */ if (fs.object != fs.first_object) { vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; VM_OBJECT_WLOCK(fs.object); } fs.first_m = NULL; /* * Zero the page if necessary and mark it valid. */ if ((fs.m->flags & PG_ZERO) == 0) { pmap_zero_page(fs.m); } else { VM_CNT_INC(v_ozfod); } VM_CNT_INC(v_zfod); fs.m->valid = VM_PAGE_BITS_ALL; /* Don't try to prefault neighboring pages. */ faultcount = 1; break; /* break to PAGE HAS BEEN FOUND */ } else { KASSERT(fs.object != next_object, ("object loop %p", next_object)); VM_OBJECT_WLOCK(next_object); vm_object_pip_add(next_object, 1); if (fs.object != fs.first_object) vm_object_pip_wakeup(fs.object); fs.pindex += OFF_TO_IDX(fs.object->backing_object_offset); VM_OBJECT_WUNLOCK(fs.object); fs.object = next_object; } } vm_page_assert_xbusied(fs.m); /* * PAGE HAS BEEN FOUND. [Loop invariant still holds -- the object lock * is held.] */ /* * If the page is being written, but isn't already owned by the * top-level object, we have to copy it into a new page owned by the * top-level object. */ if (fs.object != fs.first_object) { /* * We only really need to copy if we want to write it. */ if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) { /* * This allows pages to be virtually copied from a * backing_object into the first_object, where the * backing object has no other refs to it, and cannot * gain any more refs. Instead of a bcopy, we just * move the page from the backing object to the * first object. Note that we must mark the page * dirty in the first object so that it will go out * to swap when needed. */ is_first_object_locked = false; if ( /* * Only one shadow object */ (fs.object->shadow_count == 1) && /* * No COW refs, except us */ (fs.object->ref_count == 1) && /* * No one else can look this object up */ (fs.object->handle == NULL) && /* * No other ways to look the object up */ ((fs.object->type == OBJT_DEFAULT) || (fs.object->type == OBJT_SWAP)) && (is_first_object_locked = VM_OBJECT_TRYWLOCK(fs.first_object)) && /* * We don't chase down the shadow chain */ fs.object == fs.first_object->backing_object) { vm_page_lock(fs.m); vm_page_dequeue(fs.m); vm_page_remove(fs.m); vm_page_unlock(fs.m); vm_page_lock(fs.first_m); vm_page_replace_checked(fs.m, fs.first_object, fs.first_pindex, fs.first_m); vm_page_free(fs.first_m); vm_page_unlock(fs.first_m); vm_page_dirty(fs.m); #if VM_NRESERVLEVEL > 0 /* * Rename the reservation. */ vm_reserv_rename(fs.m, fs.first_object, fs.object, OFF_TO_IDX( fs.first_object->backing_object_offset)); #endif /* * Removing the page from the backing object * unbusied it. */ vm_page_xbusy(fs.m); fs.first_m = fs.m; fs.m = NULL; VM_CNT_INC(v_cow_optim); } else { /* * Oh, well, lets copy it. */ pmap_copy_page(fs.m, fs.first_m); fs.first_m->valid = VM_PAGE_BITS_ALL; if (wired && (fault_flags & VM_FAULT_WIRE) == 0) { vm_page_lock(fs.first_m); vm_page_wire(fs.first_m); vm_page_unlock(fs.first_m); vm_page_lock(fs.m); vm_page_unwire(fs.m, PQ_INACTIVE); vm_page_unlock(fs.m); } /* * We no longer need the old page or object. */ release_page(&fs); } /* * fs.object != fs.first_object due to above * conditional */ vm_object_pip_wakeup(fs.object); VM_OBJECT_WUNLOCK(fs.object); /* * Only use the new page below... */ fs.object = fs.first_object; fs.pindex = fs.first_pindex; fs.m = fs.first_m; if (!is_first_object_locked) VM_OBJECT_WLOCK(fs.object); VM_CNT_INC(v_cow_faults); curthread->td_cow++; } else { prot &= ~VM_PROT_WRITE; } } /* * We must verify that the maps have not changed since our last * lookup. */ if (!fs.lookup_still_valid) { if (!vm_map_trylock_read(fs.map)) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } fs.lookup_still_valid = true; if (fs.map->timestamp != fs.map_generation) { result = vm_map_lookup_locked(&fs.map, vaddr, fault_type, &fs.entry, &retry_object, &retry_pindex, &retry_prot, &wired); /* * If we don't need the page any longer, put it on the inactive * list (the easiest thing to do here). If no one needs it, * pageout will grab it eventually. */ if (result != KERN_SUCCESS) { release_page(&fs); unlock_and_deallocate(&fs); /* * If retry of map lookup would have blocked then * retry fault from start. */ if (result == KERN_FAILURE) goto RetryFault; return (result); } if ((retry_object != fs.first_object) || (retry_pindex != fs.first_pindex)) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } /* * Check whether the protection has changed or the object has * been copied while we left the map unlocked. Changing from * read to write permission is OK - we leave the page * write-protected, and catch the write fault. Changing from * write to read permission means that we can't mark the page * write-enabled after all. */ prot &= retry_prot; fault_type &= retry_prot; if (prot == 0) { release_page(&fs); unlock_and_deallocate(&fs); goto RetryFault; } /* Reassert because wired may have changed. */ KASSERT(wired || (fault_flags & VM_FAULT_WIRE) == 0, ("!wired && VM_FAULT_WIRE")); } } /* * If the page was filled by a pager, save the virtual address that * should be faulted on next under a sequential access pattern to the * map entry. A read lock on the map suffices to update this address * safely. */ if (hardfault) fs.entry->next_read = vaddr + ptoa(ahead) + PAGE_SIZE; vm_fault_dirty(fs.entry, fs.m, prot, fault_type, fault_flags, true); vm_page_assert_xbusied(fs.m); /* * Page must be completely valid or it is not fit to * map into user space. vm_pager_get_pages() ensures this. */ KASSERT(fs.m->valid == VM_PAGE_BITS_ALL, ("vm_fault: page %p partially invalid", fs.m)); VM_OBJECT_WUNLOCK(fs.object); /* * Put this page into the physical map. We had to do the unlock above * because pmap_enter() may sleep. We don't put the page * back on the active queue until later so that the pageout daemon * won't find it (yet). */ pmap_enter(fs.map->pmap, vaddr, fs.m, prot, fault_type | (wired ? PMAP_ENTER_WIRED : 0), 0); if (faultcount != 1 && (fault_flags & VM_FAULT_WIRE) == 0 && wired == 0) vm_fault_prefault(&fs, vaddr, faultcount > 0 ? behind : PFBAK, faultcount > 0 ? ahead : PFFOR, false); VM_OBJECT_WLOCK(fs.object); vm_page_lock(fs.m); /* * If the page is not wired down, then put it where the pageout daemon * can find it. */ if ((fault_flags & VM_FAULT_WIRE) != 0) vm_page_wire(fs.m); else vm_page_activate(fs.m); if (m_hold != NULL) { *m_hold = fs.m; vm_page_hold(fs.m); } vm_page_unlock(fs.m); vm_page_xunbusy(fs.m); /* * Unlock everything, and return */ unlock_and_deallocate(&fs); if (hardfault) { VM_CNT_INC(v_io_faults); curthread->td_ru.ru_majflt++; #ifdef RACCT if (racct_enable && fs.object->type == OBJT_VNODE) { PROC_LOCK(curproc); if ((fault_type & (VM_PROT_COPY | VM_PROT_WRITE)) != 0) { racct_add_force(curproc, RACCT_WRITEBPS, PAGE_SIZE + behind * PAGE_SIZE); racct_add_force(curproc, RACCT_WRITEIOPS, 1); } else { racct_add_force(curproc, RACCT_READBPS, PAGE_SIZE + ahead * PAGE_SIZE); racct_add_force(curproc, RACCT_READIOPS, 1); } PROC_UNLOCK(curproc); } #endif } else curthread->td_ru.ru_minflt++; return (KERN_SUCCESS); } /* * Speed up the reclamation of pages that precede the faulting pindex within * the first object of the shadow chain. Essentially, perform the equivalent * to madvise(..., MADV_DONTNEED) on a large cluster of pages that precedes * the faulting pindex by the cluster size when the pages read by vm_fault() * cross a cluster-size boundary. The cluster size is the greater of the * smallest superpage size and VM_FAULT_DONTNEED_MIN. * * When "fs->first_object" is a shadow object, the pages in the backing object * that precede the faulting pindex are deactivated by vm_fault(). So, this * function must only be concerned with pages in the first object. */ static void vm_fault_dontneed(const struct faultstate *fs, vm_offset_t vaddr, int ahead) { vm_map_entry_t entry; vm_object_t first_object, object; vm_offset_t end, start; vm_page_t m, m_next; vm_pindex_t pend, pstart; vm_size_t size; object = fs->object; VM_OBJECT_ASSERT_WLOCKED(object); first_object = fs->first_object; if (first_object != object) { if (!VM_OBJECT_TRYWLOCK(first_object)) { VM_OBJECT_WUNLOCK(object); VM_OBJECT_WLOCK(first_object); VM_OBJECT_WLOCK(object); } } /* Neither fictitious nor unmanaged pages can be reclaimed. */ if ((first_object->flags & (OBJ_FICTITIOUS | OBJ_UNMANAGED)) == 0) { size = VM_FAULT_DONTNEED_MIN; if (MAXPAGESIZES > 1 && size < pagesizes[1]) size = pagesizes[1]; end = rounddown2(vaddr, size); if (vaddr - end >= size - PAGE_SIZE - ptoa(ahead) && (entry = fs->entry)->start < end) { if (end - entry->start < size) start = entry->start; else start = end - size; pmap_advise(fs->map->pmap, start, end, MADV_DONTNEED); pstart = OFF_TO_IDX(entry->offset) + atop(start - entry->start); m_next = vm_page_find_least(first_object, pstart); pend = OFF_TO_IDX(entry->offset) + atop(end - entry->start); while ((m = m_next) != NULL && m->pindex < pend) { m_next = TAILQ_NEXT(m, listq); if (m->valid != VM_PAGE_BITS_ALL || vm_page_busied(m)) continue; /* * Don't clear PGA_REFERENCED, since it would * likely represent a reference by a different * process. * * Typically, at this point, prefetched pages * are still in the inactive queue. Only * pages that triggered page faults are in the * active queue. */ vm_page_lock(m); if (!vm_page_inactive(m)) vm_page_deactivate(m); vm_page_unlock(m); } } } if (first_object != object) VM_OBJECT_WUNLOCK(first_object); } /* * vm_fault_prefault provides a quick way of clustering * pagefaults into a processes address space. It is a "cousin" * of vm_map_pmap_enter, except it runs at page fault time instead * of mmap time. */ static void vm_fault_prefault(const struct faultstate *fs, vm_offset_t addra, int backward, int forward, bool obj_locked) { pmap_t pmap; vm_map_entry_t entry; vm_object_t backing_object, lobject; vm_offset_t addr, starta; vm_pindex_t pindex; vm_page_t m; int i; pmap = fs->map->pmap; if (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)) return; entry = fs->entry; if (addra < backward * PAGE_SIZE) { starta = entry->start; } else { starta = addra - backward * PAGE_SIZE; if (starta < entry->start) starta = entry->start; } /* * Generate the sequence of virtual addresses that are candidates for * prefaulting in an outward spiral from the faulting virtual address, * "addra". Specifically, the sequence is "addra - PAGE_SIZE", "addra * + PAGE_SIZE", "addra - 2 * PAGE_SIZE", "addra + 2 * PAGE_SIZE", ... * If the candidate address doesn't have a backing physical page, then * the loop immediately terminates. */ for (i = 0; i < 2 * imax(backward, forward); i++) { addr = addra + ((i >> 1) + 1) * ((i & 1) == 0 ? -PAGE_SIZE : PAGE_SIZE); if (addr > addra + forward * PAGE_SIZE) addr = 0; if (addr < starta || addr >= entry->end) continue; if (!pmap_is_prefaultable(pmap, addr)) continue; pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT; lobject = entry->object.vm_object; if (!obj_locked) VM_OBJECT_RLOCK(lobject); while ((m = vm_page_lookup(lobject, pindex)) == NULL && lobject->type == OBJT_DEFAULT && (backing_object = lobject->backing_object) != NULL) { KASSERT((lobject->backing_object_offset & PAGE_MASK) == 0, ("vm_fault_prefault: unaligned object offset")); pindex += lobject->backing_object_offset >> PAGE_SHIFT; VM_OBJECT_RLOCK(backing_object); if (!obj_locked || lobject != entry->object.vm_object) VM_OBJECT_RUNLOCK(lobject); lobject = backing_object; } if (m == NULL) { if (!obj_locked || lobject != entry->object.vm_object) VM_OBJECT_RUNLOCK(lobject); break; } if (m->valid == VM_PAGE_BITS_ALL && (m->flags & PG_FICTITIOUS) == 0) pmap_enter_quick(pmap, addr, m, entry->protection); if (!obj_locked || lobject != entry->object.vm_object) VM_OBJECT_RUNLOCK(lobject); } } /* * Hold each of the physical pages that are mapped by the specified range of * virtual addresses, ["addr", "addr" + "len"), if those mappings are valid * and allow the specified types of access, "prot". If all of the implied * pages are successfully held, then the number of held pages is returned * together with pointers to those pages in the array "ma". However, if any * of the pages cannot be held, -1 is returned. */ int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len, vm_prot_t prot, vm_page_t *ma, int max_count) { vm_offset_t end, va; vm_page_t *mp; int count; boolean_t pmap_failed; if (len == 0) return (0); end = round_page(addr + len); addr = trunc_page(addr); /* * Check for illegal addresses. */ if (addr < vm_map_min(map) || addr > end || end > vm_map_max(map)) return (-1); if (atop(end - addr) > max_count) panic("vm_fault_quick_hold_pages: count > max_count"); count = atop(end - addr); /* * Most likely, the physical pages are resident in the pmap, so it is * faster to try pmap_extract_and_hold() first. */ pmap_failed = FALSE; for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) { *mp = pmap_extract_and_hold(map->pmap, va, prot); if (*mp == NULL) pmap_failed = TRUE; else if ((prot & VM_PROT_WRITE) != 0 && (*mp)->dirty != VM_PAGE_BITS_ALL) { /* * Explicitly dirty the physical page. Otherwise, the * caller's changes may go unnoticed because they are * performed through an unmanaged mapping or by a DMA * operation. * * The object lock is not held here. * See vm_page_clear_dirty_mask(). */ vm_page_dirty(*mp); } } if (pmap_failed) { /* * One or more pages could not be held by the pmap. Either no * page was mapped at the specified virtual address or that * mapping had insufficient permissions. Attempt to fault in * and hold these pages. * * If vm_fault_disable_pagefaults() was called, * i.e., TDP_NOFAULTING is set, we must not sleep nor * acquire MD VM locks, which means we must not call * vm_fault_hold(). Some (out of tree) callers mark * too wide a code area with vm_fault_disable_pagefaults() * already, use the VM_PROT_QUICK_NOFAULT flag to request * the proper behaviour explicitly. */ if ((prot & VM_PROT_QUICK_NOFAULT) != 0 && (curthread->td_pflags & TDP_NOFAULTING) != 0) goto error; for (mp = ma, va = addr; va < end; mp++, va += PAGE_SIZE) if (*mp == NULL && vm_fault_hold(map, va, prot, VM_FAULT_NORMAL, mp) != KERN_SUCCESS) goto error; } return (count); error: for (mp = ma; mp < ma + count; mp++) if (*mp != NULL) { vm_page_lock(*mp); vm_page_unhold(*mp); vm_page_unlock(*mp); } return (-1); } /* * Routine: * vm_fault_copy_entry * Function: * Create new shadow object backing dst_entry with private copy of * all underlying pages. When src_entry is equal to dst_entry, * function implements COW for wired-down map entry. Otherwise, * it forks wired entry into dst_map. * * In/out conditions: * The source and destination maps must be locked for write. * The source map entry must be wired down (or be a sharing map * entry corresponding to a main map entry that is wired down). */ void vm_fault_copy_entry(vm_map_t dst_map, vm_map_t src_map, vm_map_entry_t dst_entry, vm_map_entry_t src_entry, vm_ooffset_t *fork_charge) { vm_object_t backing_object, dst_object, object, src_object; vm_pindex_t dst_pindex, pindex, src_pindex; vm_prot_t access, prot; vm_offset_t vaddr; vm_page_t dst_m; vm_page_t src_m; boolean_t upgrade; #ifdef lint src_map++; #endif /* lint */ upgrade = src_entry == dst_entry; access = prot = dst_entry->protection; src_object = src_entry->object.vm_object; src_pindex = OFF_TO_IDX(src_entry->offset); if (upgrade && (dst_entry->eflags & MAP_ENTRY_NEEDS_COPY) == 0) { dst_object = src_object; vm_object_reference(dst_object); } else { /* * Create the top-level object for the destination entry. (Doesn't * actually shadow anything - we copy the pages directly.) */ dst_object = vm_object_allocate(OBJT_DEFAULT, atop(dst_entry->end - dst_entry->start)); #if VM_NRESERVLEVEL > 0 dst_object->flags |= OBJ_COLORED; dst_object->pg_color = atop(dst_entry->start); #endif + dst_object->domain = src_object->domain; + dst_object->charge = dst_entry->end - dst_entry->start; } VM_OBJECT_WLOCK(dst_object); KASSERT(upgrade || dst_entry->object.vm_object == NULL, ("vm_fault_copy_entry: vm_object not NULL")); if (src_object != dst_object) { - dst_object->domain = src_object->domain; dst_entry->object.vm_object = dst_object; dst_entry->offset = 0; - dst_object->charge = dst_entry->end - dst_entry->start; } if (fork_charge != NULL) { KASSERT(dst_entry->cred == NULL, ("vm_fault_copy_entry: leaked swp charge")); dst_object->cred = curthread->td_ucred; crhold(dst_object->cred); *fork_charge += dst_object->charge; - } else if (dst_object->cred == NULL) { + } else if ((dst_object->type == OBJT_DEFAULT || + dst_object->type == OBJT_SWAP) && + dst_object->cred == NULL) { KASSERT(dst_entry->cred != NULL, ("no cred for entry %p", dst_entry)); dst_object->cred = dst_entry->cred; dst_entry->cred = NULL; } /* * If not an upgrade, then enter the mappings in the pmap as * read and/or execute accesses. Otherwise, enter them as * write accesses. * * A writeable large page mapping is only created if all of * the constituent small page mappings are modified. Marking * PTEs as modified on inception allows promotion to happen * without taking potentially large number of soft faults. */ if (!upgrade) access &= ~VM_PROT_WRITE; /* * Loop through all of the virtual pages within the entry's * range, copying each page from the source object to the * destination object. Since the source is wired, those pages * must exist. In contrast, the destination is pageable. * Since the destination object doesn't share any backing storage * with the source object, all of its pages must be dirtied, * regardless of whether they can be written. */ for (vaddr = dst_entry->start, dst_pindex = 0; vaddr < dst_entry->end; vaddr += PAGE_SIZE, dst_pindex++) { again: /* * Find the page in the source object, and copy it in. * Because the source is wired down, the page will be * in memory. */ if (src_object != dst_object) VM_OBJECT_RLOCK(src_object); object = src_object; pindex = src_pindex + dst_pindex; while ((src_m = vm_page_lookup(object, pindex)) == NULL && (backing_object = object->backing_object) != NULL) { /* * Unless the source mapping is read-only or * it is presently being upgraded from * read-only, the first object in the shadow * chain should provide all of the pages. In * other words, this loop body should never be * executed when the source mapping is already * read/write. */ KASSERT((src_entry->protection & VM_PROT_WRITE) == 0 || upgrade, ("vm_fault_copy_entry: main object missing page")); VM_OBJECT_RLOCK(backing_object); pindex += OFF_TO_IDX(object->backing_object_offset); if (object != dst_object) VM_OBJECT_RUNLOCK(object); object = backing_object; } KASSERT(src_m != NULL, ("vm_fault_copy_entry: page missing")); if (object != dst_object) { /* * Allocate a page in the destination object. */ dst_m = vm_page_alloc(dst_object, (src_object == dst_object ? src_pindex : 0) + dst_pindex, VM_ALLOC_NORMAL); if (dst_m == NULL) { VM_OBJECT_WUNLOCK(dst_object); VM_OBJECT_RUNLOCK(object); vm_wait(dst_object); VM_OBJECT_WLOCK(dst_object); goto again; } pmap_copy_page(src_m, dst_m); VM_OBJECT_RUNLOCK(object); dst_m->valid = VM_PAGE_BITS_ALL; dst_m->dirty = VM_PAGE_BITS_ALL; } else { dst_m = src_m; if (vm_page_sleep_if_busy(dst_m, "fltupg")) goto again; + if (dst_m->pindex >= dst_object->size) + /* + * We are upgrading. Index can occur + * out of bounds if the object type is + * vnode and the file was truncated. + */ + break; vm_page_xbusy(dst_m); KASSERT(dst_m->valid == VM_PAGE_BITS_ALL, ("invalid dst page %p", dst_m)); } VM_OBJECT_WUNLOCK(dst_object); /* * Enter it in the pmap. If a wired, copy-on-write * mapping is being replaced by a write-enabled * mapping, then wire that new mapping. */ pmap_enter(dst_map->pmap, vaddr, dst_m, prot, access | (upgrade ? PMAP_ENTER_WIRED : 0), 0); /* * Mark it no longer busy, and put it on the active list. */ VM_OBJECT_WLOCK(dst_object); if (upgrade) { if (src_m != dst_m) { vm_page_lock(src_m); vm_page_unwire(src_m, PQ_INACTIVE); vm_page_unlock(src_m); vm_page_lock(dst_m); vm_page_wire(dst_m); vm_page_unlock(dst_m); } else { KASSERT(dst_m->wire_count > 0, ("dst_m %p is not wired", dst_m)); } } else { vm_page_lock(dst_m); vm_page_activate(dst_m); vm_page_unlock(dst_m); } vm_page_xunbusy(dst_m); } VM_OBJECT_WUNLOCK(dst_object); if (upgrade) { dst_entry->eflags &= ~(MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY); vm_object_deallocate(src_object); } } /* * Block entry into the machine-independent layer's page fault handler by * the calling thread. Subsequent calls to vm_fault() by that thread will * return KERN_PROTECTION_FAILURE. Enable machine-dependent handling of * spurious page faults. */ int vm_fault_disable_pagefaults(void) { return (curthread_pflags_set(TDP_NOFAULTING | TDP_RESETSPUR)); } void vm_fault_enable_pagefaults(int save) { curthread_pflags_restore(save); } Index: projects/clang700-import =================================================================== --- projects/clang700-import (revision 339014) +++ projects/clang700-import (revision 339015) Property changes on: projects/clang700-import ___________________________________________________________________ Modified: svn:mergeinfo ## -0,0 +0,1 ## Merged /head:r338988-339014